From 9e9570ab14e891fe17b4de517ab4d9caea77b72a Mon Sep 17 00:00:00 2001 From: "Kevin M. Dean" Date: Mon, 23 Mar 2026 04:47:06 -0500 Subject: [PATCH 01/10] Support OME-Zarr v3 stores and namespaced cache Introduce OME-Zarr v3 storage helpers and migrate ClearEx stores to a namespaced runtime-cache layout. Add a new clearex.io.ome_store module with helpers for namespaced metadata, runtime-cache source components, analysis cache/auxiliary roots, OME metadata handling, and copying/publishing utilities. Update pipelines and I/O to read/write the new SOURCE_CACHE_COMPONENT and analysis cache/auxiliary paths, migrate store spatial-calibration and metadata handling, and adjust resume/checkpoint logic to use cache/auxiliary groups. Add CLI flags for migrating legacy stores (--migrate-store/--migrate-output/--migrate-overwrite). Update pyproject dependencies to include bioio-ome-zarr, ome-zarr, and ome-zarr-models, and bump the zarr requirement to >=3.1.1. These changes refactor how canonical source data and analysis outputs are stored and advertised (OME-Zarr v3), while preserving compatibility checks and pyramid generation using the new namespaced layout. 
--- pyproject.toml | 5 +- src/clearex/deconvolution/pipeline.py | 47 +- src/clearex/detect/pipeline.py | 13 +- src/clearex/flatfield/pipeline.py | 125 +++-- src/clearex/io/cli.py | 19 + src/clearex/io/experiment.py | 320 ++++++----- src/clearex/io/ome_store.py | 734 ++++++++++++++++++++++++++ src/clearex/io/provenance.py | 49 +- src/clearex/io/read.py | 124 ++++- src/clearex/main.py | 135 +++-- src/clearex/mip_export/pipeline.py | 11 +- src/clearex/registration/pipeline.py | 88 ++- src/clearex/shear/pipeline.py | 23 +- src/clearex/usegment3d/pipeline.py | 68 ++- src/clearex/visualization/pipeline.py | 59 ++- src/clearex/workflow.py | 32 +- uv.lock | 419 ++++++++++++++- 17 files changed, 1910 insertions(+), 361 deletions(-) create mode 100644 src/clearex/io/ome_store.py diff --git a/pyproject.toml b/pyproject.toml index f9755fd..c56e68b 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ requires-python = ">=3.12,<3.13" dependencies = [ "antspyx", "basicpy", + "bioio-ome-zarr", "cython>=3.1.4", "dask==2025.1.0", "dask-image", @@ -29,6 +30,8 @@ dependencies = [ "matplotlib", "napari>0.6.1", "neuroglancer>=2.40.1,<3.0.0", + "ome-zarr", + "ome-zarr-models>=1.6", "opencv-python", "pandas>=2.3.3", "pywavelets", @@ -37,7 +40,7 @@ dependencies = [ "scipy<1.13", "seaborn", "tifffile==2025.1.10", - "zarr<3.0", + "zarr>=3.1.1,<4.0", ] [project.optional-dependencies] diff --git a/src/clearex/deconvolution/pipeline.py b/src/clearex/deconvolution/pipeline.py index cf81cf8..a0ed572 100644 --- a/src/clearex/deconvolution/pipeline.py +++ b/src/clearex/deconvolution/pipeline.py @@ -44,6 +44,14 @@ # Local Imports from clearex.deconvolution.petakit import run_petakit_deconvolution +from clearex.io.ome_store import ( + SOURCE_CACHE_COMPONENT, + analysis_auxiliary_root, + analysis_cache_data_component, + analysis_cache_root, + load_store_metadata, + public_analysis_root, +) from clearex.io.provenance import register_latest_output_reference if TYPE_CHECKING: @@ -462,13 
+470,18 @@ def _extract_store_voxel_sizes_um( ``(xy_um, z_um)`` values when available. """ root_attrs = dict(root.attrs) - data_attrs = dict(root["data"].attrs) if "data" in root else {} + store_metadata = load_store_metadata(root) + data_attrs = ( + dict(root[SOURCE_CACHE_COMPONENT].attrs) + if SOURCE_CACHE_COMPONENT in root + else {} + ) - for attrs in (data_attrs, root_attrs): + for attrs in (data_attrs, store_metadata, root_attrs): voxel = attrs.get("voxel_size_um_zyx") if isinstance(voxel, (list, tuple)) and len(voxel) >= 3: return float(voxel[2]), float(voxel[0]) - for attrs in (root_attrs, data_attrs): + for attrs in (store_metadata, root_attrs, data_attrs): navigate = attrs.get("navigate_experiment") if not isinstance(navigate, dict): continue @@ -1375,11 +1388,14 @@ def _prepare_output_array( int(shape[5]), ) - results_group = root.require_group("results") - decon_group = results_group.require_group("deconvolution") - if "latest" in decon_group: - del decon_group["latest"] - latest = decon_group.create_group("latest") + cache_component = analysis_cache_data_component("deconvolution") + cache_root = analysis_cache_root("deconvolution") + auxiliary_root = analysis_auxiliary_root("deconvolution") + if cache_root in root: + del root[cache_root] + if auxiliary_root in root: + del root[auxiliary_root] + latest = root.require_group(cache_root) latest.create_dataset( name="data", shape=shape, @@ -1395,8 +1411,17 @@ def _prepare_output_array( "run_id": None, } ) - component = "results/deconvolution/latest" - data_component = "results/deconvolution/latest/data" + root.require_group(auxiliary_root).attrs.update( + { + "storage_policy": "latest_only", + "source_component": str(source_component), + "parameters": {str(key): value for key, value in dict(parameters).items()}, + "run_id": None, + "data_component": cache_component, + } + ) + component = public_analysis_root("deconvolution") + data_component = cache_component return ( component, data_component, @@ -1502,7 
+1527,7 @@ def _emit(percent: int, message: str) -> None: _emit(12, "Generating vectorial synthetic PSF assets") synthetic_psf_components = _persist_synthetic_psf_assets( zarr_path=zarr_path, - latest_component=component, + latest_component=analysis_auxiliary_root("deconvolution"), params=normalized, selected_channels=selected_channels, voxel_xy_um=float(voxel_xy_um), diff --git a/src/clearex/detect/pipeline.py b/src/clearex/detect/pipeline.py index faad92f..c18b1c0 100644 --- a/src/clearex/detect/pipeline.py +++ b/src/clearex/detect/pipeline.py @@ -47,6 +47,7 @@ preprocess, remove_close_blobs, ) +from clearex.io.ome_store import analysis_auxiliary_root from clearex.io.provenance import register_latest_output_reference if TYPE_CHECKING: @@ -550,12 +551,11 @@ def save_particle_detections_to_store( str Latest component path for particle-detection results. """ + component = analysis_auxiliary_root("particle_detection") root = zarr.open_group(str(zarr_path), mode="a") - results_group = root.require_group("results") - particle_group = results_group.require_group("particle_detection") - if "latest" in particle_group: - del particle_group["latest"] - latest_group = particle_group.create_group("latest") + if component in root: + del root[component] + latest_group = root.require_group(component) detection_array = np.asarray(detections, dtype=np.float32) row_chunks = int(min(max(1, detection_array.shape[0]), 16384)) @@ -590,12 +590,11 @@ def save_particle_detections_to_store( "channel_index": int(parameters.get("channel_index", 0)), "detection_count": int(detection_array.shape[0]), "parameters": {str(k): v for k, v in dict(parameters).items()}, - "napari_points_component": "results/particle_detection/latest/points_tzyx", + "napari_points_component": f"{component}/points_tzyx", "run_id": run_id, } ) - component = "results/particle_detection/latest" register_latest_output_reference( zarr_path=zarr_path, analysis_name="particle_detection", diff --git 
a/src/clearex/flatfield/pipeline.py b/src/clearex/flatfield/pipeline.py index 71eb9f7..2595385 100644 --- a/src/clearex/flatfield/pipeline.py +++ b/src/clearex/flatfield/pipeline.py @@ -54,6 +54,13 @@ from numpy.typing import NDArray import zarr +from clearex.io.ome_store import ( + analysis_auxiliary_component, + analysis_auxiliary_root, + analysis_cache_data_component, + analysis_cache_root, + public_analysis_root, +) from clearex.io.provenance import register_latest_output_reference if TYPE_CHECKING: @@ -974,7 +981,7 @@ def _copy_source_array_attrs( copied: dict[str, Any] = { "source_component": str(source_component), "chunk_shape_tpczyx": [int(v) for v in output_chunks], - "pyramid_levels": ["results/flatfield/latest/data"], + "pyramid_levels": [analysis_cache_data_component("flatfield")], } for key in ( "scale_tpczyx", @@ -1514,7 +1521,9 @@ def _checkpoint_dataset_specs( def _checkpoint_is_compatible( *, + root: zarr.Group, latest_group: zarr.Group, + data_component: str, source_component: str, shape_tpczyx: tuple[int, int, int, int, int, int], chunks_tpczyx: tuple[int, int, int, int, int, int], @@ -1560,13 +1569,17 @@ def _checkpoint_is_compatible( int(shape_tpczyx[4]), int(shape_tpczyx[5]), ) - if not _has_dataset( - latest_group, name="data", shape=shape_tpczyx, dtype=np.float32 - ): + try: + data_array = root[data_component] + except Exception: return False - if tuple(int(v) for v in latest_group["data"].chunks) != tuple( - int(v) for v in chunks_tpczyx - ): + if not isinstance(data_array, zarr.Array): + return False + if tuple(int(v) for v in data_array.shape) != tuple(int(v) for v in shape_tpczyx): + return False + if np.dtype(data_array.dtype) != np.dtype(np.float32): + return False + if tuple(int(v) for v in data_array.chunks) != tuple(int(v) for v in chunks_tpczyx): return False if not _has_dataset( latest_group, @@ -1630,6 +1643,9 @@ def _checkpoint_is_compatible( return False # Ensure pre-existing chunks decode cleanly before attempting resume. 
+ if not _dataset_chunk_probe_is_readable(data_array): + return False + for name in ("flatfield_pcyx", "darkfield_pcyx", "baseline_pctz"): candidate = latest_group.get(name) if candidate is None or not isinstance(candidate, zarr.Array): @@ -1831,11 +1847,13 @@ def _initialize_latest_flatfield_group( zarr.Group Newly created latest group. """ - results_group = root.require_group("results") - flatfield_group = results_group.require_group("flatfield") - if "latest" in flatfield_group: - del flatfield_group["latest"] - latest_group = flatfield_group.create_group("latest") + cache_root = analysis_cache_root("flatfield") + auxiliary_root = analysis_auxiliary_root("flatfield") + if cache_root in root: + del root[cache_root] + if auxiliary_root in root: + del root[auxiliary_root] + latest_group = root.require_group(cache_root) data_array = latest_group.create_dataset( name="data", @@ -1857,7 +1875,36 @@ def _initialize_latest_flatfield_group( max(1, min(int(chunks_tpczyx[4]), int(shape_tpczyx[4]))), max(1, min(int(chunks_tpczyx[5]), int(shape_tpczyx[5]))), ) - latest_group.create_dataset( + + latest_group.attrs.update( + { + "storage_policy": "latest_only", + "run_id": None, + "source_component": str(source_component), + "data_component": analysis_cache_data_component("flatfield"), + "flatfield_component": analysis_auxiliary_component( + "flatfield", "flatfield_pcyx" + ), + "darkfield_component": analysis_auxiliary_component( + "flatfield", "darkfield_pcyx" + ), + "baseline_component": analysis_auxiliary_component( + "flatfield", "baseline_pctz" + ), + "parameters": _to_jsonable(dict(parameters)), + "resume_parameters": dict(parameter_payload), + "resume_parameters_json": str(parameter_json), + "resume_parameter_fingerprint": str(parameter_fingerprint), + "output_dtype": "float32", + "output_chunks_tpczyx": [int(v) for v in chunks_tpczyx], + "basicpy_version": basicpy_version, + "resume_schema_version": RESUME_SCHEMA_VERSION, + "updated_utc": _utc_now_iso(), + } + ) + + 
auxiliary_group = root.require_group(auxiliary_root) + auxiliary_group.create_dataset( name="flatfield_pcyx", shape=( int(shape_tpczyx[1]), @@ -1869,7 +1916,7 @@ def _initialize_latest_flatfield_group( dtype=np.float32, overwrite=True, ) - latest_group.create_dataset( + auxiliary_group.create_dataset( name="darkfield_pcyx", shape=( int(shape_tpczyx[1]), @@ -1881,7 +1928,7 @@ def _initialize_latest_flatfield_group( dtype=np.float32, overwrite=True, ) - latest_group.create_dataset( + auxiliary_group.create_dataset( name="baseline_pctz", shape=( int(shape_tpczyx[1]), @@ -1898,29 +1945,9 @@ def _initialize_latest_flatfield_group( dtype=np.float32, overwrite=True, ) + auxiliary_group.attrs.update(dict(latest_group.attrs)) - latest_group.attrs.update( - { - "storage_policy": "latest_only", - "run_id": None, - "source_component": str(source_component), - "data_component": "results/flatfield/latest/data", - "flatfield_component": "results/flatfield/latest/flatfield_pcyx", - "darkfield_component": "results/flatfield/latest/darkfield_pcyx", - "baseline_component": "results/flatfield/latest/baseline_pctz", - "parameters": _to_jsonable(dict(parameters)), - "resume_parameters": dict(parameter_payload), - "resume_parameters_json": str(parameter_json), - "resume_parameter_fingerprint": str(parameter_fingerprint), - "output_dtype": "float32", - "output_chunks_tpczyx": [int(v) for v in chunks_tpczyx], - "basicpy_version": basicpy_version, - "resume_schema_version": RESUME_SCHEMA_VERSION, - "updated_utc": _utc_now_iso(), - } - ) - - checkpoint_group = latest_group.require_group(CHECKPOINT_GROUP_NAME) + checkpoint_group = auxiliary_group.require_group(CHECKPOINT_GROUP_NAME) _create_checkpoint_datasets( checkpoint_group=checkpoint_group, shape_tpczyx=shape_tpczyx, @@ -2021,16 +2048,16 @@ def _prepare_output_arrays( parameter_payload ) - component = "results/flatfield/latest" - data_component = "results/flatfield/latest/data" - flatfield_component = 
"results/flatfield/latest/flatfield_pcyx" - darkfield_component = "results/flatfield/latest/darkfield_pcyx" - baseline_component = "results/flatfield/latest/baseline_pctz" - checkpoint_component = "results/flatfield/latest/checkpoint" + component = public_analysis_root("flatfield") + data_component = analysis_cache_data_component("flatfield") + flatfield_component = analysis_auxiliary_component("flatfield", "flatfield_pcyx") + darkfield_component = analysis_auxiliary_component("flatfield", "darkfield_pcyx") + baseline_component = analysis_auxiliary_component("flatfield", "baseline_pctz") + checkpoint_component = analysis_auxiliary_component( + "flatfield", CHECKPOINT_GROUP_NAME + ) - results_group = root.require_group("results") - flatfield_group = results_group.require_group("flatfield") - latest_group = flatfield_group.get("latest") + latest_group = root.get(analysis_auxiliary_root("flatfield")) should_resume = False if ( latest_group is not None @@ -2038,7 +2065,9 @@ def _prepare_output_arrays( and not bool(parameters.get("force_rerun", False)) ): should_resume = _checkpoint_is_compatible( + root=root, latest_group=latest_group, + data_component=data_component, source_component=source_component, shape_tpczyx=shape_tpczyx, chunks_tpczyx=chunks_tpczyx, @@ -3764,7 +3793,9 @@ def _consume_transform_result( progress_end=99, ) ) - latest_group = zarr.open_group(str(zarr_path), mode="a")[layout.component] + latest_group = zarr.open_group(str(zarr_path), mode="a")[ + analysis_auxiliary_root("flatfield") + ] latest_group.attrs.update( { "profile_count": int(profile_count), diff --git a/src/clearex/io/cli.py b/src/clearex/io/cli.py index d64e828..e2790d4 100644 --- a/src/clearex/io/cli.py +++ b/src/clearex/io/cli.py @@ -227,6 +227,25 @@ def create_parser() -> argparse.ArgumentParser: type=str, required=False, ) + parser.add_argument( + "--migrate-store", + type=str, + default=None, + help="Migrate one legacy ClearEx .zarr/.n5 store into canonical OME-Zarr v3 layout.", + ) 
+ parser.add_argument( + "--migrate-output", + type=str, + default=None, + help="Optional destination path for --migrate-store (defaults to .ome.zarr).", + ) + parser.add_argument( + "--migrate-overwrite", + required=False, + default=False, + action="store_true", + help="Overwrite the destination path when using --migrate-store.", + ) parser.add_argument( "--dask", diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index 55f782f..aecba19 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -63,10 +63,26 @@ from dask.delayed import delayed # Local Imports +from clearex.io.ome_store import ( + CLEAREX_PROVENANCE_GROUP, + CLEAREX_RESULTS_GROUP, + CLEAREX_ROOT_GROUP, + CLEAREX_RUNTIME_SOURCE_ROOT, + SOURCE_CACHE_PYRAMID_ROOT, + SOURCE_CACHE_COMPONENT, + compute_position_translations_zyx_um, + default_ome_store_path, + ensure_group, + is_ome_zarr_path, + load_store_spatial_calibration as load_namespaced_store_spatial_calibration, + publish_source_collection_from_cache, + save_store_spatial_calibration as save_namespaced_store_spatial_calibration, + source_cache_component, + update_store_metadata, +) from clearex.io.read import ImageInfo from clearex.workflow import ( SpatialCalibrationConfig, - spatial_calibration_from_dict, spatial_calibration_to_dict, ) @@ -111,7 +127,7 @@ def _is_zarr_like_path(path: Path) -> bool: def has_canonical_data_component(zarr_path: Union[str, Path]) -> bool: - """Return whether a store contains canonical 6D analysis data. + """Return whether a store contains canonical 6D runtime-cache source data. Parameters ---------- @@ -121,24 +137,25 @@ def has_canonical_data_component(zarr_path: Union[str, Path]) -> bool: Returns ------- bool - ``True`` when the store exposes a ``data`` array in canonical + ``True`` when the store exposes a namespaced runtime-cache ``data`` + array in canonical ``(t, p, c, z, y, x)`` form. If axis metadata is present, it must also normalize to that same canonical order. 
Notes ----- This helper is intentionally conservative and returns ``False`` for any - unreadable, missing, or malformed ``data`` component. + unreadable, missing, or malformed runtime-cache data component. """ try: root = zarr.open_group(str(Path(zarr_path).expanduser().resolve()), mode="r") except Exception: return False - if "data" not in root: + if SOURCE_CACHE_COMPONENT not in root: return False - data = root["data"] + data = root[SOURCE_CACHE_COMPONENT] if not hasattr(data, "shape"): return False @@ -237,7 +254,7 @@ def _write_ingestion_progress_record( def load_store_spatial_calibration( zarr_path: Union[str, Path], ) -> SpatialCalibrationConfig: - """Load store-level spatial calibration from root Zarr attrs. + """Load store-level spatial calibration from namespaced store metadata. Parameters ---------- @@ -254,15 +271,14 @@ def load_store_spatial_calibration( ValueError If stored spatial calibration metadata is malformed. """ - root = zarr.open_group(str(Path(zarr_path).expanduser().resolve()), mode="r") - return spatial_calibration_from_dict(root.attrs.get(_SPATIAL_CALIBRATION_ATTR)) + return load_namespaced_store_spatial_calibration(zarr_path) def save_store_spatial_calibration( zarr_path: Union[str, Path], calibration: SpatialCalibrationConfig, ) -> SpatialCalibrationConfig: - """Persist store-level spatial calibration into root Zarr attrs. + """Persist store-level spatial calibration into namespaced store metadata. Parameters ---------- @@ -276,11 +292,7 @@ def save_store_spatial_calibration( SpatialCalibrationConfig Normalized calibration written to the store. 
""" - normalized = spatial_calibration_from_dict(calibration) - serialized = json.loads(json.dumps(spatial_calibration_to_dict(normalized))) - root = zarr.open_group(str(Path(zarr_path).expanduser().resolve()), mode="a") - root.attrs[_SPATIAL_CALIBRATION_ATTR] = serialized - return normalized + return save_namespaced_store_spatial_calibration(zarr_path, calibration) def _resolve_expected_pyramid_level_factors( @@ -352,7 +364,8 @@ def _expected_pyramid_components( Returns ------- list[str] - Ordered component paths beginning with ``"data"``. + Ordered runtime-cache component paths beginning with the source base + component. Raises ------ @@ -360,8 +373,8 @@ def _expected_pyramid_components( This helper does not raise custom exceptions. """ return [ - "data", - *[f"data_pyramid/level_{idx}" for idx in range(1, len(level_factors))], + SOURCE_CACHE_COMPONENT, + *[source_cache_component(level_index=idx) for idx in range(1, len(level_factors))], ] @@ -390,9 +403,9 @@ def _has_expected_pyramid_structure( None Structural mismatches return ``False``. 
""" - if "data" not in root: + if SOURCE_CACHE_COMPONENT not in root: return False - data = root["data"] + data = root[SOURCE_CACHE_COMPONENT] if not hasattr(data, "shape") or not hasattr(data, "chunks"): return False try: @@ -413,14 +426,14 @@ def _has_expected_pyramid_structure( return True expected_components = _expected_pyramid_components(level_factors) - configured_levels = root.attrs.get("data_pyramid_levels") + configured_levels = data.attrs.get("pyramid_levels") if isinstance(configured_levels, (list, tuple)): configured_paths = [str(value) for value in configured_levels] if configured_paths != expected_components: return False for level_index, factors in enumerate(level_factors[1:], start=1): - component = f"data_pyramid/level_{level_index}" + component = source_cache_component(level_index=level_index) if component not in root: return False level_array = root[component] @@ -572,7 +585,7 @@ def has_complete_canonical_data_store( except Exception: return False - data = root.get("data") + data = root.get(SOURCE_CACHE_COMPONENT) if data is None or not hasattr(data, "shape") or not hasattr(data, "chunks"): return False if data.chunks is None: @@ -603,7 +616,7 @@ def has_complete_canonical_data_store( required_components = ( _expected_pyramid_components(level_factors) if level_factors is not None - else ["data"] + else [SOURCE_CACHE_COMPONENT] ) record = _read_ingestion_progress_record(root) @@ -3008,12 +3021,12 @@ def _materialize_data_pyramid( ] ] = None, ) -> list[str]: - """Build and persist downsampled Zarr pyramid levels in canonical store. + """Build and persist downsampled runtime-cache pyramid levels. Parameters ---------- store_path : pathlib.Path - Target Zarr store containing canonical ``data`` array. + Target OME-Zarr store containing namespaced runtime-cache source data. base_chunks_tpczyx : tuple[int, int, int, int, int, int] Effective base chunking in canonical order. pyramid_factors : tuple[tuple[int, ...], ...] 
@@ -3045,46 +3058,47 @@ def _materialize_data_pyramid( Raises ------ ValueError - If canonical base data or pyramid configuration is invalid. + If runtime-cache base data or pyramid configuration is invalid. Notes ----- - Levels are stored under ``data_pyramid/level_`` where ``n`` starts at 1. + Levels are stored under ``clearex/runtime_cache/source/data_pyramid``. Downsampling uses stride-based nearest-neighbor decimation for speed and deterministic dtype preservation. """ level_factors = _normalize_pyramid_level_factors(pyramid_factors) root = zarr.open_group(str(store_path), mode="a") - if "data" not in root: - raise ValueError(f"Expected canonical data array at {store_path}/data.") - base_dtype = np.dtype(root["data"].dtype) + if SOURCE_CACHE_COMPONENT not in root: + raise ValueError( + f"Expected runtime-cache data array at {store_path}/{SOURCE_CACHE_COMPONENT}." + ) + base_dtype = np.dtype(root[SOURCE_CACHE_COMPONENT].dtype) - if not preserve_existing and "data_pyramid" in root: - del root["data_pyramid"] - root.require_group("data_pyramid") + if not preserve_existing and SOURCE_CACHE_PYRAMID_ROOT in root: + del root[SOURCE_CACHE_PYRAMID_ROOT] + ensure_group(root, SOURCE_CACHE_PYRAMID_ROOT) resume_offsets = dict(start_regions_by_component or {}) base_shape = _normalize_tpczyx_shape( - tuple(int(size) for size in root["data"].shape) + tuple(int(size) for size in root[SOURCE_CACHE_COMPONENT].shape) ) - level_paths = ["data"] + level_paths = [SOURCE_CACHE_COMPONENT] level_factor_payload = [[int(value) for value in level_factors[0]]] level_shapes_payload = [[int(value) for value in base_shape]] total_downsample_levels = max(0, len(level_factors) - 1) if total_downsample_levels == 0: - root["data"].attrs.update( + root[SOURCE_CACHE_COMPONENT].attrs.update( { "pyramid_levels": level_paths, "pyramid_factors_tpczyx": level_factor_payload, } ) - root.attrs.update( - { - "data_pyramid_levels": level_paths, - "data_pyramid_factors_tpczyx": level_factor_payload, - 
"data_pyramid_shapes_tpczyx": level_shapes_payload, - } + update_store_metadata( + root, + data_pyramid_levels=level_paths, + data_pyramid_factors_tpczyx=level_factor_payload, + data_pyramid_shapes_tpczyx=level_shapes_payload, ) if progress_callback is not None: progress_callback( @@ -3092,7 +3106,7 @@ def _materialize_data_pyramid( ) return level_paths - prior_component = "data" + prior_component = SOURCE_CACHE_COMPONENT prior_factors = level_factors[0] for level_index, absolute_factors in enumerate(level_factors[1:], start=1): all_relative = all( @@ -3111,7 +3125,7 @@ def _materialize_data_pyramid( source_component = prior_component downsample_factors = relative_factors else: - source_component = "data" + source_component = SOURCE_CACHE_COMPONENT downsample_factors = absolute_factors message = ( @@ -3142,7 +3156,7 @@ def _materialize_data_pyramid( with dask.config.set({"array.rechunk.method": "tasks"}): downsampled = downsampled.rechunk(level_chunks) - component = f"data_pyramid/level_{level_index}" + component = source_cache_component(level_index=level_index) root = zarr.open_group(str(store_path), mode="a") level_total_regions = _count_tpczyx_chunk_regions( shape_tpczyx=level_shape, @@ -3164,12 +3178,13 @@ def _materialize_data_pyramid( ): should_overwrite_level = False if should_overwrite_level: - root.create_dataset( + root.create_array( name=component, shape=level_shape, chunks=level_chunks, dtype=base_dtype.name, overwrite=True, + dimension_names=("t", "p", "c", "z", "y", "x"), ) start_region_index = 0 @@ -3218,18 +3233,17 @@ def _emit_level_progress(completed: int, total: int) -> None: prior_component = component prior_factors = absolute_factors - root["data"].attrs.update( + root[SOURCE_CACHE_COMPONENT].attrs.update( { "pyramid_levels": level_paths, "pyramid_factors_tpczyx": level_factor_payload, } ) - root.attrs.update( - { - "data_pyramid_levels": level_paths, - "data_pyramid_factors_tpczyx": level_factor_payload, - "data_pyramid_shapes_tpczyx": 
level_shapes_payload, - } + update_store_metadata( + root, + data_pyramid_levels=level_paths, + data_pyramid_factors_tpczyx=level_factor_payload, + data_pyramid_shapes_tpczyx=level_shapes_payload, ) if progress_callback is not None: progress_callback(int(progress_end), "Pyramid generation complete") @@ -3335,9 +3349,9 @@ def resolve_data_store_path( Returns ------- pathlib.Path - Destination Zarr store path. Existing Zarr/N5 sources are reused - in-place; non-Zarr sources are materialized as ``data_store.zarr`` - next to ``experiment.yml``. + Destination OME-Zarr v3 store path. Existing OME-Zarr sources are + reused in-place; all other sources are materialized as + ``data_store.ome.zarr`` next to ``experiment.yml``. Raises ------ @@ -3345,9 +3359,9 @@ def resolve_data_store_path( This helper does not raise custom exceptions. """ source = Path(source_path).expanduser().resolve() - if _is_zarr_like_path(source): + if is_ome_zarr_path(source): return source - return (experiment.path.parent / "data_store.zarr").resolve() + return default_ome_store_path(experiment.path.parent) def materialize_experiment_data_store( @@ -3510,7 +3524,10 @@ def _emit_progress(percent: int, message: str) -> None: _emit_progress(100, "Canonical data store is already complete") data_root = zarr.open_group(str(store_path), mode="r") data_chunks = tuple( - int(value) for value in (data_root["data"].chunks or normalized_chunks) + int(value) + for value in ( + data_root[SOURCE_CACHE_COMPONENT].chunks or normalized_chunks + ) ) return MaterializedDataStore( source_path=source_resolved, @@ -3528,7 +3545,7 @@ def _emit_progress(percent: int, message: str) -> None: shape=canonical_shape, dtype=source_dtype, axes="TPCZYX", - metadata={"component": "data"}, + metadata={"component": SOURCE_CACHE_COMPONENT}, ), chunks_tpczyx=_normalize_write_chunks( shape_tpczyx=canonical_shape, @@ -3622,9 +3639,7 @@ def _write_canonical_component( chunks_tpczyx=normalized_chunks, ) - should_stage_same_component = ( - 
store_path == source_resolved and source_component == "data" - ) + should_stage_same_component = False checkpoint_resume_supported = not should_stage_same_component root = zarr.open_group(str(store_path), mode="a") existing_progress_record = _read_ingestion_progress_record(root) @@ -3638,7 +3653,7 @@ def _write_canonical_component( record=existing_progress_record, source_path=source_resolved, source_component=source_component, - target_component="data", + target_component=SOURCE_CACHE_COMPONENT, canonical_shape_tpczyx=canonical_shape, chunks_tpczyx=normalized_chunks, level_factors_tpczyx=level_factors_tpczyx, @@ -3647,7 +3662,7 @@ def _write_canonical_component( ) and _component_matches_shape_and_chunks( root=root, - component="data", + component=SOURCE_CACHE_COMPONENT, shape_tpczyx=canonical_shape, chunks_tpczyx=normalized_chunks, ) @@ -3673,7 +3688,7 @@ def _write_canonical_component( ingestion_record = _create_ingestion_progress_record( source_path=source_resolved, source_component=source_component, - target_component="data", + target_component=SOURCE_CACHE_COMPONENT, canonical_shape_tpczyx=canonical_shape, chunks_tpczyx=normalized_chunks, level_factors_tpczyx=level_factors_tpczyx, @@ -3786,7 +3801,7 @@ def _persist_level_progress( shape_tpczyx=canonical_shape, ) _write_canonical_component( - component="data", + component=SOURCE_CACHE_COMPONENT, progress_start=55, progress_end=70, progress_label="Writing canonical data", @@ -3855,7 +3870,7 @@ def _persist_level_progress( if use_source_aligned_plane_writes else "chunk_region_batches" ) - root["data"].attrs.update( + root[SOURCE_CACHE_COMPONENT].attrs.update( { "source_path": source_metadata_path, "source_axes": source_axes_attr, @@ -3879,31 +3894,51 @@ def _persist_level_progress( } ) if source_component is not None: - root["data"].attrs["source_component"] = source_component - root.attrs.update( - { - "source_data_path": source_metadata_path, - "source_data_axes": source_axes_attr, - "source_data_component": 
source_component, - "voxel_size_um_zyx": voxel_size_um_zyx, - "materialization_write_strategy": write_strategy, - "source_aligned_z_batch_depth": ( - int(source_aligned_z_batch_depth) - if source_aligned_z_batch_depth is not None - else None - ), - "source_aligned_worker_count": ( - int(source_aligned_worker_count) - if source_aligned_worker_count is not None - else None - ), - "source_aligned_worker_memory_limit_bytes": ( - int(source_aligned_worker_memory_limit_bytes) - if source_aligned_worker_memory_limit_bytes is not None - else None - ), - } + root[SOURCE_CACHE_COMPONENT].attrs["source_component"] = source_component + + stage_rows_payload = _load_multiposition_rows(experiment.save_directory) + stage_rows = ( + [row for row in stage_rows_payload if isinstance(row, dict)] + if isinstance(stage_rows_payload, list) + else [] + ) + spatial_calibration = load_store_spatial_calibration(store_path) + position_translations_zyx_um = compute_position_translations_zyx_um( + stage_rows if stage_rows else None, + spatial_calibration, + position_count=int(canonical_shape[1]), + ) + update_store_metadata( + root, + source_data_path=source_metadata_path, + source_data_axes=source_axes_attr, + source_data_component=source_component, + voxel_size_um_zyx=voxel_size_um_zyx, + materialization_write_strategy=write_strategy, + source_aligned_z_batch_depth=( + int(source_aligned_z_batch_depth) + if source_aligned_z_batch_depth is not None + else None + ), + source_aligned_worker_count=( + int(source_aligned_worker_count) + if source_aligned_worker_count is not None + else None + ), + source_aligned_worker_memory_limit_bytes=( + int(source_aligned_worker_memory_limit_bytes) + if source_aligned_worker_memory_limit_bytes is not None + else None + ), + selected_channels=[ + {"name": channel.name, "laser": channel.laser} + for channel in experiment.selected_channels + ], + stage_rows=stage_rows if stage_rows else None, + position_translations_zyx_um=position_translations_zyx_um, + 
spatial_calibration=spatial_calibration_to_dict(spatial_calibration), ) + publish_source_collection_from_cache(store_path) source_image_path = Path(source_metadata_path).expanduser() if not source_image_path.is_absolute(): @@ -3921,7 +3956,7 @@ def _persist_level_progress( shape=canonical_shape, dtype=source_dtype, axes="TPCZYX", - metadata={"component": "data"}, + metadata={"component": SOURCE_CACHE_COMPONENT}, ) _emit_progress(100, "Materialization complete") return MaterializedDataStore( @@ -4912,7 +4947,7 @@ def resolve_experiment_data_path( def default_analysis_store_path(experiment: NavigateExperiment) -> Path: - """Return canonical 6D analysis Zarr store path for an experiment. + """Return canonical OME-Zarr analysis-store path for an experiment. Parameters ---------- @@ -4922,9 +4957,9 @@ def default_analysis_store_path(experiment: NavigateExperiment) -> Path: Returns ------- pathlib.Path - Path to canonical analysis store (``analysis_6d.zarr``). + Path to canonical analysis store (``analysis.ome.zarr``). """ - return experiment.save_directory / "analysis_6d.zarr" + return experiment.save_directory / "analysis.ome.zarr" def infer_zyx_shape( @@ -4969,7 +5004,7 @@ def initialize_analysis_store( ] = ((1,), (1,), (1,), (1, 2, 4, 8), (1, 2, 4, 8), (1, 2, 4, 8)), dtype: Optional[str] = None, ) -> Path: - """Initialize canonical 6D analysis Zarr store for an experiment. + """Initialize canonical runtime-cache arrays within an OME-Zarr store. Parameters ---------- @@ -4994,7 +5029,7 @@ def initialize_analysis_store( Returns ------- pathlib.Path - Resolved Zarr store path. + Resolved OME-Zarr store path. 
Raises ------ @@ -5079,15 +5114,17 @@ def initialize_analysis_store( root = zarr.open_group(str(output_path), mode="a") root.require_group("results") - root.require_group("provenance") - spatial_calibration_payload = spatial_calibration_to_dict( - spatial_calibration_from_dict(root.attrs.get(_SPATIAL_CALIBRATION_ATTR)) - ) - if "data" in root: + ensure_group(root, CLEAREX_ROOT_GROUP) + ensure_group(root, CLEAREX_RESULTS_GROUP) + ensure_group(root, CLEAREX_PROVENANCE_GROUP) + ensure_group(root, CLEAREX_RUNTIME_SOURCE_ROOT) + + cache_root = ensure_group(root, CLEAREX_RUNTIME_SOURCE_ROOT) + if "data" in cache_root: if overwrite: - del root["data"] + del cache_root["data"] else: - existing = root["data"] + existing = cache_root["data"] existing_chunks = ( [int(chunk) for chunk in existing.chunks] if existing.chunks is not None @@ -5098,40 +5135,33 @@ def initialize_analysis_store( "axes": ["t", "p", "c", "z", "y", "x"], "storage_policy": "latest_only", "chunk_shape_tpczyx": existing_chunks, - "configured_chunks_tpczyx": [ - int(chunk) for chunk in requested_chunks - ], + "configured_chunks_tpczyx": [int(chunk) for chunk in requested_chunks], "resolution_pyramid_factors_tpczyx": pyramid_payload, "voxel_size_um_zyx": voxel_size_um_zyx, } ) - root.attrs.update( - { - "schema": "clearex.analysis_store.v1", - "axes": ["t", "p", "c", "z", "y", "x"], - "source_experiment": str(experiment.path), - "navigate_experiment": experiment.to_metadata_dict(), - "storage_policy_analysis_outputs": "latest_only", - "storage_policy_provenance": "append_only", - _SPATIAL_CALIBRATION_ATTR: spatial_calibration_payload, - "chunk_shape_tpczyx": existing_chunks, - "configured_chunks_tpczyx": [ - int(chunk) for chunk in requested_chunks - ], - "resolution_pyramid_factors_tpczyx": pyramid_payload, - "voxel_size_um_zyx": voxel_size_um_zyx, - } + update_store_metadata( + root, + source_experiment=str(experiment.path), + navigate_experiment=experiment.to_metadata_dict(), + 
storage_policy_analysis_outputs="latest_only", + storage_policy_provenance="append_only", + chunk_shape_tpczyx=existing_chunks, + configured_chunks_tpczyx=[int(chunk) for chunk in requested_chunks], + resolution_pyramid_factors_tpczyx=pyramid_payload, + voxel_size_um_zyx=voxel_size_um_zyx, ) return output_path - root.create_dataset( - name="data", + cache_root.create_array( + "data", shape=shape, chunks=normalized_chunks, dtype=dtype, overwrite=True, + dimension_names=("t", "p", "c", "z", "y", "x"), ) - root["data"].attrs.update( + cache_root["data"].attrs.update( { "axes": ["t", "p", "c", "z", "y", "x"], "storage_policy": "latest_only", @@ -5139,22 +5169,24 @@ def initialize_analysis_store( "configured_chunks_tpczyx": [int(chunk) for chunk in requested_chunks], "resolution_pyramid_factors_tpczyx": pyramid_payload, "voxel_size_um_zyx": voxel_size_um_zyx, + "pyramid_levels": _expected_pyramid_components( + _normalize_pyramid_level_factors(pyramid_factors) + ), } ) - root.attrs.update( - { - "schema": "clearex.analysis_store.v1", - "axes": ["t", "p", "c", "z", "y", "x"], - "source_experiment": str(experiment.path), - "navigate_experiment": experiment.to_metadata_dict(), - "storage_policy_analysis_outputs": "latest_only", - "storage_policy_provenance": "append_only", - _SPATIAL_CALIBRATION_ATTR: spatial_calibration_payload, - "chunk_shape_tpczyx": [int(chunk) for chunk in normalized_chunks], - "configured_chunks_tpczyx": [int(chunk) for chunk in requested_chunks], - "resolution_pyramid_factors_tpczyx": pyramid_payload, - "voxel_size_um_zyx": voxel_size_um_zyx, - } + update_store_metadata( + root, + source_experiment=str(experiment.path), + navigate_experiment=experiment.to_metadata_dict(), + storage_policy_analysis_outputs="latest_only", + storage_policy_provenance="append_only", + chunk_shape_tpczyx=[int(chunk) for chunk in normalized_chunks], + configured_chunks_tpczyx=[int(chunk) for chunk in requested_chunks], + resolution_pyramid_factors_tpczyx=pyramid_payload, + 
voxel_size_um_zyx=voxel_size_um_zyx, + spatial_calibration=spatial_calibration_to_dict( + load_namespaced_store_spatial_calibration(root) + ), ) return output_path @@ -5605,7 +5637,7 @@ def write_zyx_block( return da.to_zarr( block_6d, url=str(zarr_path), - component="data", + component=SOURCE_CACHE_COMPONENT, region=region, overwrite=False, compute=compute, @@ -5613,7 +5645,7 @@ def write_zyx_block( if isinstance(block, np.ndarray): root = zarr.open_group(str(zarr_path), mode="a") - root["data"][region] = block[None, None, None, :, :, :] + root[SOURCE_CACHE_COMPONENT][region] = block[None, None, None, :, :, :] return None raise TypeError( diff --git a/src/clearex/io/ome_store.py b/src/clearex/io/ome_store.py new file mode 100644 index 0000000..a315ed0 --- /dev/null +++ b/src/clearex/io/ome_store.py @@ -0,0 +1,734 @@ +# Copyright (c) 2021-2026 The University of Texas Southwestern Medical Center. +# All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted for academic and research use only (subject to the +# limitations in the disclaimer below) provided that the following conditions are met: +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from this +# software without specific prior written permission. +# NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY +# THIS LICENSE. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +"""OME-Zarr v3 storage helpers for canonical ClearEx stores.""" + +from __future__ import annotations + +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Mapping, Optional, Sequence +import json +import shutil + +import dask.array as da +import zarr + +from ome_zarr_models.common.plate import Column, Row, WellInPlate +from ome_zarr_models.v05.hcs import HCSAttrs +from ome_zarr_models.v05.image import ImageAttrs +from ome_zarr_models.v05.multiscales import Dataset, Multiscale +from ome_zarr_models.v05.plate import Plate +from ome_zarr_models.v05.well import WellAttrs +from ome_zarr_models.v05.well_types import WellImage, WellMeta + +from clearex.workflow import ( + SpatialCalibrationConfig, + spatial_calibration_from_dict, + spatial_calibration_to_dict, +) + +PUBLIC_WELL_ROW = "A" +PUBLIC_WELL_COLUMN = "1" +OME_ZARR_STORE_SUFFIX = ".ome.zarr" + +CLEAREX_ROOT_GROUP = "clearex" +CLEAREX_METADATA_GROUP = f"{CLEAREX_ROOT_GROUP}/metadata" +CLEAREX_PROVENANCE_GROUP = f"{CLEAREX_ROOT_GROUP}/provenance" +CLEAREX_GUI_STATE_GROUP = f"{CLEAREX_ROOT_GROUP}/gui_state" +CLEAREX_RESULTS_GROUP = f"{CLEAREX_ROOT_GROUP}/results" +CLEAREX_RUNTIME_CACHE_ROOT = 
f"{CLEAREX_ROOT_GROUP}/runtime_cache" +CLEAREX_RUNTIME_SOURCE_ROOT = f"{CLEAREX_RUNTIME_CACHE_ROOT}/source" +CLEAREX_RUNTIME_RESULTS_ROOT = f"{CLEAREX_RUNTIME_CACHE_ROOT}/results" + +SOURCE_CACHE_COMPONENT = f"{CLEAREX_RUNTIME_SOURCE_ROOT}/data" +SOURCE_CACHE_PYRAMID_ROOT = f"{CLEAREX_RUNTIME_SOURCE_ROOT}/data_pyramid" +STORE_METADATA_SCHEMA = "clearex.ome_store.v1" +LEGACY_STORE_MIGRATION_HINT = ( + "Legacy ClearEx stores are no longer treated as canonical runtime inputs. " + "Run `clearex --migrate-store ` to convert them to " + "OME-Zarr v3." +) + + +def default_ome_store_path(experiment_directory: Path) -> Path: + """Return the canonical OME-Zarr v3 output path beside ``experiment.yml``. + + Parameters + ---------- + experiment_directory : pathlib.Path + Parent directory for the source experiment file. + + Returns + ------- + pathlib.Path + Canonical output path. + """ + return experiment_directory.resolve() / f"data_store{OME_ZARR_STORE_SUFFIX}" + + +def is_ome_zarr_path(path: Path) -> bool: + """Return whether a path is an OME-Zarr directory-style store.""" + normalized = str(path).lower() + return path.is_dir() and normalized.endswith(OME_ZARR_STORE_SUFFIX) + + +def public_analysis_root(analysis_name: str) -> str: + """Return the public OME image-collection root for one analysis.""" + key = str(analysis_name).strip() + return f"results/{key}/latest" + + +def analysis_cache_root(analysis_name: str) -> str: + """Return the runtime-cache root for one analysis output.""" + key = str(analysis_name).strip() + return f"{CLEAREX_RUNTIME_RESULTS_ROOT}/{key}/latest" + + +def analysis_cache_data_component(analysis_name: str, *, level_index: int = 0) -> str: + """Return one runtime-cache image component path for an analysis output.""" + root = analysis_cache_root(analysis_name) + if level_index <= 0: + return f"{root}/data" + return f"{root}/data_pyramid/level_{int(level_index)}" + + +def analysis_auxiliary_root(analysis_name: str) -> str: + """Return the ClearEx-owned 
auxiliary artifact root for one analysis.""" + key = str(analysis_name).strip() + return f"{CLEAREX_RESULTS_GROUP}/{key}/latest" + + +def analysis_auxiliary_component(analysis_name: str, name: str) -> str: + """Return one ClearEx-owned auxiliary artifact component path.""" + return f"{analysis_auxiliary_root(analysis_name)}/{str(name).strip()}" + + +def source_cache_component(*, level_index: int = 0) -> str: + """Return one runtime-cache source image component path.""" + if level_index <= 0: + return SOURCE_CACHE_COMPONENT + return f"{SOURCE_CACHE_PYRAMID_ROOT}/level_{int(level_index)}" + + +def _split_component(path: str) -> tuple[str, ...]: + return tuple(part for part in str(path).strip("/").split("/") if part) + + +def ensure_group(root: zarr.Group, path: str) -> zarr.Group: + """Ensure a nested group path exists below ``root``.""" + group = root + for token in _split_component(path): + group = group.require_group(token) + return group + + +def get_node(root: zarr.Group, path: str) -> Any: + """Return a nested group or array node.""" + node: Any = root + for token in _split_component(path): + node = node[token] + return node + + +def delete_path(root: zarr.Group, path: str) -> None: + """Delete a nested path when it exists.""" + tokens = list(_split_component(path)) + if not tokens: + return + parent = root + for token in tokens[:-1]: + if token not in parent: + return + parent = parent[token] + leaf = tokens[-1] + if leaf in parent: + del parent[leaf] + + +def load_store_metadata(path_or_root: str | Path | zarr.Group) -> dict[str, Any]: + """Load ClearEx namespaced store metadata.""" + root = ( + path_or_root + if isinstance(path_or_root, zarr.Group) + else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="a") + ) + group = ensure_group(root, CLEAREX_METADATA_GROUP) + payload = dict(group.attrs) + if payload: + return payload + return {"schema": STORE_METADATA_SCHEMA} + + +def update_store_metadata(path_or_root: str | Path | zarr.Group, 
**payload: Any) -> dict[str, Any]: + """Merge and persist ClearEx namespaced store metadata.""" + root = ( + path_or_root + if isinstance(path_or_root, zarr.Group) + else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="a") + ) + group = ensure_group(root, CLEAREX_METADATA_GROUP) + current = {"schema": STORE_METADATA_SCHEMA} + current.update(dict(group.attrs)) + current.update(json.loads(json.dumps(payload))) + group.attrs.update(current) + return dict(group.attrs) + + +def store_has_public_ome_metadata(path_or_root: str | Path | zarr.Group) -> bool: + """Return whether a store/root advertises public OME metadata at the root.""" + root = ( + path_or_root + if isinstance(path_or_root, zarr.Group) + else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="r") + ) + payload = getattr(root, "attrs", {}).get("ome") + return isinstance(payload, Mapping) + + +def is_legacy_clearex_store(path_or_root: str | Path | zarr.Group) -> bool: + """Return whether a store still follows the legacy pre-OME ClearEx layout.""" + root = ( + path_or_root + if isinstance(path_or_root, zarr.Group) + else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="r") + ) + if store_has_public_ome_metadata(root): + return False + legacy_markers = ( + "data" in root, + "data_pyramid" in root, + "provenance" in root, + "results" in root and CLEAREX_ROOT_GROUP not in root, + root.attrs.get("schema") == "clearex.analysis_store.v1", + root.attrs.get("data_pyramid_levels") is not None, + ) + return any(bool(marker) for marker in legacy_markers) + + +def load_store_spatial_calibration(path_or_root: str | Path | zarr.Group) -> SpatialCalibrationConfig: + """Load store-level spatial calibration from the namespaced metadata group.""" + metadata = load_store_metadata(path_or_root) + return spatial_calibration_from_dict(metadata.get("spatial_calibration")) + + +def save_store_spatial_calibration( + path_or_root: str | Path | zarr.Group, + calibration: 
SpatialCalibrationConfig | Mapping[str, Any] | str | None, +) -> SpatialCalibrationConfig: + """Persist store-level spatial calibration in the namespaced metadata group.""" + normalized = spatial_calibration_from_dict(calibration) + update_store_metadata( + path_or_root, + spatial_calibration=spatial_calibration_to_dict(normalized), + ) + return normalized + + +def _jsonable_mapping(payload: Mapping[str, Any]) -> dict[str, Any]: + """Return a JSON-safe shallow mapping copy.""" + return json.loads(json.dumps(dict(payload))) + + +def _copy_array( + *, + source: zarr.Array, + dest_root: zarr.Group, + dest_component: str, +) -> None: + """Copy one array into the destination store.""" + parent_path, _, leaf = str(dest_component).rpartition("/") + parent = ensure_group(dest_root, parent_path) if parent_path else dest_root + if leaf in parent: + del parent[leaf] + shape = tuple(int(value) for value in source.shape) + source_chunks = getattr(source, "chunks", None) + chunks = ( + tuple(int(value) for value in source_chunks) + if isinstance(source_chunks, (tuple, list)) + else shape + ) + target = parent.create_array( + leaf, + shape=shape, + chunks=chunks, + dtype=source.dtype, + overwrite=True, + ) + da.store(da.from_zarr(source), target, lock=False, compute=True) + target.attrs.update(_jsonable_mapping(dict(source.attrs))) + + +def _copy_group_contents( + *, + source: zarr.Group, + dest_root: zarr.Group, + dest_prefix: str, + skip_children: Optional[set[str]] = None, +) -> None: + """Recursively copy one group tree into the destination store.""" + skip = {str(token) for token in (skip_children or set())} + target_group = ensure_group(dest_root, dest_prefix) + target_group.attrs.update(_jsonable_mapping(dict(source.attrs))) + for key in sorted(source.array_keys()): + if str(key) in skip: + continue + _copy_array( + source=source[key], + dest_root=dest_root, + dest_component=f"{dest_prefix}/{key}" if dest_prefix else str(key), + ) + for key in sorted(source.group_keys()): 
+ if str(key) in skip: + continue + _copy_group_contents( + source=source[key], + dest_root=dest_root, + dest_prefix=f"{dest_prefix}/{key}" if dest_prefix else str(key), + ) + + +def compute_position_translations_zyx_um( + stage_rows: Sequence[Mapping[str, Any]] | None, + spatial_calibration: SpatialCalibrationConfig | Mapping[str, Any] | str | None, + *, + position_count: int, +) -> list[list[float]]: + """Resolve one world-space ``(z, y, x)`` translation per position.""" + normalized = spatial_calibration_from_dict(spatial_calibration) + rows = list(stage_rows or []) + if not rows: + return [[0.0, 0.0, 0.0] for _ in range(max(1, int(position_count)))] + + def _row_value(row: Mapping[str, Any], axis_name: str) -> float: + axis = str(axis_name).strip().lower() + for key, value in row.items(): + if str(key).strip().lower() == axis: + try: + return float(value) + except Exception: + return 0.0 + return 0.0 + + def _binding_value(row: Mapping[str, Any], binding: str) -> float: + token = str(binding).strip().lower() + if token == "none": + return 0.0 + sign = -1.0 if token.startswith("-") else 1.0 + axis_name = token[1:] if token[:1] in {"+", "-"} else token + return sign * _row_value(row, axis_name) + + stage_axis_map = normalized.stage_axis_map_by_world_axis() + reference = rows[0] + translations: list[list[float]] = [] + for position_index in range(max(1, int(position_count))): + row = rows[position_index] if position_index < len(rows) else reference + translations.append( + [ + float(_binding_value(row, stage_axis_map["z"]) - _binding_value(reference, stage_axis_map["z"])), + float(_binding_value(row, stage_axis_map["y"]) - _binding_value(reference, stage_axis_map["y"])), + float(_binding_value(row, stage_axis_map["x"]) - _binding_value(reference, stage_axis_map["x"])), + ] + ) + return translations + + +def _model_payload(model: Any) -> dict[str, Any]: + return json.loads(model.model_dump_json(by_alias=True, exclude_none=True)) + + +def 
_level_component_paths(cache_root: str, root: zarr.Group) -> list[str]: + components = [f"{cache_root}/data"] + pyramid_root = f"{cache_root}/data_pyramid" + try: + pyramid_group = get_node(root, pyramid_root) + except Exception: + pyramid_group = None + if isinstance(pyramid_group, zarr.Group): + level_tokens = sorted( + ( + token + for token in pyramid_group.group_keys() + if str(token).startswith("level_") + ), + key=lambda token: int(str(token).split("level_", maxsplit=1)[1]), + ) + components.extend(f"{pyramid_root}/{token}" for token in level_tokens) + return components + + +def _level_downsample_factors(level_array: Any, *, level_index: int) -> tuple[int, int, int, int, int, int]: + payload = getattr(level_array, "attrs", {}).get("downsample_factors_tpczyx") + if isinstance(payload, (tuple, list)) and len(payload) == 6: + try: + return tuple(int(value) for value in payload) # type: ignore[return-value] + except Exception: + pass + fallback = 2**max(0, int(level_index)) + return (1, 1, 1, fallback, fallback, fallback) + + +def _set_hcs_group_attrs(collection_group: zarr.Group, *, name: str, field_count: int) -> None: + payload = HCSAttrs( + version="0.5", + plate=Plate( + version="0.5", + name=str(name), + rows=[Row(name=PUBLIC_WELL_ROW)], + columns=[Column(name=PUBLIC_WELL_COLUMN)], + wells=[ + WellInPlate( + path=f"{PUBLIC_WELL_ROW}/{PUBLIC_WELL_COLUMN}", + rowIndex=0, + columnIndex=0, + ) + ], + field_count=max(1, int(field_count)), + ), + ) + collection_group.attrs["ome"] = _model_payload(payload) + + +def _set_well_group_attrs(well_group: zarr.Group, *, field_paths: Sequence[str]) -> None: + payload = WellAttrs( + version="0.5", + well=WellMeta(images=[WellImage(path=str(path)) for path in field_paths]), + ) + well_group.attrs["ome"] = _model_payload(payload) + + +def _set_image_group_attrs( + image_group: zarr.Group, + *, + level_count: int, + voxel_size_um_zyx: Sequence[float] | None, + position_translation_zyx_um: Sequence[float] | None, + 
level_factors_tpczyx: Sequence[tuple[int, int, int, int, int, int]], +) -> None: + voxel = tuple(float(value) for value in (voxel_size_um_zyx or (1.0, 1.0, 1.0))) + translation = tuple( + float(value) for value in (position_translation_zyx_um or (0.0, 0.0, 0.0)) + ) + axes = [ + {"name": "t", "type": "time"}, + {"name": "c", "type": "channel"}, + {"name": "z", "type": "space", "unit": "micrometer"}, + {"name": "y", "type": "space", "unit": "micrometer"}, + {"name": "x", "type": "space", "unit": "micrometer"}, + ] + datasets = [] + for level_index in range(max(1, int(level_count))): + factors = level_factors_tpczyx[level_index] + scale = [ + 1.0, + 1.0, + float(voxel[0]) * float(factors[3]), + float(voxel[1]) * float(factors[4]), + float(voxel[2]) * float(factors[5]), + ] + datasets.append( + Dataset( + path=str(level_index), + coordinateTransformations=[ + {"type": "scale", "scale": scale}, + { + "type": "translation", + "translation": [0.0, 0.0, translation[0], translation[1], translation[2]], + }, + ], + ) + ) + payload = ImageAttrs( + version="0.5", + multiscales=[Multiscale(axes=axes, datasets=tuple(datasets))], + ) + image_group.attrs["ome"] = _model_payload(payload) + + +def _prepare_public_collection_root( + root: zarr.Group, + *, + public_root: str, + name: str, + field_count: int, +) -> zarr.Group: + if str(public_root).strip(): + delete_path(root, public_root) + collection_group = ensure_group(root, public_root) + else: + if PUBLIC_WELL_ROW in root: + del root[PUBLIC_WELL_ROW] + collection_group = root + _set_hcs_group_attrs(collection_group, name=name, field_count=field_count) + ensure_group(collection_group, f"{PUBLIC_WELL_ROW}/{PUBLIC_WELL_COLUMN}") + well_group = get_node(collection_group, f"{PUBLIC_WELL_ROW}/{PUBLIC_WELL_COLUMN}") + _set_well_group_attrs( + well_group, + field_paths=[str(index) for index in range(max(1, int(field_count)))], + ) + return collection_group + + +def publish_image_collection_from_cache( + zarr_path: str | Path, + *, + 
cache_root: str, + public_root: str, + name: str, +) -> str: + """Publish one runtime-cache 6D image collection as public OME-Zarr HCS data.""" + store_path = Path(zarr_path).expanduser().resolve() + root = zarr.open_group(str(store_path), mode="a") + cache_components = _level_component_paths(cache_root, root) + if not cache_components: + raise ValueError(f"No runtime-cache data was found at {cache_root}.") + + base_array = get_node(root, cache_components[0]) + if len(tuple(base_array.shape)) != 6: + raise ValueError( + f"Expected runtime-cache image data at {cache_components[0]} to be 6D." + ) + + shape_tpczyx = tuple(int(value) for value in base_array.shape) + position_count = max(1, int(shape_tpczyx[1])) + voxel_size_um_zyx = base_array.attrs.get("voxel_size_um_zyx") + metadata = load_store_metadata(root) + translations = metadata.get("position_translations_zyx_um") + if not isinstance(translations, list): + translations = [[0.0, 0.0, 0.0] for _ in range(position_count)] + + level_arrays = [get_node(root, component) for component in cache_components] + level_factors = [ + (1, 1, 1, 1, 1, 1), + *[ + _level_downsample_factors(level_array, level_index=index) + for index, level_array in enumerate(level_arrays[1:], start=1) + ], + ] + + collection_group = _prepare_public_collection_root( + root, + public_root=public_root, + name=name, + field_count=position_count, + ) + well_group = get_node(collection_group, f"{PUBLIC_WELL_ROW}/{PUBLIC_WELL_COLUMN}") + + for position_index in range(position_count): + field_name = str(position_index) + image_group = ensure_group(well_group, field_name) + _set_image_group_attrs( + image_group, + level_count=len(level_arrays), + voxel_size_um_zyx=voxel_size_um_zyx, + position_translation_zyx_um=translations[position_index] + if position_index < len(translations) + else (0.0, 0.0, 0.0), + level_factors_tpczyx=level_factors, + ) + for level_index, level_array in enumerate(level_arrays): + level_shape_tczyx = ( + 
int(level_array.shape[0]), + int(level_array.shape[2]), + int(level_array.shape[3]), + int(level_array.shape[4]), + int(level_array.shape[5]), + ) + level_chunks_tczyx = ( + int(level_array.chunks[0]) if level_array.chunks is not None else level_shape_tczyx[0], + int(level_array.chunks[2]) if level_array.chunks is not None else level_shape_tczyx[1], + int(level_array.chunks[3]) if level_array.chunks is not None else level_shape_tczyx[2], + int(level_array.chunks[4]) if level_array.chunks is not None else level_shape_tczyx[3], + int(level_array.chunks[5]) if level_array.chunks is not None else level_shape_tczyx[4], + ) + if str(level_index) in image_group: + del image_group[str(level_index)] + target = image_group.create_array( + str(level_index), + shape=level_shape_tczyx, + chunks=level_chunks_tczyx, + dtype=level_array.dtype, + overwrite=True, + dimension_names=("t", "c", "z", "y", "x"), + ) + source = da.from_zarr(level_array)[:, position_index, :, :, :, :] + da.store(source, target, lock=False, compute=True) + + return public_root + + +def publish_source_collection_from_cache(zarr_path: str | Path) -> str: + """Publish the canonical source runtime-cache image tree at the store root.""" + return publish_image_collection_from_cache( + zarr_path, + cache_root=CLEAREX_RUNTIME_SOURCE_ROOT, + public_root="", + name="clearex-source", + ) + + +def publish_analysis_collection_from_cache( + zarr_path: str | Path, + *, + analysis_name: str, +) -> str: + """Publish one analysis runtime-cache image collection under ``results/``.""" + key = str(analysis_name).strip() + return publish_image_collection_from_cache( + zarr_path, + cache_root=analysis_cache_root(key), + public_root=public_analysis_root(key), + name=key, + ) + + +def default_migrated_ome_store_path(legacy_store_path: str | Path) -> Path: + """Return the default destination path for legacy-store migration.""" + source_path = Path(legacy_store_path).expanduser().resolve() + name = source_path.name + if 
name.endswith(".zarr"): + stem = name[: -len(".zarr")] + return source_path.with_name(f"{stem}{OME_ZARR_STORE_SUFFIX}") + if name.endswith(".n5"): + stem = name[: -len(".n5")] + return source_path.with_name(f"{stem}{OME_ZARR_STORE_SUFFIX}") + return source_path.with_name(f"{name}{OME_ZARR_STORE_SUFFIX}") + + +def migrate_legacy_store( + legacy_store_path: str | Path, + *, + output_path: str | Path | None = None, + overwrite: bool = False, +) -> Path: + """Migrate one legacy ClearEx store into canonical OME-Zarr layout.""" + source_path = Path(legacy_store_path).expanduser().resolve() + if not source_path.exists(): + raise FileNotFoundError(source_path) + + source_root = zarr.open_group(str(source_path), mode="r") + if not is_legacy_clearex_store(source_root): + raise ValueError(f"Path is not a legacy ClearEx store: {source_path}") + + dest_path = ( + Path(output_path).expanduser().resolve() + if output_path is not None + else default_migrated_ome_store_path(source_path) + ) + if dest_path.exists(): + if not overwrite: + raise FileExistsError(dest_path) + if dest_path.is_dir(): + shutil.rmtree(dest_path) + else: + dest_path.unlink() + + dest_root = zarr.open_group(str(dest_path), mode="a") + ensure_group(dest_root, CLEAREX_ROOT_GROUP) + ensure_group(dest_root, CLEAREX_METADATA_GROUP) + ensure_group(dest_root, CLEAREX_PROVENANCE_GROUP) + ensure_group(dest_root, CLEAREX_RESULTS_GROUP) + ensure_group(dest_root, CLEAREX_RUNTIME_SOURCE_ROOT) + ensure_group(dest_root, CLEAREX_RUNTIME_RESULTS_ROOT) + + if "data" not in source_root: + raise ValueError( + f"Legacy source store '{source_path}' does not contain a root 'data' array." 
+ ) + _copy_array( + source=source_root["data"], + dest_root=dest_root, + dest_component=SOURCE_CACHE_COMPONENT, + ) + if "data_pyramid" in source_root: + _copy_group_contents( + source=source_root["data_pyramid"], + dest_root=dest_root, + dest_prefix=SOURCE_CACHE_PYRAMID_ROOT, + ) + + root_attrs = _jsonable_mapping(dict(source_root.attrs)) + source_metadata = { + str(key): value + for key, value in root_attrs.items() + if str(key) not in {"ome", "multiscales"} + } + source_metadata.update( + { + "migrated_from_store": str(source_path), + "migrated_at_utc": datetime.now(tz=timezone.utc).isoformat(), + "legacy_layout": True, + } + ) + update_store_metadata(dest_root, **source_metadata) + + if "provenance" in source_root: + _copy_group_contents( + source=source_root["provenance"], + dest_root=dest_root, + dest_prefix=CLEAREX_PROVENANCE_GROUP, + ) + + if "results" in source_root: + results_group = source_root["results"] + for analysis_name in sorted(results_group.group_keys()): + analysis_group = results_group[analysis_name] + latest_group = analysis_group.get("latest") + if not isinstance(latest_group, zarr.Group): + continue + + if "data" in latest_group: + _copy_array( + source=latest_group["data"], + dest_root=dest_root, + dest_component=analysis_cache_data_component(analysis_name), + ) + if "data_pyramid" in latest_group: + _copy_group_contents( + source=latest_group["data_pyramid"], + dest_root=dest_root, + dest_prefix=f"{analysis_cache_root(analysis_name)}/data_pyramid", + ) + _copy_group_contents( + source=latest_group, + dest_root=dest_root, + dest_prefix=analysis_auxiliary_root(analysis_name), + skip_children={"data", "data_pyramid"}, + ) + publish_analysis_collection_from_cache( + dest_path, + analysis_name=analysis_name, + ) + continue + + _copy_group_contents( + source=latest_group, + dest_root=dest_root, + dest_prefix=analysis_auxiliary_root(analysis_name), + ) + + publish_source_collection_from_cache(dest_path) + return dest_path diff --git 
a/src/clearex/io/provenance.py b/src/clearex/io/provenance.py index 2dd72d2..db8673c 100644 --- a/src/clearex/io/provenance.py +++ b/src/clearex/io/provenance.py @@ -47,6 +47,14 @@ import zarr # Local Imports +from clearex.io.ome_store import ( + CLEAREX_GUI_STATE_GROUP, + CLEAREX_PROVENANCE_GROUP, + analysis_auxiliary_root, + ensure_group, + get_node, + public_analysis_root, +) from clearex.io.read import ImageInfo from clearex.workflow import ( WorkflowConfig, @@ -66,6 +74,20 @@ _GUI_STATE_SCHEMA = "clearex.analysis_gui_state.v1" +def _provenance_group(root: zarr.Group) -> zarr.Group: + """Return the namespaced ClearEx provenance group.""" + return ensure_group(root, CLEAREX_PROVENANCE_GROUP) + + +def _existing_provenance_group(root: zarr.Group) -> Optional[zarr.Group]: + """Return the namespaced ClearEx provenance group when present.""" + try: + group = get_node(root, CLEAREX_PROVENANCE_GROUP) + except Exception: + return None + return group if isinstance(group, zarr.Group) else None + + def is_zarr_store_path(path: Union[str, Path]) -> bool: """Return whether a path appears to reference a Zarr/N5 store. 
@@ -404,8 +426,13 @@ def _default_outputs(workflow: WorkflowConfig) -> Dict[str, Any]: outputs: Dict[str, Any] = {} for analysis_name in _selected_analyses(workflow): key = _normalize_analysis_name(analysis_name) + component = ( + public_analysis_root(key) + if key in {"flatfield", "deconvolution", "shear_transform", "usegment3d", "registration"} + else analysis_auxiliary_root(key) + ) outputs[key] = { - "component": f"results/{key}/latest", + "component": component, "storage_policy": "latest_only", } return outputs @@ -536,7 +563,7 @@ def summarize_analysis_history( raise ValueError(f"Path is not a Zarr/N5 store: {zarr_path}") root = zarr.open_group(str(zarr_path), mode="r") - provenance_group = root.get("provenance") + provenance_group = _existing_provenance_group(root) if provenance_group is None or "runs" not in provenance_group: return { "has_successful_run": False, @@ -633,7 +660,7 @@ def load_latest_completed_workflow_state( raise ValueError(f"Path is not a Zarr/N5 store: {zarr_path}") root = zarr.open_group(str(zarr_path), mode="r") - provenance_group = root.get("provenance") + provenance_group = _existing_provenance_group(root) if provenance_group is None or "runs" not in provenance_group: return None @@ -693,8 +720,7 @@ def persist_latest_analysis_gui_state( raise ValueError(f"Path is not a Zarr/N5 store: {zarr_path}") root = zarr.open_group(str(zarr_path), mode="a") - provenance_group = root.require_group("provenance") - gui_state_group = provenance_group.require_group("gui_state") + gui_state_group = ensure_group(root, CLEAREX_GUI_STATE_GROUP) latest_group = gui_state_group.require_group("analysis_dialog") latest_group.attrs.update( _to_jsonable( @@ -733,10 +759,13 @@ def load_latest_analysis_gui_state( raise ValueError(f"Path is not a Zarr/N5 store: {zarr_path}") root = zarr.open_group(str(zarr_path), mode="r") - provenance_group = root.get("provenance") + provenance_group = _existing_provenance_group(root) if provenance_group is None: return None - 
gui_state_group = provenance_group.get("gui_state") + try: + gui_state_group = get_node(root, CLEAREX_GUI_STATE_GROUP) + except Exception: + gui_state_group = None if gui_state_group is None: return None latest_group = gui_state_group.get("analysis_dialog") @@ -800,7 +829,7 @@ def register_latest_output_reference( raise ValueError(f"Path is not a Zarr/N5 store: {zarr_path}") root = zarr.open_group(str(zarr_path), mode="a") - provenance_group = root.require_group("provenance") + provenance_group = _provenance_group(root) latest_outputs_group = provenance_group.require_group("latest_outputs") key = _normalize_analysis_name(analysis_name) latest_group = latest_outputs_group.require_group(key) @@ -972,7 +1001,7 @@ def persist_run_provenance( repository_root = Path(repo_root) if repo_root is not None else Path.cwd() root = zarr.open_group(str(zarr_path), mode="a") - provenance_group = root.require_group("provenance") + provenance_group = _provenance_group(root) runs_group = provenance_group.require_group("runs") run_id = uuid.uuid4().hex @@ -1100,7 +1129,7 @@ def verify_provenance_chain(zarr_path: Union[str, Path]) -> tuple[bool, list[str raise ValueError(f"Path is not a Zarr/N5 store: {zarr_path}") root = zarr.open_group(str(zarr_path), mode="r") - provenance_group = root.get("provenance") + provenance_group = _existing_provenance_group(root) if provenance_group is None or "runs" not in provenance_group: return True, [] diff --git a/src/clearex/io/read.py b/src/clearex/io/read.py index 3616160..f459504 100644 --- a/src/clearex/io/read.py +++ b/src/clearex/io/read.py @@ -29,7 +29,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, Iterable, Optional, Tuple, Type, Union +from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Type, Union import logging # Third Party Imports @@ -430,6 +430,96 @@ class ZarrReader(Reader): SUFFIXES = (".zarr", ".zarr/", ".n5", ".n5/") + @staticmethod + 
def _axes_from_ome_payload(payload: Any) -> Optional[list[str]]: + """Extract axis labels from an OME multiscales payload.""" + if not isinstance(payload, Mapping): + return None + multiscales = payload.get("multiscales") + if not isinstance(multiscales, list) or not multiscales: + return None + first = multiscales[0] + if not isinstance(first, Mapping): + return None + axes_payload = first.get("axes") + if not isinstance(axes_payload, list): + return None + axes: list[str] = [] + for axis in axes_payload: + if isinstance(axis, Mapping): + name = str(axis.get("name", "")).strip() + if name: + axes.append(name) + elif axis is not None: + text = str(axis).strip() + if text: + axes.append(text) + return axes or None + + def _resolve_ome_array( + self, + group: zarr.Group, + ) -> Optional[tuple[str, Any, Optional[list[str]]]]: + """Resolve the primary public OME array from an OME-Zarr root/group.""" + ome_payload = getattr(group, "attrs", {}).get("ome") + if not isinstance(ome_payload, Mapping): + return None + + axes = self._axes_from_ome_payload(ome_payload) + if "multiscales" in ome_payload and "0" in group: + return ("0", group["0"], axes) + + plate_payload = ome_payload.get("plate") + if isinstance(plate_payload, Mapping): + wells = plate_payload.get("wells") + if isinstance(wells, list) and wells: + first_well = wells[0] + if isinstance(first_well, Mapping): + well_path = str(first_well.get("path", "")).strip() + if well_path and well_path in group: + well_group = group[well_path] + well_ome = getattr(well_group, "attrs", {}).get("ome") + if isinstance(well_ome, Mapping): + well_payload = well_ome.get("well") + if isinstance(well_payload, Mapping): + images = well_payload.get("images") + if isinstance(images, list) and images: + first_image = images[0] + if isinstance(first_image, Mapping): + image_path = str( + first_image.get("path", "") + ).strip() + if image_path and image_path in well_group: + image_group = well_group[image_path] + image_ome = getattr( + 
image_group, "attrs", {} + ).get("ome") + image_axes = self._axes_from_ome_payload( + image_ome + ) + if "0" in image_group: + return ( + f"{well_path}/{image_path}/0", + image_group["0"], + image_axes, + ) + + well_payload = ome_payload.get("well") + if isinstance(well_payload, Mapping): + images = well_payload.get("images") + if isinstance(images, list) and images: + first_image = images[0] + if isinstance(first_image, Mapping): + image_path = str(first_image.get("path", "")).strip() + if image_path and image_path in group: + image_group = group[image_path] + image_ome = getattr(image_group, "attrs", {}).get("ome") + image_axes = self._axes_from_ome_payload(image_ome) + if "0" in image_group: + return (f"{image_path}/0", image_group["0"], image_axes) + + return None + def open( self, path: Path, @@ -521,6 +611,38 @@ def open( """ grp = zarr.open_group(str(path), mode="r") + ome_selection = self._resolve_ome_array(grp) + if ome_selection is not None: + array_path, array, axes = ome_selection + meta = dict(getattr(grp, "attrs", {})) + meta.update(dict(getattr(array, "attrs", {}))) + meta["selected_array_path"] = array_path + meta["ome_selected"] = True + if prefer_dask: + darr = ( + da.from_zarr(array, chunks=chunks) if chunks else da.from_zarr(array) + ) + logger.info(f"Loaded public OME-Zarr array from {path.name}.") + info = ImageInfo( + path=path, + shape=tuple(darr.shape), + dtype=darr.dtype, + axes=axes, + metadata=meta, + ) + return darr, info + + np_arr = np.asarray(array) + logger.info(f"Loaded public OME-Zarr array from {path.name}.") + info = ImageInfo( + path=path, + shape=tuple(np_arr.shape), + dtype=np_arr.dtype, + axes=axes, + metadata=meta, + ) + return np_arr, info + # Collect arrays recursively to support nested Zarr/N5 layouts. 
arrays: list[tuple[str, Any]] = [] diff --git a/src/clearex/main.py b/src/clearex/main.py index 9531079..3281580 100644 --- a/src/clearex/main.py +++ b/src/clearex/main.py @@ -53,6 +53,16 @@ ) from clearex.io.cli import create_parser, display_logo from clearex.io.log import initiate_logger +from clearex.io.ome_store import ( + LEGACY_STORE_MIGRATION_HINT, + SOURCE_CACHE_COMPONENT, + analysis_auxiliary_root, + analysis_cache_data_component, + is_legacy_clearex_store, + migrate_legacy_store, + public_analysis_root, + publish_analysis_collection_from_cache, +) from clearex.io.provenance import ( is_zarr_store_path, persist_run_provenance, @@ -210,19 +220,28 @@ def run_usegment3d_analysis(*, zarr_path, parameters, client, progress_callback) ) _ANALYSIS_PROVENANCE_REQUIRED_COMPONENTS: Dict[str, tuple[str, ...]] = { "flatfield": ( - "results/flatfield/latest/data", - "results/flatfield/latest/data_pyramid", - "results/flatfield/latest/flatfield_pcyx", - "results/flatfield/latest/darkfield_pcyx", - "results/flatfield/latest/baseline_pctz", + analysis_cache_data_component("flatfield"), + analysis_auxiliary_root("flatfield"), + ), + "deconvolution": ( + analysis_cache_data_component("deconvolution"), + analysis_auxiliary_root("deconvolution"), + ), + "shear_transform": ( + analysis_cache_data_component("shear_transform"), + analysis_auxiliary_root("shear_transform"), + ), + "particle_detection": (analysis_auxiliary_root("particle_detection"),), + "usegment3d": ( + analysis_cache_data_component("usegment3d"), + analysis_auxiliary_root("usegment3d"), ), - "deconvolution": ("results/deconvolution/latest/data",), - "shear_transform": ("results/shear_transform/latest/data",), - "particle_detection": ("results/particle_detection/latest/detections",), - "usegment3d": ("results/usegment3d/latest/data",), - "registration": ("results/registration/latest/data",), - "display_pyramid": ("results/display_pyramid/latest",), - "mip_export": ("results/mip_export/latest",), + "registration": 
( + analysis_cache_data_component("registration"), + analysis_auxiliary_root("registration"), + ), + "display_pyramid": (analysis_auxiliary_root("display_pyramid"),), + "mip_export": (analysis_auxiliary_root("mip_export"),), } _DISTRIBUTED_TEARDOWN_NOISE_LOGGERS = ("distributed.batched",) @@ -399,7 +418,7 @@ def _collect_available_analysis_components( set[str] Components known to exist in the current store. """ - available_components = {"data"} + available_components = {"data", SOURCE_CACHE_COMPONENT} if not zarr_path or not is_zarr_store_path(zarr_path): return available_components @@ -1329,8 +1348,8 @@ def _emit_analysis_progress(percent: int, message: str) -> None: ) _log_loaded_image(image_info, logger) logger.info( - "Materialized source data to Zarr store " - f"{materialized.store_path} (component=data, " + "Materialized source data to OME-Zarr store " + f"{materialized.store_path} (component={SOURCE_CACHE_COMPONENT}, " f"chunks_tpczyx={materialized.chunks_tpczyx}, " "spatial_calibration=" f"{format_spatial_calibration(runtime_spatial_calibration)})." @@ -1342,7 +1361,7 @@ def _emit_analysis_progress(percent: int, message: str) -> None: "source_path": str(materialized.source_path), "source_component": materialized.source_component, "store_path": str(materialized.store_path), - "target_component": "data", + "target_component": SOURCE_CACHE_COMPONENT, "canonical_shape_tpczyx": list( materialized.data_image_info.shape ), @@ -1361,6 +1380,14 @@ def _emit_analysis_progress(percent: int, message: str) -> None: } ) else: + selected_input_path = Path(str(input_path)).expanduser().resolve() + if is_zarr_store_path(selected_input_path) and is_legacy_clearex_store( + selected_input_path + ): + raise ValueError( + f"Legacy ClearEx store detected at {selected_input_path}. 
" + f"{LEGACY_STORE_MIGRATION_HINT}" + ) opener = ImageOpener() _, info = opener.open( input_path, @@ -1487,7 +1514,7 @@ def _emit_analysis_progress(percent: int, message: str) -> None: else None ) - produced_components: Dict[str, str] = {"data": "data"} + produced_components: Dict[str, str] = {"data": SOURCE_CACHE_COMPONENT} total_operations = max(1, len(execution_sequence)) try: if failure_exc is not None: @@ -1672,6 +1699,10 @@ def _flatfield_progress(percent: int, message: str) -> None: client=analysis_client, progress_callback=_flatfield_progress, ) + publish_analysis_collection_from_cache( + provenance_store_path, + analysis_name="flatfield", + ) flatfield_source_component = str( getattr( summary, @@ -1729,7 +1760,7 @@ def _flatfield_progress(percent: int, message: str) -> None: ) else: logger.warning( - "Flatfield correction requires a canonical Zarr/N5 data store." + "Flatfield correction requires a canonical OME-Zarr store." ) step_records.append( { @@ -1790,6 +1821,10 @@ def _decon_progress(percent: int, message: str) -> None: client=analysis_client, progress_callback=_decon_progress, ) + publish_analysis_collection_from_cache( + provenance_store_path, + analysis_name="deconvolution", + ) produced_components["deconvolution"] = summary.data_component output_records["deconvolution"] = { "component": summary.component, @@ -1826,7 +1861,7 @@ def _decon_progress(percent: int, message: str) -> None: ) else: logger.warning( - "Deconvolution requires a canonical Zarr/N5 data store." + "Deconvolution requires a canonical OME-Zarr store." 
) step_records.append( { @@ -1887,6 +1922,10 @@ def _shear_progress(percent: int, message: str) -> None: client=analysis_client, progress_callback=_shear_progress, ) + publish_analysis_collection_from_cache( + provenance_store_path, + analysis_name="shear_transform", + ) produced_components["shear_transform"] = summary.data_component output_records["shear_transform"] = { "component": summary.component, @@ -1947,7 +1986,7 @@ def _shear_progress(percent: int, message: str) -> None: ) else: logger.warning( - "Shear transform requires a canonical Zarr/N5 data store." + "Shear transform requires a canonical OME-Zarr store." ) step_records.append( { @@ -2039,7 +2078,7 @@ def _particle_progress(percent: int, message: str) -> None: ) else: logger.warning( - "Particle detection requires a canonical Zarr/N5 data store." + "Particle detection requires a canonical OME-Zarr store." ) step_records.append( { @@ -2098,14 +2137,22 @@ def _usegment3d_progress(percent: int, message: str) -> None: client=analysis_client, progress_callback=_usegment3d_progress, ) + publish_analysis_collection_from_cache( + provenance_store_path, + analysis_name="usegment3d", + ) usegment3d_component = str( - getattr(summary, "component", "results/usegment3d/latest") + getattr( + summary, + "component", + public_analysis_root("usegment3d"), + ) ) usegment3d_data_component = str( getattr( summary, "data_component", - f"{usegment3d_component}/data", + analysis_cache_data_component("usegment3d"), ) ) usegment3d_source_component = str( @@ -2162,7 +2209,7 @@ def _usegment3d_progress(percent: int, message: str) -> None: ) else: logger.warning( - "usegment3d requires a canonical Zarr/N5 data store." + "usegment3d requires a canonical OME-Zarr store." 
) step_records.append( { @@ -2231,21 +2278,29 @@ def _registration_progress(percent: int, message: str) -> None: client=analysis_client, progress_callback=_registration_progress, ) + publish_analysis_collection_from_cache( + provenance_store_path, + analysis_name="registration", + ) registration_component = str( - getattr(summary, "component", "results/registration/latest") + getattr( + summary, + "component", + public_analysis_root("registration"), + ) ) registration_data_component = str( getattr( summary, "data_component", - f"{registration_component}/data", + analysis_cache_data_component("registration"), ) ) registration_affines_component = str( getattr( summary, "affines_component", - f"{registration_component}/affines_tpx44", + f"{analysis_auxiliary_root('registration')}/affines_tpx44", ) ) registration_source_component = str( @@ -2420,7 +2475,7 @@ def _registration_progress(percent: int, message: str) -> None: ) else: logger.warning( - "Registration requires a canonical Zarr/N5 data store." + "Registration requires a canonical OME-Zarr store." ) step_records.append( { @@ -2524,7 +2579,7 @@ def _display_pyramid_progress( ) else: logger.warning( - "Display pyramid preparation requires a canonical Zarr/N5 " + "Display pyramid preparation requires a canonical OME-Zarr " "data store." ) step_records.append( @@ -2751,7 +2806,7 @@ def _visualization_progress(percent: int, message: str) -> None: ) else: logger.warning( - "Visualization requires a canonical Zarr/N5 data store." + "Visualization requires a canonical OME-Zarr store." ) step_records.append( { @@ -2847,7 +2902,7 @@ def _mip_export_progress(percent: int, message: str) -> None: ) else: logger.warning( - "MIP export requires a canonical Zarr/N5 data store." + "MIP export requires a canonical OME-Zarr store." 
) step_records.append( { @@ -3018,6 +3073,24 @@ def main() -> None: args = parser.parse_args() bootstrap_logger = _create_bootstrap_logger() + migrate_store_path = str(getattr(args, "migrate_store", "") or "").strip() + if migrate_store_path: + try: + migrated_path = migrate_legacy_store( + migrate_store_path, + output_path=getattr(args, "migrate_output", None), + overwrite=bool(getattr(args, "migrate_overwrite", False)), + ) + except Exception as exc: + parser.error(f"Store migration failed: {exc}") + return + bootstrap_logger.info( + "Migrated legacy ClearEx store '%s' to '%s'.", + migrate_store_path, + migrated_path, + ) + return + try: workflow = _build_workflow_config(args) except ValueError as exc: diff --git a/src/clearex/mip_export/pipeline.py b/src/clearex/mip_export/pipeline.py index 1c1af7c..4bb932c 100644 --- a/src/clearex/mip_export/pipeline.py +++ b/src/clearex/mip_export/pipeline.py @@ -43,6 +43,7 @@ import zarr # Local Imports +from clearex.io.ome_store import analysis_auxiliary_root from clearex.io.provenance import register_latest_output_reference if TYPE_CHECKING: @@ -2248,14 +2249,12 @@ def _emit(percent: int, message: str) -> None: task_results = sorted(task_results, key=lambda item: str(item.get("path", ""))) exported_files = int(len(task_results)) - component = "results/mip_export/latest" + component = analysis_auxiliary_root("mip_export") root_w = zarr.open_group(str(zarr_path), mode="a") try: - results_group = root_w.require_group("results") - mip_group = results_group.require_group("mip_export") - if "latest" in mip_group: - del mip_group["latest"] - latest_group = mip_group.create_group("latest") + if component in root_w: + del root_w[component] + latest_group = root_w.require_group(component) latest_group.attrs.update( { "storage_policy": "latest_only", diff --git a/src/clearex/registration/pipeline.py b/src/clearex/registration/pipeline.py index 0a4d2ff..4ca2f6b 100644 --- a/src/clearex/registration/pipeline.py +++ 
b/src/clearex/registration/pipeline.py @@ -31,6 +31,12 @@ _phase_cross_correlation = None # type: ignore[assignment] from clearex.io.experiment import load_navigate_experiment +from clearex.io.ome_store import ( + analysis_auxiliary_root, + analysis_cache_data_component, + analysis_cache_root, + public_analysis_root, +) from clearex.io.provenance import register_latest_output_reference from clearex.workflow import SpatialCalibrationConfig, spatial_calibration_from_dict @@ -423,24 +429,40 @@ def _resolve_source_components_for_level( def _pyramid_factor_zyx_for_level( - root: zarr.hierarchy.Group, *, level: int + root: zarr.hierarchy.Group, + *, + level: int, + source_component: Optional[str] = None, ) -> tuple[float, float, float]: """Return per-axis pyramid factors in ``(z, y, x)`` order.""" if level <= 0: return (1.0, 1.0, 1.0) - factors = root.attrs.get("data_pyramid_factors_tpczyx") - if isinstance(factors, (tuple, list)) and len(factors) > level: - entry = factors[level] - if isinstance(entry, (tuple, list)) and len(entry) >= 6: - try: - return ( - max(1.0, float(entry[3])), - max(1.0, float(entry[4])), - max(1.0, float(entry[5])), - ) - except Exception: - pass + candidate_factors: list[Any] = [] + if source_component: + try: + source_attrs = dict(root[str(source_component)].attrs) + except Exception: + source_attrs = {} + candidate_factors.extend( + [ + source_attrs.get("pyramid_factors_tpczyx"), + source_attrs.get("resolution_pyramid_factors_tpczyx"), + ] + ) + candidate_factors.append(root.attrs.get("data_pyramid_factors_tpczyx")) + for factors in candidate_factors: + if isinstance(factors, (tuple, list)) and len(factors) > level: + entry = factors[level] + if isinstance(entry, (tuple, list)) and len(entry) >= 6: + try: + return ( + max(1.0, float(entry[3])), + max(1.0, float(entry[4])), + max(1.0, float(entry[5])), + ) + except Exception: + pass uniform = float(2 ** int(level)) return (uniform, uniform, uniform) @@ -1900,11 +1922,13 @@ def 
_prepare_output_group( blend_weights_component)`` """ root = zarr.open_group(str(zarr_path), mode="a") - results_group = root.require_group("results") - registration_group = results_group.require_group("registration") - if "latest" in registration_group: - del registration_group["latest"] - latest = registration_group.create_group("latest") + cache_root = analysis_cache_root("registration") + auxiliary_root = analysis_auxiliary_root("registration") + if cache_root in root: + del root[cache_root] + if auxiliary_root in root: + del root[auxiliary_root] + latest = root.require_group(cache_root) latest.create_dataset( name="data", shape=output_shape_tpczyx, @@ -1921,7 +1945,8 @@ def _prepare_output_group( "storage_policy": "latest_only", } ) - latest.attrs.update( + auxiliary_group = root.require_group(auxiliary_root) + auxiliary_group.attrs.update( { "storage_policy": "latest_only", "source_component": str(source_component), @@ -1930,9 +1955,10 @@ def _prepare_output_group( "output_chunks_tpczyx": [int(value) for value in output_chunks_tpczyx], "voxel_size_um_zyx": [float(value) for value in voxel_size_um_zyx], "output_origin_xyz_um": [float(value) for value in output_origin_xyz], + "data_component": analysis_cache_data_component("registration"), } ) - latest.create_dataset( + auxiliary_group.create_dataset( name="affines_tpx44", shape=(output_shape_tpczyx[0], int(root[source_component].shape[1]), 4, 4), dtype=np.float64, @@ -1949,7 +1975,7 @@ def _prepare_output_group( blend_mode=str(blend_mode), overlap_zyx=overlap_zyx, ) - blend_group = latest.create_group("blend_weights", overwrite=True) + blend_group = auxiliary_group.create_group("blend_weights", overwrite=True) blend_group.create_dataset(name="profile_z", data=prof_z, dtype=np.float32, overwrite=True) blend_group.create_dataset(name="profile_y", data=prof_y, dtype=np.float32, @@ -1958,10 +1984,10 @@ def _prepare_output_group( overwrite=True) return ( - "results/registration/latest", - 
"results/registration/latest/data", - "results/registration/latest/affines_tpx44", - "results/registration/latest/blend_weights", + public_analysis_root("registration"), + analysis_cache_data_component("registration"), + f"{auxiliary_root}/affines_tpx44", + f"{auxiliary_root}/blend_weights", ) @@ -2071,7 +2097,11 @@ def run_registration_analysis( raise ValueError("registration anchor_position is out of bounds.") full_voxel_size_um_zyx = _extract_voxel_size_um_zyx(root, source_component) - level_factor_zyx = _pyramid_factor_zyx_for_level(root, level=effective_level) + level_factor_zyx = _pyramid_factor_zyx_for_level( + root, + level=effective_level, + source_component=pairwise_source_component, + ) pairwise_voxel_size_um_zyx = ( float(full_voxel_size_um_zyx[0]) * float(level_factor_zyx[0]), float(full_voxel_size_um_zyx[1]) * float(level_factor_zyx[1]), @@ -2346,7 +2376,7 @@ def run_registration_analysis( ) ) write_root = zarr.open_group(str(zarr_path), mode="a") - latest_group = write_root["results/registration/latest"] + latest_group = write_root[analysis_auxiliary_root("registration")] latest_group.create_dataset( name="edges_pe2", data=( @@ -2409,7 +2439,9 @@ def run_registration_analysis( source_component=source_component, output_component=data_component, affines_component=affines_component, - transformed_bboxes_component="results/registration/latest/transformed_bboxes_tpx6", + transformed_bboxes_component=( + f"{analysis_auxiliary_root('registration')}/transformed_bboxes_tpx6" + ), blend_weights_component=blend_weights_component, t_index=t_index, c_index=c_index, diff --git a/src/clearex/shear/pipeline.py b/src/clearex/shear/pipeline.py index 472ea40..5f54bce 100644 --- a/src/clearex/shear/pipeline.py +++ b/src/clearex/shear/pipeline.py @@ -51,6 +51,12 @@ import zarr # Local Imports +from clearex.io.ome_store import ( + analysis_auxiliary_root, + analysis_cache_data_component, + analysis_cache_root, + public_analysis_root, +) from clearex.io.provenance import 
register_latest_output_reference if TYPE_CHECKING: @@ -1251,17 +1257,19 @@ def _emit(percent: int, message: str) -> None: min(source_chunks_tpczyx[4], output_shape_tpczyx[4]), min(source_chunks_tpczyx[5], output_shape_tpczyx[5]), ) - component = "results/shear_transform/latest" - data_component = f"{component}/data" + component = public_analysis_root("shear_transform") + data_component = analysis_cache_data_component("shear_transform") + cache_root = analysis_cache_root("shear_transform") + auxiliary_root = analysis_auxiliary_root("shear_transform") output_dtype = str(normalized["output_dtype"]) _emit(5, "Preparing shear-transform output layout") root_w = zarr.open_group(str(zarr_path), mode="a") - results_group = root_w.require_group("results") - shear_group = results_group.require_group("shear_transform") - if "latest" in shear_group: - del shear_group["latest"] - latest_group = shear_group.create_group("latest") + if cache_root in root_w: + del root_w[cache_root] + if auxiliary_root in root_w: + del root_w[auxiliary_root] + latest_group = root_w.require_group(cache_root) latest_group.create_dataset( name="data", shape=output_shape_tpczyx, @@ -1297,6 +1305,7 @@ def _emit(percent: int, message: str) -> None: "voxel_size_um_zyx": [float(v) for v in voxel_size_um_zyx], } ) + root_w.require_group(auxiliary_root).attrs.update(dict(latest_group.attrs)) out_z_bounds = _axis_chunk_bounds(output_shape_tpczyx[3], output_chunks_tpczyx[3]) out_y_bounds = _axis_chunk_bounds(output_shape_tpczyx[4], output_chunks_tpczyx[4]) diff --git a/src/clearex/usegment3d/pipeline.py b/src/clearex/usegment3d/pipeline.py index 50ca7c8..105afa6 100644 --- a/src/clearex/usegment3d/pipeline.py +++ b/src/clearex/usegment3d/pipeline.py @@ -52,6 +52,12 @@ import zarr # Local Imports +from clearex.io.ome_store import ( + analysis_auxiliary_root, + analysis_cache_data_component, + analysis_cache_root, + public_analysis_root, +) from clearex.io.provenance import register_latest_output_reference from 
clearex.workflow import ( default_analysis_operation_parameters, @@ -400,6 +406,7 @@ def _pyramid_factor_zyx_for_level( root: zarr.hierarchy.Group, *, level: int, + source_component: Optional[str] = None, ) -> tuple[float, float, float]: """Return pyramid downsampling factors for one level in ``(z, y, x)``. @@ -418,19 +425,33 @@ def _pyramid_factor_zyx_for_level( if level <= 0: return (1.0, 1.0, 1.0) - factors = root.attrs.get("data_pyramid_factors_tpczyx") - if isinstance(factors, (tuple, list)) and len(factors) > level: - level_entry = factors[level] - if isinstance(level_entry, (tuple, list)) and len(level_entry) >= 6: - try: - parsed = ( - max(1.0, float(level_entry[3])), - max(1.0, float(level_entry[4])), - max(1.0, float(level_entry[5])), - ) - return parsed - except Exception: - pass + candidate_factors: list[Any] = [] + if source_component: + try: + source_attrs = dict(root[str(source_component)].attrs) + except Exception: + source_attrs = {} + candidate_factors.extend( + [ + source_attrs.get("pyramid_factors_tpczyx"), + source_attrs.get("resolution_pyramid_factors_tpczyx"), + ] + ) + candidate_factors.append(root.attrs.get("data_pyramid_factors_tpczyx")) + + for factors in candidate_factors: + if isinstance(factors, (tuple, list)) and len(factors) > level: + level_entry = factors[level] + if isinstance(level_entry, (tuple, list)) and len(level_entry) >= 6: + try: + parsed = ( + max(1.0, float(level_entry[3])), + max(1.0, float(level_entry[4])), + max(1.0, float(level_entry[5])), + ) + return parsed + except Exception: + pass uniform = float(2 ** int(level)) return (uniform, uniform, uniform) @@ -592,11 +613,13 @@ def _prepare_output_array( ) output_dtype = np.dtype(str(parameters.get("output_dtype", "uint32"))) - results_group = root.require_group("results") - usegment_group = results_group.require_group("usegment3d") - if "latest" in usegment_group: - del usegment_group["latest"] - latest = usegment_group.create_group("latest") + cache_root = 
analysis_cache_root("usegment3d") + auxiliary_root = analysis_auxiliary_root("usegment3d") + if cache_root in root: + del root[cache_root] + if auxiliary_root in root: + del root[auxiliary_root] + latest = root.require_group(cache_root) latest.create_dataset( name="data", shape=output_shape, @@ -634,7 +657,7 @@ def _prepare_output_array( dtype=output_dtype, overwrite=True, ) - native_data_component = "results/usegment3d/latest/data_native" + native_data_component = f"{cache_root}/data_native" latest.attrs.update( { "storage_policy": "latest_only", @@ -646,10 +669,11 @@ def _prepare_output_array( "run_id": None, } ) + root.require_group(auxiliary_root).attrs.update(dict(latest.attrs)) return ( - "results/usegment3d/latest", - "results/usegment3d/latest/data", + public_analysis_root("usegment3d"), + analysis_cache_data_component("usegment3d"), ( int(output_shape[0]), int(output_shape[1]), @@ -1351,10 +1375,12 @@ def _emit(percent: int, message: str) -> None: source_factor_zyx = _pyramid_factor_zyx_for_level( root, level=effective_resolution_level, + source_component=source_component, ) output_factor_zyx = _pyramid_factor_zyx_for_level( root, level=output_resolution_level, + source_component=output_reference_component, ) source_voxel_size_um_zyx = ( float(base_voxel_size_um_zyx[0] * source_factor_zyx[0]), diff --git a/src/clearex/visualization/pipeline.py b/src/clearex/visualization/pipeline.py index 41c5fba..71266f2 100644 --- a/src/clearex/visualization/pipeline.py +++ b/src/clearex/visualization/pipeline.py @@ -51,6 +51,7 @@ # Local Imports from clearex.io.experiment import load_navigate_experiment +from clearex.io.ome_store import analysis_auxiliary_root, load_store_metadata from clearex.io.provenance import register_latest_output_reference from clearex.workflow import ( SpatialCalibrationConfig, @@ -937,7 +938,7 @@ def _build_napari_layer_payload( "detection_component": str( parameters.get( "particle_detection_component", - 
"results/particle_detection/latest/detections", + f"{analysis_auxiliary_root('particle_detection')}/detections", ) ), "source_data_path": root_attrs.get("source_data_path"), @@ -994,10 +995,10 @@ def _normalize_visualization_parameters( str( normalized.get( "particle_detection_component", - "results/particle_detection/latest/detections", + f"{analysis_auxiliary_root('particle_detection')}/detections", ) ).strip() - or "results/particle_detection/latest/detections" + or f"{analysis_auxiliary_root('particle_detection')}/detections" ) normalized["require_gpu_rendering"] = bool( normalized.get("require_gpu_rendering", True) @@ -3062,6 +3063,8 @@ def _value(field: str, fallback_index: int) -> float: def _load_spatial_calibration( root_attrs: Mapping[str, Any], + *, + store_metadata: Optional[Mapping[str, Any]] = None, ) -> SpatialCalibrationConfig: """Load store-level spatial calibration from root attrs. @@ -3075,11 +3078,17 @@ def _load_spatial_calibration( SpatialCalibrationConfig Parsed store calibration. Missing attrs resolve to identity. """ + if isinstance(store_metadata, Mapping): + payload = store_metadata.get("spatial_calibration") + if payload is not None: + return spatial_calibration_from_dict(payload) return spatial_calibration_from_dict(root_attrs.get("spatial_calibration")) def _load_multiposition_stage_rows( root_attrs: Mapping[str, Any], + *, + store_metadata: Optional[Mapping[str, Any]] = None, ) -> list[dict[str, float]]: """Load multiposition stage rows from sidecar metadata when available. @@ -3094,7 +3103,17 @@ def _load_multiposition_stage_rows( Parsed stage rows from ``multi_positions.yml`` or fallback metadata. Returns an empty list when stage metadata cannot be resolved. 
""" - source_experiment = root_attrs.get("source_experiment") + if isinstance(store_metadata, Mapping): + stage_rows = store_metadata.get("stage_rows") + parsed_stage_rows = _parse_multiposition_stage_rows(stage_rows) + if parsed_stage_rows: + return parsed_stage_rows + + source_experiment = ( + store_metadata.get("source_experiment") + if isinstance(store_metadata, Mapping) + else root_attrs.get("source_experiment") + ) if not isinstance(source_experiment, str): return [] @@ -3209,6 +3228,7 @@ def _resolve_world_axis_delta( def _resolve_position_affines_tczyx( *, root_attrs: Mapping[str, Any], + store_metadata: Optional[Mapping[str, Any]], selected_positions: Sequence[int], scale_tczyx: Sequence[float], ) -> tuple[dict[int, np.ndarray], list[dict[str, float]], SpatialCalibrationConfig]: @@ -3232,8 +3252,14 @@ def _resolve_position_affines_tczyx( affines: dict[int, np.ndarray] = { int(index): np.eye(6, dtype=np.float64) for index in selected_positions } - spatial_calibration = _load_spatial_calibration(root_attrs) - stage_rows = _load_multiposition_stage_rows(root_attrs) + spatial_calibration = _load_spatial_calibration( + root_attrs, + store_metadata=store_metadata, + ) + stage_rows = _load_multiposition_stage_rows( + root_attrs, + store_metadata=store_metadata, + ) if not stage_rows: return affines, [], spatial_calibration @@ -4095,13 +4121,11 @@ def _save_display_pyramid_metadata( run_id: Optional[str] = None, ) -> str: """Persist display-pyramid metadata in ``results/display_pyramid/latest``.""" - component = "results/display_pyramid/latest" root = zarr.open_group(str(zarr_path), mode="a") - results_group = root.require_group("results") - display_pyramid_group = results_group.require_group("display_pyramid") - if "latest" in display_pyramid_group: - del display_pyramid_group["latest"] - latest_group = display_pyramid_group.create_group("latest") + component = analysis_auxiliary_root("display_pyramid") + if component in root: + del root[component] + latest_group 
= root.require_group(component) payload: Dict[str, Any] = { "source_component": str(source_component), @@ -4356,13 +4380,11 @@ def _save_visualization_metadata( str Component path for the latest visualization metadata group. """ - component = "results/visualization/latest" root = zarr.open_group(str(zarr_path), mode="a") - results_group = root.require_group("results") - visualization_group = results_group.require_group("visualization") - if "latest" in visualization_group: - del visualization_group["latest"] - latest_group = visualization_group.create_group("latest") + component = analysis_auxiliary_root("visualization") + if component in root: + del root[component] + latest_group = root.require_group(component) payload: Dict[str, Any] = { "source_component": str(source_component), @@ -4556,6 +4578,7 @@ def _emit(percent: int, message: str) -> None: spatial_calibration, ) = _resolve_position_affines_tczyx( root_attrs=dict(root.attrs), + store_metadata=load_store_metadata(root), selected_positions=selected_positions, scale_tczyx=napari_payload.scale_tczyx, ) diff --git a/src/clearex/workflow.py b/src/clearex/workflow.py index 3844e1d..8b90760 100644 --- a/src/clearex/workflow.py +++ b/src/clearex/workflow.py @@ -68,24 +68,24 @@ ) ANALYSIS_CHAINABLE_OUTPUT_COMPONENTS: Dict[str, str] = { - "data": "data", - "flatfield": "results/flatfield/latest/data", - "deconvolution": "results/deconvolution/latest/data", - "shear_transform": "results/shear_transform/latest/data", - "usegment3d": "results/usegment3d/latest/data", - "registration": "results/registration/latest/data", + "data": "clearex/runtime_cache/source/data", + "flatfield": "clearex/runtime_cache/results/flatfield/latest/data", + "deconvolution": "clearex/runtime_cache/results/deconvolution/latest/data", + "shear_transform": "clearex/runtime_cache/results/shear_transform/latest/data", + "usegment3d": "clearex/runtime_cache/results/usegment3d/latest/data", + "registration": 
"clearex/runtime_cache/results/registration/latest/data", } ANALYSIS_KNOWN_OUTPUT_COMPONENTS: Dict[str, str] = { - "data": "data", - "flatfield": "results/flatfield/latest/data", - "deconvolution": "results/deconvolution/latest/data", - "shear_transform": "results/shear_transform/latest/data", - "particle_detection": "results/particle_detection/latest/detections", - "usegment3d": "results/usegment3d/latest/data", - "registration": "results/registration/latest/data", - "display_pyramid": "results/display_pyramid/latest", - "visualization": "results/visualization/latest", - "mip_export": "results/mip_export/latest", + "data": "clearex/runtime_cache/source/data", + "flatfield": "clearex/runtime_cache/results/flatfield/latest/data", + "deconvolution": "clearex/runtime_cache/results/deconvolution/latest/data", + "shear_transform": "clearex/runtime_cache/results/shear_transform/latest/data", + "particle_detection": "clearex/results/particle_detection/latest/detections", + "usegment3d": "clearex/runtime_cache/results/usegment3d/latest/data", + "registration": "clearex/runtime_cache/results/registration/latest/data", + "display_pyramid": "clearex/results/display_pyramid/latest", + "visualization": "clearex/results/visualization/latest", + "mip_export": "clearex/results/mip_export/latest", } _OUTPUT_COMPONENT_TO_OPERATION: Dict[str, str] = { str(component): str(operation_name) diff --git a/uv.lock b/uv.lock index d47586b..088aa38 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = "==3.12.*" resolution-markers = [ "sys_platform == 'darwin'", @@ -7,6 +7,89 @@ resolution-markers = [ "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')", ] +[[package]] +name = "aiobotocore" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "aioitertools" }, + { name = "botocore" }, + { name = 
"jmespath" }, + { name = "multidict" }, + { name = "python-dateutil" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/9f/a0568deaf008f4a7e3d57a7f80f1537df894df0e49bd4a790bb22f9a2d8e/aiobotocore-3.3.0.tar.gz", hash = "sha256:9abc21d91edd6c9c2e4a07e11bdfcbb159f0b9116ab2a0a5a349113533a18fb2", size = 122940, upload-time = "2026-03-18T09:58:49.077Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/54/a295bd8d7ac900c339b2c7024ed0ff9538afb60e92eb0979b8bb49deb20e/aiobotocore-3.3.0-py3-none-any.whl", hash = "sha256:9125ab2b63740dfe3b66b8d5a90d13aed9587b850aa53225ef214a04a1aa7fdc", size = 87817, upload-time = "2026-03-18T09:58:47.466Z" }, +] + +[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/26/30/f84a107a9c4331c14b2b586036f40965c128aa4fee4dda5d3d51cb14ad54/aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558", size = 22760, upload-time = "2025-03-12T01:42:48.764Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0f/15/5bf3b99495fb160b63f95972b81750f18f7f4e02ad051373b669d17d44f2/aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8", size = 15265, upload-time = "2025-03-12T01:42:47.083Z" }, +] + +[[package]] +name = "aiohttp" +version = "3.13.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohappyeyeballs" }, + { name = "aiosignal" }, + { name = "attrs" }, + { name = "frozenlist" }, + { name = "multidict" }, + { name = "propcache" }, + { name = "yarl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = 
"2026-01-03T17:33:05.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/be/4fc11f202955a69e0db803a12a062b8379c970c7c84f4882b6da17337cc1/aiohttp-3.13.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:b903a4dfee7d347e2d87697d0713be59e0b87925be030c9178c5faa58ea58d5c", size = 739732, upload-time = "2026-01-03T17:30:14.23Z" }, + { url = "https://files.pythonhosted.org/packages/97/2c/621d5b851f94fa0bb7430d6089b3aa970a9d9b75196bc93bb624b0db237a/aiohttp-3.13.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a45530014d7a1e09f4a55f4f43097ba0fd155089372e105e4bff4ca76cb1b168", size = 494293, upload-time = "2026-01-03T17:30:15.96Z" }, + { url = "https://files.pythonhosted.org/packages/5d/43/4be01406b78e1be8320bb8316dc9c42dbab553d281c40364e0f862d5661c/aiohttp-3.13.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:27234ef6d85c914f9efeb77ff616dbf4ad2380be0cda40b4db086ffc7ddd1b7d", size = 493533, upload-time = "2026-01-03T17:30:17.431Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a8/5a35dc56a06a2c90d4742cbf35294396907027f80eea696637945a106f25/aiohttp-3.13.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d32764c6c9aafb7fb55366a224756387cd50bfa720f32b88e0e6fa45b27dcf29", size = 1737839, upload-time = "2026-01-03T17:30:19.422Z" }, + { url = "https://files.pythonhosted.org/packages/bf/62/4b9eeb331da56530bf2e198a297e5303e1c1ebdceeb00fe9b568a65c5a0c/aiohttp-3.13.3-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:b1a6102b4d3ebc07dad44fbf07b45bb600300f15b552ddf1851b5390202ea2e3", size = 1703932, upload-time = "2026-01-03T17:30:21.756Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f6/af16887b5d419e6a367095994c0b1332d154f647e7dc2bd50e61876e8e3d/aiohttp-3.13.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c014c7ea7fb775dd015b2d3137378b7be0249a448a1612268b5a90c2d81de04d", 
size = 1771906, upload-time = "2026-01-03T17:30:23.932Z" }, + { url = "https://files.pythonhosted.org/packages/ce/83/397c634b1bcc24292fa1e0c7822800f9f6569e32934bdeef09dae7992dfb/aiohttp-3.13.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2b8d8ddba8f95ba17582226f80e2de99c7a7948e66490ef8d947e272a93e9463", size = 1871020, upload-time = "2026-01-03T17:30:26Z" }, + { url = "https://files.pythonhosted.org/packages/86/f6/a62cbbf13f0ac80a70f71b1672feba90fdb21fd7abd8dbf25c0105fb6fa3/aiohttp-3.13.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9ae8dd55c8e6c4257eae3a20fd2c8f41edaea5992ed67156642493b8daf3cecc", size = 1755181, upload-time = "2026-01-03T17:30:27.554Z" }, + { url = "https://files.pythonhosted.org/packages/0a/87/20a35ad487efdd3fba93d5843efdfaa62d2f1479eaafa7453398a44faf13/aiohttp-3.13.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:01ad2529d4b5035578f5081606a465f3b814c542882804e2e8cda61adf5c71bf", size = 1561794, upload-time = "2026-01-03T17:30:29.254Z" }, + { url = "https://files.pythonhosted.org/packages/de/95/8fd69a66682012f6716e1bc09ef8a1a2a91922c5725cb904689f112309c4/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bb4f7475e359992b580559e008c598091c45b5088f28614e855e42d39c2f1033", size = 1697900, upload-time = "2026-01-03T17:30:31.033Z" }, + { url = "https://files.pythonhosted.org/packages/e5/66/7b94b3b5ba70e955ff597672dad1691333080e37f50280178967aff68657/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c19b90316ad3b24c69cd78d5c9b4f3aa4497643685901185b65166293d36a00f", size = 1728239, upload-time = "2026-01-03T17:30:32.703Z" }, + { url = "https://files.pythonhosted.org/packages/47/71/6f72f77f9f7d74719692ab65a2a0252584bf8d5f301e2ecb4c0da734530a/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:96d604498a7c782cb15a51c406acaea70d8c027ee6b90c569baa6e7b93073679", size = 
1740527, upload-time = "2026-01-03T17:30:34.695Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b4/75ec16cbbd5c01bdaf4a05b19e103e78d7ce1ef7c80867eb0ace42ff4488/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:084911a532763e9d3dd95adf78a78f4096cd5f58cdc18e6fdbc1b58417a45423", size = 1554489, upload-time = "2026-01-03T17:30:36.864Z" }, + { url = "https://files.pythonhosted.org/packages/52/8f/bc518c0eea29f8406dcf7ed1f96c9b48e3bc3995a96159b3fc11f9e08321/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7a4a94eb787e606d0a09404b9c38c113d3b099d508021faa615d70a0131907ce", size = 1767852, upload-time = "2026-01-03T17:30:39.433Z" }, + { url = "https://files.pythonhosted.org/packages/9d/f2/a07a75173124f31f11ea6f863dc44e6f09afe2bca45dd4e64979490deab1/aiohttp-3.13.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:87797e645d9d8e222e04160ee32aa06bc5c163e8499f24db719e7852ec23093a", size = 1722379, upload-time = "2026-01-03T17:30:41.081Z" }, + { url = "https://files.pythonhosted.org/packages/3c/4a/1a3fee7c21350cac78e5c5cef711bac1b94feca07399f3d406972e2d8fcd/aiohttp-3.13.3-cp312-cp312-win32.whl", hash = "sha256:b04be762396457bef43f3597c991e192ee7da460a4953d7e647ee4b1c28e7046", size = 428253, upload-time = "2026-01-03T17:30:42.644Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b7/76175c7cb4eb73d91ad63c34e29fc4f77c9386bba4a65b53ba8e05ee3c39/aiohttp-3.13.3-cp312-cp312-win_amd64.whl", hash = "sha256:e3531d63d3bdfa7e3ac5e9b27b2dd7ec9df3206a98e0b3445fa906f233264c57", size = 455407, upload-time = "2026-01-03T17:30:44.195Z" }, +] + +[[package]] +name = "aioitertools" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/3c/53c4a17a05fb9ea2313ee1777ff53f5e001aefd5cc85aa2f4c2d982e1e38/aioitertools-0.13.0.tar.gz", hash = "sha256:620bd241acc0bbb9ec819f1ab215866871b4bbd1f73836a55f799200ee86950c", size = 19322, upload-time = "2025-11-06T22:17:07.609Z" } 
+wheels = [ + { url = "https://files.pythonhosted.org/packages/10/a1/510b0a7fadc6f43a6ce50152e69dbd86415240835868bb0bd9b5b88b1e06/aioitertools-0.13.0-py3-none-any.whl", hash = "sha256:0be0292b856f08dfac90e31f4739432f4cb6d7520ab9eb73e143f4f2fa5259be", size = 24182, upload-time = "2025-11-06T22:17:06.502Z" }, +] + +[[package]] +name = "aiosignal" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "frozenlist" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, +] + [[package]] name = "alabaster" version = "0.7.16" @@ -179,12 +262,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/c9/d7977eaacb9df673210491da99e6a247e93df98c715fc43fd136ce1d3d33/arrow-1.4.0-py3-none-any.whl", hash = "sha256:749f0769958ebdc79c173ff0b0670d59051a535fa26e8eba02953dc19eb43205", size = 68797, upload-time = "2025-10-18T17:46:45.663Z" }, ] -[[package]] -name = "asciitree" -version = "0.3.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2d/6a/885bc91484e1aa8f618f6f0228d76d0e67000b0fdd6090673b777e311913/asciitree-0.3.3.tar.gz", hash = "sha256:4aa4b9b649f85e3fcb343363d97564aa1fb62e249677f2e18a96765145cc0f6e", size = 3951, upload-time = "2016-09-05T19:10:42.681Z" } - [[package]] name = "asttokens" version = "3.0.1" @@ -270,6 +347,41 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/1a/39/47f9197bdd44df24d67ac8893641e16f386c984a0619ef2ee4c51fbbc019/beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb", size = 107721, upload-time = "2025-11-30T15:08:24.087Z" }, ] +[[package]] +name = "bioio-base" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dask", extra = ["array", "distributed"] }, + { name = "fsspec" }, + { name = "numpy" }, + { name = "ome-types", extra = ["pint"] }, + { name = "pint" }, + { name = "xarray" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3a/05/64a855e25b89384b703255e10c12f0ccaa9862e58f1f73b642d8fbdb696e/bioio_base-3.2.0.tar.gz", hash = "sha256:f935e19ae243e9cdb1c46ae9c5ac2a19343345f6be7a74255e292c7188a37c68", size = 96797, upload-time = "2025-12-08T22:35:38.617Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/44/178363ad21c93bb94ac5d9abf03c39fb3a43d5849c97d6a213efbb77f318/bioio_base-3.2.0-py3-none-any.whl", hash = "sha256:ae37c33bfc5fe7efa89603ea678c6ba1a2e0b9c67a7e64377d4c5d1f0a75724a", size = 100971, upload-time = "2025-12-08T22:35:37.076Z" }, +] + +[[package]] +name = "bioio-ome-zarr" +version = "3.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bioio-base" }, + { name = "dask" }, + { name = "fsspec", extra = ["http"] }, + { name = "s3fs" }, + { name = "scikit-image" }, + { name = "xarray" }, + { name = "zarr" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6e/11/1b53888c3645ea021c44734c39099132193f225c4afd3b112ebe81341093/bioio_ome_zarr-3.3.0.tar.gz", hash = "sha256:89af3058aa3253b597e41602d711558459e9f896963664a2ac3da8a9b154aa72", size = 41587, upload-time = "2026-03-03T18:20:03.602Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/ef/531f7bf022e6e2b4ca3daea57c5dd10242c6256e00f111d7c49e80895524/bioio_ome_zarr-3.3.0-py3-none-any.whl", hash = 
"sha256:a29917531540336bc8e0284b9ed9db42e87d67b880566f98f933d7636634b3cd", size = 31950, upload-time = "2026-03-03T18:20:01.543Z" }, +] + [[package]] name = "black" version = "26.1.0" @@ -329,6 +441,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/0b/bdf449df87be3f07b23091ceafee8c3ef569cf6d2fb7edec6e3b12b3faa4/bokeh-3.9.0-py3-none-any.whl", hash = "sha256:b252bfb16a505f0e0c57d532d0df308ae1667235bafc622aa9441fe9e7c5ce4a", size = 6396068, upload-time = "2026-03-11T17:58:31.645Z" }, ] +[[package]] +name = "botocore" +version = "1.42.70" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/54/b80e1fcee4f732e0e9314bbb8679be9d5690caa1566c4a4cd14e9724d2dd/botocore-1.42.70.tar.gz", hash = "sha256:9ee17553b7febd1a0c1253b3b62ab5d79607eb6163c8fb943470a8893c31d4fa", size = 14997068, upload-time = "2026-03-17T19:43:10.678Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/51/08f32aea872253173f513ba68122f4300966290677c8e59887b4ffd5d957/botocore-1.42.70-py3-none-any.whl", hash = "sha256:54ed9d25f05f810efd22b0dfda0bb9178df3ad8952b2e4359e05156c9321bd3c", size = 14671393, upload-time = "2026-03-17T19:43:06.777Z" }, +] + [[package]] name = "build" version = "1.4.0" @@ -469,6 +595,7 @@ source = { editable = "." 
} dependencies = [ { name = "antspyx" }, { name = "basicpy" }, + { name = "bioio-ome-zarr" }, { name = "cython" }, { name = "dask" }, { name = "dask-image" }, @@ -480,6 +607,8 @@ dependencies = [ { name = "matplotlib" }, { name = "napari" }, { name = "neuroglancer" }, + { name = "ome-zarr" }, + { name = "ome-zarr-models" }, { name = "opencv-python" }, { name = "pandas" }, { name = "pyqt6" }, @@ -530,6 +659,7 @@ usegment3d = [ requires-dist = [ { name = "antspyx" }, { name = "basicpy" }, + { name = "bioio-ome-zarr" }, { name = "black", marker = "extra == 'dev'", specifier = ">=25.11.0" }, { name = "cellpose", marker = "extra == 'usegment3d'", specifier = "<3" }, { name = "codespell", marker = "extra == 'docs'" }, @@ -547,6 +677,8 @@ requires-dist = [ { name = "nbconvert", marker = "extra == 'docs'" }, { name = "neuroglancer", specifier = ">=2.40.1,<3.0.0" }, { name = "numpydoc", marker = "extra == 'docs'" }, + { name = "ome-zarr" }, + { name = "ome-zarr-models", specifier = ">=1.6" }, { name = "opencv-python" }, { name = "pandas", specifier = ">=2.3.3" }, { name = "pandas-stubs", marker = "extra == 'dev'", specifier = "~=2.3.3" }, @@ -574,7 +706,7 @@ requires-dist = [ { name = "tifffile", specifier = "==2025.1.10" }, { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.1a32" }, { name = "u-segment3d", marker = "extra == 'usegment3d'", specifier = ">=0.1.4,<0.2" }, - { name = "zarr", specifier = "<3.0" }, + { name = "zarr", specifier = ">=3.1.1,<4.0" }, ] provides-extras = ["decon", "usegment3d", "dev", "docs"] @@ -853,6 +985,9 @@ dataframe = [ { name = "pandas" }, { name = "pyarrow" }, ] +distributed = [ + { name = "distributed" }, +] [[package]] name = "dask-image" @@ -1015,6 +1150,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5b/11/208f72084084d3f6a2ed5ebfdfc846692c3f7ad6dce65e400194924f7eed/domdf_python_tools-3.10.0-py3-none-any.whl", hash = "sha256:5e71c1be71bbcc1f881d690c8984b60e64298ec256903b3147f068bc33090c36", size = 126860, 
upload-time = "2025-02-12T17:34:04.093Z" }, ] +[[package]] +name = "donfig" +version = "0.8.1.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/71/80cc718ff6d7abfbabacb1f57aaa42e9c1552bfdd01e64ddd704e4a03638/donfig-0.8.1.post1.tar.gz", hash = "sha256:3bef3413a4c1c601b585e8d297256d0c1470ea012afa6e8461dc28bfb7c23f52", size = 19506, upload-time = "2024-05-23T14:14:31.513Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl", hash = "sha256:2a3175ce74a06109ff9307d90a230f81215cbac9a751f4d1c6194644b8204f9d", size = 21592, upload-time = "2024-05-23T14:13:55.283Z" }, +] + [[package]] name = "edt" version = "3.0.0" @@ -1163,6 +1310,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/93/6e/bd7fbfacca077bc6f34f1a1109800a2c41ab50f4704d3a0507ba41009915/freetype_py-2.5.1-py3-none-win_amd64.whl", hash = "sha256:0b7f8e0342779f65ca13ef8bc103938366fecade23e6bb37cb671c2b8ad7f124", size = 814608, upload-time = "2024-08-29T18:32:24.648Z" }, ] +[[package]] +name = "frozenlist" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload-time = "2025-10-06T05:38:17.865Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/29/948b9aa87e75820a38650af445d2ef2b6b8a6fab1a23b6bb9e4ef0be2d59/frozenlist-1.8.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78f7b9e5d6f2fdb88cdde9440dc147259b62b9d3b019924def9f6478be254ac1", size = 87782, upload-time = "2025-10-06T05:36:06.649Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/80/4f6e318ee2a7c0750ed724fa33a4bdf1eacdc5a39a7a24e818a773cd91af/frozenlist-1.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:229bf37d2e4acdaf808fd3f06e854a4a7a3661e871b10dc1f8f1896a3b05f18b", size = 50594, upload-time = "2025-10-06T05:36:07.69Z" }, + { url = "https://files.pythonhosted.org/packages/2b/94/5c8a2b50a496b11dd519f4a24cb5496cf125681dd99e94c604ccdea9419a/frozenlist-1.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f833670942247a14eafbb675458b4e61c82e002a148f49e68257b79296e865c4", size = 50448, upload-time = "2025-10-06T05:36:08.78Z" }, + { url = "https://files.pythonhosted.org/packages/6a/bd/d91c5e39f490a49df14320f4e8c80161cfcce09f1e2cde1edd16a551abb3/frozenlist-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:494a5952b1c597ba44e0e78113a7266e656b9794eec897b19ead706bd7074383", size = 242411, upload-time = "2025-10-06T05:36:09.801Z" }, + { url = "https://files.pythonhosted.org/packages/8f/83/f61505a05109ef3293dfb1ff594d13d64a2324ac3482be2cedc2be818256/frozenlist-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96f423a119f4777a4a056b66ce11527366a8bb92f54e541ade21f2374433f6d4", size = 243014, upload-time = "2025-10-06T05:36:11.394Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cb/cb6c7b0f7d4023ddda30cf56b8b17494eb3a79e3fda666bf735f63118b35/frozenlist-1.8.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3462dd9475af2025c31cc61be6652dfa25cbfb56cbbf52f4ccfe029f38decaf8", size = 234909, upload-time = "2025-10-06T05:36:12.598Z" }, + { url = "https://files.pythonhosted.org/packages/31/c5/cd7a1f3b8b34af009fb17d4123c5a778b44ae2804e3ad6b86204255f9ec5/frozenlist-1.8.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c4c800524c9cd9bac5166cd6f55285957fcfc907db323e193f2afcd4d9abd69b", size = 
250049, upload-time = "2025-10-06T05:36:14.065Z" }, + { url = "https://files.pythonhosted.org/packages/c0/01/2f95d3b416c584a1e7f0e1d6d31998c4a795f7544069ee2e0962a4b60740/frozenlist-1.8.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d6a5df73acd3399d893dafc71663ad22534b5aa4f94e8a2fabfe856c3c1b6a52", size = 256485, upload-time = "2025-10-06T05:36:15.39Z" }, + { url = "https://files.pythonhosted.org/packages/ce/03/024bf7720b3abaebcff6d0793d73c154237b85bdf67b7ed55e5e9596dc9a/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:405e8fe955c2280ce66428b3ca55e12b3c4e9c336fb2103a4937e891c69a4a29", size = 237619, upload-time = "2025-10-06T05:36:16.558Z" }, + { url = "https://files.pythonhosted.org/packages/69/fa/f8abdfe7d76b731f5d8bd217827cf6764d4f1d9763407e42717b4bed50a0/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:908bd3f6439f2fef9e85031b59fd4f1297af54415fb60e4254a95f75b3cab3f3", size = 250320, upload-time = "2025-10-06T05:36:17.821Z" }, + { url = "https://files.pythonhosted.org/packages/f5/3c/b051329f718b463b22613e269ad72138cc256c540f78a6de89452803a47d/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:294e487f9ec720bd8ffcebc99d575f7eff3568a08a253d1ee1a0378754b74143", size = 246820, upload-time = "2025-10-06T05:36:19.046Z" }, + { url = "https://files.pythonhosted.org/packages/0f/ae/58282e8f98e444b3f4dd42448ff36fa38bef29e40d40f330b22e7108f565/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:74c51543498289c0c43656701be6b077f4b265868fa7f8a8859c197006efb608", size = 250518, upload-time = "2025-10-06T05:36:20.763Z" }, + { url = "https://files.pythonhosted.org/packages/8f/96/007e5944694d66123183845a106547a15944fbbb7154788cbf7272789536/frozenlist-1.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:776f352e8329135506a1d6bf16ac3f87bc25b28e765949282dcc627af36123aa", size = 239096, upload-time = "2025-10-06T05:36:22.129Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/bb/852b9d6db2fa40be96f29c0d1205c306288f0684df8fd26ca1951d461a56/frozenlist-1.8.0-cp312-cp312-win32.whl", hash = "sha256:433403ae80709741ce34038da08511d4a77062aa924baf411ef73d1146e74faf", size = 39985, upload-time = "2025-10-06T05:36:23.661Z" }, + { url = "https://files.pythonhosted.org/packages/b8/af/38e51a553dd66eb064cdf193841f16f077585d4d28394c2fa6235cb41765/frozenlist-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:34187385b08f866104f0c0617404c8eb08165ab1272e884abc89c112e9c00746", size = 44591, upload-time = "2025-10-06T05:36:24.958Z" }, + { url = "https://files.pythonhosted.org/packages/a7/06/1dc65480ab147339fecc70797e9c2f69d9cea9cf38934ce08df070fdb9cb/frozenlist-1.8.0-cp312-cp312-win_arm64.whl", hash = "sha256:fe3c58d2f5db5fbd18c2987cba06d51b0529f52bc3a6cdc33d3f4eab725104bd", size = 40102, upload-time = "2025-10-06T05:36:26.333Z" }, + { url = "https://files.pythonhosted.org/packages/9a/9a/e35b4a917281c0b8419d4207f4334c8e8c5dbf4f3f5f9ada73958d937dcc/frozenlist-1.8.0-py3-none-any.whl", hash = "sha256:0c18a16eab41e82c295618a77502e17b195883241c563b00f0aa5106fc4eaa0d", size = 13409, upload-time = "2025-10-06T05:38:16.721Z" }, +] + [[package]] name = "fsspec" version = "2026.2.0" @@ -1172,6 +1344,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e6/ab/fb21f4c939bb440104cc2b396d3be1d9b7a9fd3c6c2a53d98c45b3d7c954/fsspec-2026.2.0-py3-none-any.whl", hash = "sha256:98de475b5cb3bd66bedd5c4679e87b4fdfe1a3bf4d707b151b3c07e58c9a2437", size = 202505, upload-time = "2026-02-05T21:50:51.819Z" }, ] +[package.optional-dependencies] +http = [ + { name = "aiohttp" }, +] +s3 = [ + { name = "s3fs" }, +] + [[package]] name = "google-apitools" version = "0.5.35" @@ -1201,6 +1381,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/1d/d6466de3a5249d35e832a52834115ca9d1d0de6abc22065f049707516d47/google_auth-2.48.0-py3-none-any.whl", hash = 
"sha256:2e2a537873d449434252a9632c28bfc268b0adb1e53f9fb62afc5333a975903f", size = 236499, upload-time = "2026-01-26T19:22:45.099Z" }, ] +[[package]] +name = "google-crc32c" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" }, + { url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" }, + { url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" }, + { url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" }, +] + [[package]] name = "gradient-free-optimizers" version = "1.10.1" @@ -1495,6 +1688,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -1941,6 +2143,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/23/28/1e3e5cd1d677cca68b26166f704f72e35b1e8b6d5076d8ebeebc4e40a649/mss-10.1.0-py3-none-any.whl", hash = "sha256:9179c110cadfef5dc6dc4a041a0cd161c74c379218648e6640b48c6b5cfe8918", size = 24525, upload-time = "2025-08-16T12:10:59.111Z" }, ] +[[package]] +name = "multidict" +version = "6.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/9c/f20e0e2cf80e4b2e4b1c365bf5fe104ee633c751a724246262db8f1a0b13/multidict-6.7.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a90f75c956e32891a4eda3639ce6dd86e87105271f43d43442a3aedf3cddf172", size = 76893, upload-time = "2026-01-26T02:43:52.754Z" }, + { url = "https://files.pythonhosted.org/packages/fe/cf/18ef143a81610136d3da8193da9d80bfe1cb548a1e2d1c775f26b23d024a/multidict-6.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fccb473e87eaa1382689053e4a4618e7ba7b9b9b8d6adf2027ee474597128cd", size = 45456, upload-time = "2026-01-26T02:43:53.893Z" }, + { url = "https://files.pythonhosted.org/packages/a9/65/1caac9d4cd32e8433908683446eebc953e82d22b03d10d41a5f0fefe991b/multidict-6.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0fa96985700739c4c7853a43c0b3e169360d6855780021bfc6d0f1ce7c123e7", size = 43872, upload-time = "2026-01-26T02:43:55.041Z" }, + { url = "https://files.pythonhosted.org/packages/cf/3b/d6bd75dc4f3ff7c73766e04e705b00ed6dbbaccf670d9e05a12b006f5a21/multidict-6.7.1-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:cb2a55f408c3043e42b40cc8eecd575afa27b7e0b956dfb190de0f8499a57a53", size = 251018, upload-time = "2026-01-26T02:43:56.198Z" }, + { url = "https://files.pythonhosted.org/packages/fd/80/c959c5933adedb9ac15152e4067c702a808ea183a8b64cf8f31af8ad3155/multidict-6.7.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb0ce7b2a32d09892b3dd6cc44877a0d02a33241fafca5f25c8b6b62374f8b75", size = 258883, upload-time = "2026-01-26T02:43:57.499Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/85/7ed40adafea3d4f1c8b916e3b5cc3a8e07dfcdcb9cd72800f4ed3ca1b387/multidict-6.7.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c3a32d23520ee37bf327d1e1a656fec76a2edd5c038bf43eddfa0572ec49c60b", size = 242413, upload-time = "2026-01-26T02:43:58.755Z" }, + { url = "https://files.pythonhosted.org/packages/d2/57/b8565ff533e48595503c785f8361ff9a4fde4d67de25c207cd0ba3befd03/multidict-6.7.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9c90fed18bffc0189ba814749fdcc102b536e83a9f738a9003e569acd540a733", size = 268404, upload-time = "2026-01-26T02:44:00.216Z" }, + { url = "https://files.pythonhosted.org/packages/e0/50/9810c5c29350f7258180dfdcb2e52783a0632862eb334c4896ac717cebcb/multidict-6.7.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:da62917e6076f512daccfbbde27f46fed1c98fee202f0559adec8ee0de67f71a", size = 269456, upload-time = "2026-01-26T02:44:02.202Z" }, + { url = "https://files.pythonhosted.org/packages/f3/8d/5e5be3ced1d12966fefb5c4ea3b2a5b480afcea36406559442c6e31d4a48/multidict-6.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bfde23ef6ed9db7eaee6c37dcec08524cb43903c60b285b172b6c094711b3961", size = 256322, upload-time = "2026-01-26T02:44:03.56Z" }, + { url = "https://files.pythonhosted.org/packages/31/6e/d8a26d81ac166a5592782d208dd90dfdc0a7a218adaa52b45a672b46c122/multidict-6.7.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3758692429e4e32f1ba0df23219cd0b4fc0a52f476726fff9337d1a57676a582", size = 253955, upload-time = "2026-01-26T02:44:04.845Z" }, + { url = "https://files.pythonhosted.org/packages/59/4c/7c672c8aad41534ba619bcd4ade7a0dc87ed6b8b5c06149b85d3dd03f0cd/multidict-6.7.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:398c1478926eca669f2fd6a5856b6de9c0acf23a2cb59a14c0ba5844fa38077e", size = 
251254, upload-time = "2026-01-26T02:44:06.133Z" }, + { url = "https://files.pythonhosted.org/packages/7b/bd/84c24de512cbafbdbc39439f74e967f19570ce7924e3007174a29c348916/multidict-6.7.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c102791b1c4f3ab36ce4101154549105a53dc828f016356b3e3bcae2e3a039d3", size = 252059, upload-time = "2026-01-26T02:44:07.518Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ba/f5449385510825b73d01c2d4087bf6d2fccc20a2d42ac34df93191d3dd03/multidict-6.7.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a088b62bd733e2ad12c50dad01b7d0166c30287c166e137433d3b410add807a6", size = 263588, upload-time = "2026-01-26T02:44:09.382Z" }, + { url = "https://files.pythonhosted.org/packages/d7/11/afc7c677f68f75c84a69fe37184f0f82fce13ce4b92f49f3db280b7e92b3/multidict-6.7.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3d51ff4785d58d3f6c91bdbffcb5e1f7ddfda557727043aa20d20ec4f65e324a", size = 259642, upload-time = "2026-01-26T02:44:10.73Z" }, + { url = "https://files.pythonhosted.org/packages/2b/17/ebb9644da78c4ab36403739e0e6e0e30ebb135b9caf3440825001a0bddcb/multidict-6.7.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fc5907494fccf3e7d3f94f95c91d6336b092b5fc83811720fae5e2765890dfba", size = 251377, upload-time = "2026-01-26T02:44:12.042Z" }, + { url = "https://files.pythonhosted.org/packages/ca/a4/840f5b97339e27846c46307f2530a2805d9d537d8b8bd416af031cad7fa0/multidict-6.7.1-cp312-cp312-win32.whl", hash = "sha256:28ca5ce2fd9716631133d0e9a9b9a745ad7f60bac2bccafb56aa380fc0b6c511", size = 41887, upload-time = "2026-01-26T02:44:14.245Z" }, + { url = "https://files.pythonhosted.org/packages/80/31/0b2517913687895f5904325c2069d6a3b78f66cc641a86a2baf75a05dcbb/multidict-6.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcee94dfbd638784645b066074b338bc9cc155d4b4bffa4adce1615c5a426c19", size = 46053, upload-time = "2026-01-26T02:44:15.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/5b/aba28e4ee4006ae4c7df8d327d31025d760ffa992ea23812a601d226e682/multidict-6.7.1-cp312-cp312-win_arm64.whl", hash = "sha256:ba0a9fb644d0c1a2194cf7ffb043bd852cea63a57f66fbd33959f7dae18517bf", size = 43307, upload-time = "2026-01-26T02:44:16.852Z" }, + { url = "https://files.pythonhosted.org/packages/81/08/7036c080d7117f28a4af526d794aab6a84463126db031b007717c1a6676e/multidict-6.7.1-py3-none-any.whl", hash = "sha256:55d97cc6dae627efa6a6e548885712d4864b81110ac76fa4e534c03819fa4a56", size = 12319, upload-time = "2026-01-26T02:46:44.004Z" }, +] + [[package]] name = "multiprocess" version = "0.70.19" @@ -2426,6 +2655,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/a9/4f25a14d23f0786b64875b91784607c2277eff25d48f915e39ff0cff505a/oauth2client-4.1.3-py2.py3-none-any.whl", hash = "sha256:b8a81cc5d60e2d364f0b1b98f958dbd472887acaf1a5b05e21c28c31a2d6d3ac", size = 98206, upload-time = "2018-09-07T21:38:16.742Z" }, ] +[[package]] +name = "ome-types" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "pydantic-extra-types" }, + { name = "xsdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/4c/d252c1619c733eec9b4d2d21fe369fd21a2594954b396bf4352edea1e272/ome_types-0.6.3.tar.gz", hash = "sha256:eef4138cda5edfdcb2a44cfb90b714a59ead1b69e4c5ce5f9892ad397ccaaa68", size = 121784, upload-time = "2025-11-26T00:28:24.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/6a/1000cad1700ab0af4d1b1d0a9c23c34badddb4f547c008bde2a6c61968f1/ome_types-0.6.3-py3-none-any.whl", hash = "sha256:ce9753ff351bbc534ee5c5038d3cf60b1e4c13d69ad2e6b5a5b75de2a52521a5", size = 245802, upload-time = "2025-11-26T00:28:22.853Z" }, +] + +[package.optional-dependencies] +pint = [ + { name = "pint" }, +] + +[[package]] +name = "ome-zarr" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" 
}, + { name = "dask" }, + { name = "fsspec", extra = ["s3"] }, + { name = "numpy" }, + { name = "requests" }, + { name = "scikit-image" }, + { name = "toolz" }, + { name = "zarr" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a4/44/647843d872aa136609e805c06e9f9b2cdcb6e2a58ec485322311dec7b64d/ome_zarr-0.12.2.tar.gz", hash = "sha256:834e801e9aa4b870bed3dde2dc2a3ad7f388f1a13ffa6b3d7aade90691b9de64", size = 69891, upload-time = "2025-08-22T08:57:13.64Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4f/21/59baa90924b815b70f88045f0b206b7eab0b68b461c0192692486b516ab7/ome_zarr-0.12.2-py3-none-any.whl", hash = "sha256:655fe1b11ca01148603f9931a5b0af31207dfc03a3a35f9b0ab8639790282bbd", size = 41410, upload-time = "2025-08-22T08:57:12.44Z" }, +] + +[[package]] +name = "ome-zarr-models" +version = "1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "pydantic-zarr" }, + { name = "zarr" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/e7/c37b8896a24eabc76d6a5e7855f722792de5bdf3f5ccc562001738be4f7e/ome_zarr_models-1.6.tar.gz", hash = "sha256:00735ad939d2b5f6a4f583cc19af1e8a5247fa1e93f1f08191143cd913b9633a", size = 106913, upload-time = "2026-03-10T10:14:28.369Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/fc/7650df09dce0edfbaa1f2898c5eb0e0523ae5964dadb83855beb52166505/ome_zarr_models-1.6-py3-none-any.whl", hash = "sha256:d855a6fe885aa2613f64cd124ff8453d75854531bb3282eeaa916e976c7bcf66", size = 64251, upload-time = "2026-03-10T10:14:27.177Z" }, +] + [[package]] name = "opencv-python" version = "4.11.0.86" @@ -2701,6 +2982,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, ] +[[package]] 
+name = "propcache" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/0f/f17b1b2b221d5ca28b4b876e8bb046ac40466513960646bda8e1853cdfa2/propcache-0.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e153e9cd40cc8945138822807139367f256f89c6810c2634a4f6902b52d3b4e2", size = 80061, upload-time = "2025-10-08T19:46:46.075Z" }, + { url = "https://files.pythonhosted.org/packages/76/47/8ccf75935f51448ba9a16a71b783eb7ef6b9ee60f5d14c7f8a8a79fbeed7/propcache-0.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cd547953428f7abb73c5ad82cbb32109566204260d98e41e5dfdc682eb7f8403", size = 46037, upload-time = "2025-10-08T19:46:47.23Z" }, + { url = "https://files.pythonhosted.org/packages/0a/b6/5c9a0e42df4d00bfb4a3cbbe5cf9f54260300c88a0e9af1f47ca5ce17ac0/propcache-0.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f048da1b4f243fc44f205dfd320933a951b8d89e0afd4c7cacc762a8b9165207", size = 47324, upload-time = "2025-10-08T19:46:48.384Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d3/6c7ee328b39a81ee877c962469f1e795f9db87f925251efeb0545e0020d0/propcache-0.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ec17c65562a827bba85e3872ead335f95405ea1674860d96483a02f5c698fa72", size = 225505, upload-time = "2025-10-08T19:46:50.055Z" }, + { url = "https://files.pythonhosted.org/packages/01/5d/1c53f4563490b1d06a684742cc6076ef944bc6457df6051b7d1a877c057b/propcache-0.4.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:405aac25c6394ef275dee4c709be43745d36674b223ba4eb7144bf4d691b7367", size = 
230242, upload-time = "2025-10-08T19:46:51.815Z" }, + { url = "https://files.pythonhosted.org/packages/20/e1/ce4620633b0e2422207c3cb774a0ee61cac13abc6217763a7b9e2e3f4a12/propcache-0.4.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0013cb6f8dde4b2a2f66903b8ba740bdfe378c943c4377a200551ceb27f379e4", size = 238474, upload-time = "2025-10-08T19:46:53.208Z" }, + { url = "https://files.pythonhosted.org/packages/46/4b/3aae6835b8e5f44ea6a68348ad90f78134047b503765087be2f9912140ea/propcache-0.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:15932ab57837c3368b024473a525e25d316d8353016e7cc0e5ba9eb343fbb1cf", size = 221575, upload-time = "2025-10-08T19:46:54.511Z" }, + { url = "https://files.pythonhosted.org/packages/6e/a5/8a5e8678bcc9d3a1a15b9a29165640d64762d424a16af543f00629c87338/propcache-0.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:031dce78b9dc099f4c29785d9cf5577a3faf9ebf74ecbd3c856a7b92768c3df3", size = 216736, upload-time = "2025-10-08T19:46:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/f1/63/b7b215eddeac83ca1c6b934f89d09a625aa9ee4ba158338854c87210cc36/propcache-0.4.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ab08df6c9a035bee56e31af99be621526bd237bea9f32def431c656b29e41778", size = 213019, upload-time = "2025-10-08T19:46:57.595Z" }, + { url = "https://files.pythonhosted.org/packages/57/74/f580099a58c8af587cac7ba19ee7cb418506342fbbe2d4a4401661cca886/propcache-0.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4d7af63f9f93fe593afbf104c21b3b15868efb2c21d07d8732c0c4287e66b6a6", size = 220376, upload-time = "2025-10-08T19:46:59.067Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ee/542f1313aff7eaf19c2bb758c5d0560d2683dac001a1c96d0774af799843/propcache-0.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cfc27c945f422e8b5071b6e93169679e4eb5bf73bbcbf1ba3ae3a83d2f78ebd9", size = 226988, upload-time = 
"2025-10-08T19:47:00.544Z" }, + { url = "https://files.pythonhosted.org/packages/8f/18/9c6b015dd9c6930f6ce2229e1f02fb35298b847f2087ea2b436a5bfa7287/propcache-0.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:35c3277624a080cc6ec6f847cbbbb5b49affa3598c4535a0a4682a697aaa5c75", size = 215615, upload-time = "2025-10-08T19:47:01.968Z" }, + { url = "https://files.pythonhosted.org/packages/80/9e/e7b85720b98c45a45e1fca6a177024934dc9bc5f4d5dd04207f216fc33ed/propcache-0.4.1-cp312-cp312-win32.whl", hash = "sha256:671538c2262dadb5ba6395e26c1731e1d52534bfe9ae56d0b5573ce539266aa8", size = 38066, upload-time = "2025-10-08T19:47:03.503Z" }, + { url = "https://files.pythonhosted.org/packages/54/09/d19cff2a5aaac632ec8fc03737b223597b1e347416934c1b3a7df079784c/propcache-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:cb2d222e72399fcf5890d1d5cc1060857b9b236adff2792ff48ca2dfd46c81db", size = 41655, upload-time = "2025-10-08T19:47:04.973Z" }, + { url = "https://files.pythonhosted.org/packages/68/ab/6b5c191bb5de08036a8c697b265d4ca76148efb10fa162f14af14fb5f076/propcache-0.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:204483131fb222bdaaeeea9f9e6c6ed0cac32731f75dfc1d4a567fc1926477c1", size = 37789, upload-time = "2025-10-08T19:47:06.077Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, +] + [[package]] name = "psfmodels" version = "0.3.4.dev30+gdfe2b6f2e" @@ -2887,6 +3192,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/17/fabd56da47096d240dd45ba627bead0333b0cf0ee8ada9bec579287dadf3/pydantic_extra_types-2.11.0-py3-none-any.whl", hash = "sha256:84b864d250a0fc62535b7ec591e36f2c5b4d1325fa0017eb8cda9aeb63b374a6", size = 74296, upload-time = "2025-12-31T16:18:26.38Z" }, ] +[[package]] +name = "pydantic-zarr" +version = 
"0.9.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/19/ad4f22b7a395408c90cc0f213d70cf5b9ed5f3b0db2b319cacff93b66850/pydantic_zarr-0.9.2.tar.gz", hash = "sha256:59f536cbc456ff4dc64d887d651476e2a6a2ca9f3ea13228dce4e3aeb1cf00b9", size = 80379, upload-time = "2026-03-18T14:38:30.766Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/f4/93a623ab5d2e1c20de0b43e60d6bd8df7704b39e88f5424b4191391a40d9/pydantic_zarr-0.9.2-py3-none-any.whl", hash = "sha256:8009b661cdce3e8283c26d281975432841a39fc8af8309f4ff66722e43850c6f", size = 52952, upload-time = "2026-03-18T14:38:29.684Z" }, +] + [[package]] name = "pydata-sphinx-theme" version = "0.10.0rc2" @@ -3382,6 +3701,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" }, ] +[[package]] +name = "s3fs" +version = "2026.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiobotocore" }, + { name = "aiohttp" }, + { name = "fsspec" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fa/be/392c8c5e0da9bfa139e41084690dd49a5e3e931099f78f52d3f6070105c6/s3fs-2026.2.0.tar.gz", hash = "sha256:91cb2a9f76e35643b76eeac3f47a6165172bb3def671f76b9111c8dd5779a2ac", size = 84152, upload-time = "2026-02-05T21:57:57.968Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/57/e1/64c264db50b68de8a438b60ceeb921b2f22da3ebb7ad6255150225d0beac/s3fs-2026.2.0-py3-none-any.whl", hash = "sha256:65198835b86b1d5771112b0085d1da52a6ede36508b1aaa6cae2aedc765dfe10", size = 31328, upload-time = "2026-02-05T21:57:56.532Z" }, +] + [[package]] name = "scikit-fmm" version = 
"2025.6.23" @@ -4274,6 +4607,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/da/5a086bf4c22a41995312db104ec2ffeee2cf6accca9faaee5315c790377d/wrapt-2.1.1-py3-none-any.whl", hash = "sha256:3b0f4629eb954394a3d7c7a1c8cca25f0b07cefe6aa8545e862e9778152de5b7", size = 43886, upload-time = "2026-02-03T02:11:45.048Z" }, ] +[[package]] +name = "xarray" +version = "2026.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0f/03/e3353b72e518574b32993989d8f696277bf878e9d508c7dd22e86c0dab5b/xarray-2026.2.0.tar.gz", hash = "sha256:978b6acb018770554f8fd964af4eb02f9bcc165d4085dbb7326190d92aa74bcf", size = 3111388, upload-time = "2026-02-13T22:20:50.18Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/92/545eb2ca17fc0e05456728d7e4378bfee48d66433ae3b7e71948e46826fb/xarray-2026.2.0-py3-none-any.whl", hash = "sha256:e927d7d716ea71dea78a13417970850a640447d8dd2ceeb65c5687f6373837c9", size = 1405358, upload-time = "2026-02-13T22:20:47.847Z" }, +] + +[[package]] +name = "xsdata" +version = "26.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2f/c9/71e9e8eac669091fd434ed494d806c8cc37614aecb34ce4c62c283f99abf/xsdata-26.2.tar.gz", hash = "sha256:c631af71aaa75734f8ce92a08fcf8389d905dee2aab0b5032c9032e9071009a6", size = 349690, upload-time = "2026-02-15T16:13:31.274Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d3/92/f0edcbc2f895ecea14a68e492b24c157625e251279a94b172a6b263290e7/xsdata-26.2-py3-none-any.whl", hash = "sha256:85a591a4405d903416afbd4a917e8dda8ea44641a3e66d72134bc2a31b3c16b0", size = 235561, upload-time = "2026-02-15T16:13:29.614Z" }, +] + [[package]] name = "xyzservices" version = "2025.11.0" @@ -4283,19 +4642,53 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ef/5c/2c189d18d495dd0fa3f27ccc60762bbc787eed95b9b0147266e72bb76585/xyzservices-2025.11.0-py3-none-any.whl", hash = "sha256:de66a7599a8d6dad63980b77defd1d8f5a5a9cb5fc8774ea1c6e89ca7c2a3d2f", size = 93916, upload-time = "2025-11-22T11:31:50.525Z" }, ] +[[package]] +name = "yarl" +version = "1.23.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "multidict" }, + { name = "propcache" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/6e/beb1beec874a72f23815c1434518bfc4ed2175065173fb138c3705f658d4/yarl-1.23.0.tar.gz", hash = "sha256:53b1ea6ca88ebd4420379c330aea57e258408dd0df9af0992e5de2078dc9f5d5", size = 194676, upload-time = "2026-03-01T22:07:53.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/8a/94615bc31022f711add374097ad4144d569e95ff3c38d39215d07ac153a0/yarl-1.23.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1932b6b8bba8d0160a9d1078aae5838a66039e8832d41d2992daa9a3a08f7860", size = 124737, upload-time = "2026-03-01T22:05:12.897Z" }, + { url = "https://files.pythonhosted.org/packages/e3/6f/c6554045d59d64052698add01226bc867b52fe4a12373415d7991fdca95d/yarl-1.23.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:411225bae281f114067578891bc75534cfb3d92a3b4dfef7a6ca78ba354e6069", size = 87029, upload-time = "2026-03-01T22:05:14.376Z" }, + { url = "https://files.pythonhosted.org/packages/19/2a/725ecc166d53438bc88f76822ed4b1e3b10756e790bafd7b523fe97c322d/yarl-1.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13a563739ae600a631c36ce096615fe307f131344588b0bc0daec108cdb47b25", size = 86310, upload-time = "2026-03-01T22:05:15.71Z" }, + { url = "https://files.pythonhosted.org/packages/99/30/58260ed98e6ff7f90ba84442c1ddd758c9170d70327394a6227b310cd60f/yarl-1.23.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:9cbf44c5cb4a7633d078788e1b56387e3d3cf2b8139a3be38040b22d6c3221c8", size = 97587, upload-time = "2026-03-01T22:05:17.384Z" }, + { url = "https://files.pythonhosted.org/packages/76/0a/8b08aac08b50682e65759f7f8dde98ae8168f72487e7357a5d684c581ef9/yarl-1.23.0-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:53ad387048f6f09a8969631e4de3f1bf70c50e93545d64af4f751b2498755072", size = 92528, upload-time = "2026-03-01T22:05:18.804Z" }, + { url = "https://files.pythonhosted.org/packages/52/07/0b7179101fe5f8385ec6c6bb5d0cb9f76bd9fb4a769591ab6fb5cdbfc69a/yarl-1.23.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4a59ba56f340334766f3a4442e0efd0af895fae9e2b204741ef885c446b3a1a8", size = 105339, upload-time = "2026-03-01T22:05:20.235Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8a/36d82869ab5ec829ca8574dfcb92b51286fcfb1e9c7a73659616362dc880/yarl-1.23.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:803a3c3ce4acc62eaf01eaca1208dcf0783025ef27572c3336502b9c232005e7", size = 105061, upload-time = "2026-03-01T22:05:22.268Z" }, + { url = "https://files.pythonhosted.org/packages/66/3e/868e5c3364b6cee19ff3e1a122194fa4ce51def02c61023970442162859e/yarl-1.23.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3d2bff8f37f8d0f96c7ec554d16945050d54462d6e95414babaa18bfafc7f51", size = 100132, upload-time = "2026-03-01T22:05:23.638Z" }, + { url = "https://files.pythonhosted.org/packages/cf/26/9c89acf82f08a52cb52d6d39454f8d18af15f9d386a23795389d1d423823/yarl-1.23.0-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c75eb09e8d55bceb4367e83496ff8ef2bc7ea6960efb38e978e8073ea59ecb67", size = 99289, upload-time = "2026-03-01T22:05:25.749Z" }, + { url = 
"https://files.pythonhosted.org/packages/6f/54/5b0db00d2cb056922356104468019c0a132e89c8d3ab67d8ede9f4483d2a/yarl-1.23.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:877b0738624280e34c55680d6054a307aa94f7d52fa0e3034a9cc6e790871da7", size = 96950, upload-time = "2026-03-01T22:05:27.318Z" }, + { url = "https://files.pythonhosted.org/packages/f6/40/10fa93811fd439341fad7e0718a86aca0de9548023bbb403668d6555acab/yarl-1.23.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b5405bb8f0e783a988172993cfc627e4d9d00432d6bbac65a923041edacf997d", size = 93960, upload-time = "2026-03-01T22:05:28.738Z" }, + { url = "https://files.pythonhosted.org/packages/bc/d2/8ae2e6cd77d0805f4526e30ec43b6f9a3dfc542d401ac4990d178e4bf0cf/yarl-1.23.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1c3a3598a832590c5a3ce56ab5576361b5688c12cb1d39429cf5dba30b510760", size = 104703, upload-time = "2026-03-01T22:05:30.438Z" }, + { url = "https://files.pythonhosted.org/packages/2f/0c/b3ceacf82c3fe21183ce35fa2acf5320af003d52bc1fcf5915077681142e/yarl-1.23.0-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:8419ebd326430d1cbb7efb5292330a2cf39114e82df5cc3d83c9a0d5ebeaf2f2", size = 98325, upload-time = "2026-03-01T22:05:31.835Z" }, + { url = "https://files.pythonhosted.org/packages/9d/e0/12900edd28bdab91a69bd2554b85ad7b151f64e8b521fe16f9ad2f56477a/yarl-1.23.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:be61f6fff406ca40e3b1d84716fde398fc08bc63dd96d15f3a14230a0973ed86", size = 105067, upload-time = "2026-03-01T22:05:33.358Z" }, + { url = "https://files.pythonhosted.org/packages/15/61/74bb1182cf79c9bbe4eb6b1f14a57a22d7a0be5e9cedf8e2d5c2086474c3/yarl-1.23.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ceb13c5c858d01321b5d9bb65e4cf37a92169ea470b70fec6f236b2c9dd7e34", size = 100285, upload-time = "2026-03-01T22:05:35.4Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/7f/cd5ef733f2550de6241bd8bd8c3febc78158b9d75f197d9c7baa113436af/yarl-1.23.0-cp312-cp312-win32.whl", hash = "sha256:fffc45637bcd6538de8b85f51e3df3223e4ad89bccbfca0481c08c7fc8b7ed7d", size = 82359, upload-time = "2026-03-01T22:05:36.811Z" }, + { url = "https://files.pythonhosted.org/packages/f5/be/25216a49daeeb7af2bec0db22d5e7df08ed1d7c9f65d78b14f3b74fd72fc/yarl-1.23.0-cp312-cp312-win_amd64.whl", hash = "sha256:f69f57305656a4852f2a7203efc661d8c042e6cc67f7acd97d8667fb448a426e", size = 87674, upload-time = "2026-03-01T22:05:38.171Z" }, + { url = "https://files.pythonhosted.org/packages/d2/35/aeab955d6c425b227d5b7247eafb24f2653fedc32f95373a001af5dfeb9e/yarl-1.23.0-cp312-cp312-win_arm64.whl", hash = "sha256:6e87a6e8735b44816e7db0b2fbc9686932df473c826b0d9743148432e10bb9b9", size = 81879, upload-time = "2026-03-01T22:05:40.006Z" }, + { url = "https://files.pythonhosted.org/packages/69/68/c8739671f5699c7dc470580a4f821ef37c32c4cb0b047ce223a7f115757f/yarl-1.23.0-py3-none-any.whl", hash = "sha256:a2df6afe50dea8ae15fa34c9f824a3ee958d785fd5d089063d960bae1daa0a3f", size = 48288, upload-time = "2026-03-01T22:07:51.388Z" }, +] + [[package]] name = "zarr" -version = "2.18.7" +version = "3.1.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "asciitree" }, - { name = "fasteners", marker = "sys_platform != 'emscripten'" }, + { name = "donfig" }, + { name = "google-crc32c" }, { name = "numcodecs" }, { name = "numpy" }, + { name = "packaging" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/1d/01cf9e3ab2d85190278efc3fca9f68563de35ae30ee59e7640e3af98abe3/zarr-2.18.7.tar.gz", hash = "sha256:b2b8f66f14dac4af66b180d2338819981b981f70e196c9a66e6bfaa9e59572f5", size = 3604558, upload-time = "2025-04-09T07:59:28.482Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/76/7fa87f57c112c7b9c82f0a730f8b6f333e792574812872e2cd45ab604199/zarr-3.1.5.tar.gz", hash = 
"sha256:fbe0c79675a40c996de7ca08e80a1c0a20537bd4a9f43418b6d101395c0bba2b", size = 366825, upload-time = "2025-11-21T14:06:01.492Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5e/d8/9ffd8c237b3559945bb52103cf0eed64ea098f7b7f573f8d2962ef27b4b2/zarr-2.18.7-py3-none-any.whl", hash = "sha256:ac3dc4033e9ae4e9d7b5e27c97ea3eaf1003cc0a07f010bd83d5134bf8c4b223", size = 211273, upload-time = "2025-04-09T07:59:27.039Z" }, + { url = "https://files.pythonhosted.org/packages/44/15/bb13b4913ef95ad5448490821eee4671d0e67673342e4d4070854e5fe081/zarr-3.1.5-py3-none-any.whl", hash = "sha256:29cd905afb6235b94c09decda4258c888fcb79bb6c862ef7c0b8fe009b5c8563", size = 284067, upload-time = "2025-11-21T14:05:59.235Z" }, ] [[package]] From 6d302759a2de9440cf2b453d420fd9419e0bb152 Mon Sep 17 00:00:00 2001 From: "Kevin M. Dean" Date: Mon, 23 Mar 2026 05:08:05 -0500 Subject: [PATCH 02/10] Make OME-Zarr v3 the canonical store MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update docs and README to adopt OME‑Zarr v3 (*.ome.zarr) as the canonical ClearEx store and formalize a public vs internal layout. Introduces a clear separation between public OME HCS collections (root A/1// and results//latest) and ClearEx-owned runtime namespaces (clearex/runtime_cache/*, clearex/metadata, clearex/provenance, clearex/results). Add CLI flags and examples for migration and display-pyramid workflows (--display-pyramid, --migrate-store, --migrate-output, --migrate-overwrite) and update runtime/ingestion, provenance, module maps, and workflow docs to reference the new persistence and naming conventions. Provides migration guidance and changes many examples to use data_store.ome.zarr, clarifies spatial_calibration storage in clearex/metadata, and documents that legacy root data/data_pyramid layouts are migration-only. 
--- AGENTS.md | 15 ++ README.md | 118 +++++---- docs/AGENTS.md | 20 +- docs/analysis_particle_detection_workflow.rst | 221 +++++------------ docs/source/getting-started.rst | 10 + docs/source/runtime/architecture-overview.rst | 58 +++-- docs/source/runtime/cli-and-execution.rst | 67 ++--- .../runtime/ingestion-and-canonical-store.rst | 104 ++++---- docs/source/runtime/module-map.rst | 61 +++-- docs/source/runtime/provenance.rst | 28 ++- docs/zarr_materialization_workflow.rst | 228 ++++++------------ src/clearex/AGENTS.md | 66 ++++- src/clearex/detect/README.md | 7 +- src/clearex/flatfield/README.md | 40 +-- src/clearex/gui/README.md | 24 +- src/clearex/io/README.md | 61 +++-- src/clearex/mip_export/README.md | 11 +- src/clearex/registration/README.md | 31 ++- src/clearex/visualization/README.md | 21 +- 19 files changed, 643 insertions(+), 548 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 533857f..441f067 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -48,6 +48,21 @@ workflow behavior for ClearEx lives in `src/clearex/AGENTS.md`. - Avoid duplicating subsystem strategy in this root file; link or defer to the more specific package/subsystem docs instead. +## Canonical Store Policy + +- Treat OME-Zarr v3 (``*.ome.zarr``) as the only canonical ClearEx store + format in new code, tests, examples, and docs. +- Treat legacy ClearEx root-``data`` / root-``data_pyramid`` layouts in + ``.zarr`` / ``.n5`` as migration-only inputs. Do not describe them as the + preferred runtime contract and do not introduce new fixtures or examples that + rely on them as canonical. +- Public microscopy-facing image data must use the OME-Zarr HCS contract. + ClearEx-owned execution caches, provenance, GUI state, and non-image + artifacts belong under the namespaced ``clearex/`` tree. +- Storage-contract changes must update the repository ``README.md``, + ``src/clearex/AGENTS.md``, and the affected ``docs/source/runtime/*.rst`` / + subsystem ``README.md`` files in the same change set. 
+ ## Validation - Run linting and tests that match the files and behavior you changed. diff --git a/README.md b/README.md index a2be2a8..bb8d839 100755 --- a/README.md +++ b/README.md @@ -11,15 +11,35 @@ ClearEx is an open source Python package for scalable analytics of cleared and e - Headless CLI for scripted runs. - Input support for TIFF/OME-TIFF, Zarr/N5, HDF5 (`.h5/.hdf5/.hdf`), and NumPy (`.npy/.npz`). - Navigate experiment ingestion from `experiment.yml` / `experiment.yaml`. -- Canonical analysis store layout with axis order `(t, p, c, z, y, x)`. -- Store-level spatial calibration for Navigate multiposition data, persisted per analysis store and applied to physical placement metadata without rewriting canonical image data. +- Canonical persisted store format is OME-Zarr v3 (`*.ome.zarr`). +- Public microscopy-facing image data is published as OME-Zarr HCS collections, while ClearEx execution caches and non-image artifacts live under namespaced `clearex/...` groups. +- Internal analysis image layout remains `(t, p, c, z, y, x)` for runtime-cache arrays and analysis kernels. +- Store-level spatial calibration for Navigate multiposition data is persisted in `clearex/metadata` and applied to physical placement metadata without rewriting image data. - Analysis operations available from the main entrypoint: - - deconvolution (`results/deconvolution/latest/data`) - - particle detection (`results/particle_detection/latest`) - - uSegment3D segmentation (`results/usegment3d/latest/data`) - - visualization metadata (`results/visualization/latest`) with napari launch - - registration workflow hook (currently initialized from CLI/GUI, but latest-output persistence is not yet wired like other analyses) -- FAIR-style provenance records persisted in Zarr/N5 (`provenance/runs`) with append-only run history and hash chaining. 
+ - flatfield (`results/flatfield/latest`) + - deconvolution (`results/deconvolution/latest`) + - shear transform (`results/shear_transform/latest`) + - registration (`results/registration/latest`) + - uSegment3D segmentation (`results/usegment3d/latest`) + - particle detection (`clearex/results/particle_detection/latest`) + - display-pyramid metadata (`clearex/results/display_pyramid/latest`) + - visualization metadata (`clearex/results/visualization/latest`) with napari launch + - MIP export metadata (`clearex/results/mip_export/latest`) +- FAIR-style provenance records are persisted in `clearex/provenance/runs` with append-only run history and hash chaining. + +## Canonical Store Contract +- `data_store.ome.zarr` is the canonical materialized store beside `experiment.yml`. +- The public source image collection is a synthetic single-well HCS layout at the store root: `A/1/<field>/<level>`. +- Public image-producing analysis outputs are sibling HCS collections under `results/<operation>/latest`. +- ClearEx internal execution data lives under: + - `clearex/runtime_cache/source/...` + - `clearex/runtime_cache/results/<operation>/latest/...` +- ClearEx-owned metadata, provenance, GUI state, and non-image artifacts live under: + - `clearex/metadata` + - `clearex/provenance` + - `clearex/gui_state` + - `clearex/results/<operation>/latest` +- Legacy root `data`, root `data_pyramid`, and `results/<operation>/latest/data` layouts are migration-only and are no longer the canonical public contract. 
## Installation @@ -139,10 +159,10 @@ Current CLI usage: usage: clearex [-h] [--flatfield] [--deconvolution] [--particle-detection] [--usegment3d] [--channel-indices CHANNEL_INDICES] [--input-resolution-level INPUT_RESOLUTION_LEVEL] - [--shear-transform] [-r] [-v] [--mip-export] [-f FILE] + [--shear-transform] [-r] [--display-pyramid] [-v] + [--mip-export] [-f FILE] [--migrate-store MIGRATE_STORE] + [--migrate-output MIGRATE_OUTPUT] [--migrate-overwrite] [--dask | --no-dask] [--chunks CHUNKS] - [--execution-mode {auto,advanced}] [--max-workers MAX_WORKERS] - [--memory-per-worker MEMORY_PER_WORKER] [--calibrate] [--stage-axis-map STAGE_AXIS_MAP] [--gui | --no-gui] [--headless] ``` @@ -157,14 +177,14 @@ usage: clearex [-h] [--flatfield] [--deconvolution] [--particle-detection] - `--input-resolution-level`: uSegment3D input pyramid level (`0`, `1`, ...). - `--shear-transform`: Run shear-transform workflow. - `-r, --registration`: Run registration workflow hook. +- `--display-pyramid`: Prepare reusable display pyramids for visualization. - `-v, --visualization`: Run visualization workflow. - `--mip-export`: Export XY/XZ/YZ maximum-intensity projections. +- `--migrate-store`: Convert one legacy ClearEx `.zarr` / `.n5` store into canonical OME-Zarr v3. +- `--migrate-output`: Optional destination path for `--migrate-store`. +- `--migrate-overwrite`: Overwrite the migration destination. - `--dask / --no-dask`: Enable/disable Dask-backed reading. - `--chunks`: Chunk spec for Dask reads, for example `256` or `1,256,256`. -- `--execution-mode`: Automatic or advanced Dask execution planning mode. -- `--max-workers`: Worker cap for automatic execution planning. -- `--memory-per-worker`: Preferred per-worker memory limit for automatic execution planning. -- `--calibrate`: Refresh cached execution-planning calibration before running. - `--stage-axis-map`: Store-level world `z/y/x` mapping for Navigate multiposition stage coordinates, for example `z=+x,y=none,x=+y`. 
- `--gui / --no-gui`: Enable/disable GUI launch (default is `--gui`). - `--headless`: Force non-interactive mode (overrides `--gui`). @@ -204,38 +224,45 @@ clearex --headless \ --input-resolution-level 1 ``` -Run headless particle detection on an existing canonical Zarr store: +Run headless particle detection on an existing canonical OME-Zarr store: ```bash clearex --headless \ - --file /path/to/data_store.zarr \ + --file /path/to/data_store.ome.zarr \ --particle-detection ``` Disable Dask lazy loading: ```bash -clearex --headless --no-dask --file /path/to/data_store.zarr --particle-detection +clearex --headless --no-dask --file /path/to/data_store.ome.zarr --particle-detection +``` + +Migrate one legacy ClearEx store into canonical OME-Zarr v3: + +```bash +clearex --migrate-store /path/to/legacy_store.zarr ``` ## Runtime Behavior Notes - If `--file` points to Navigate `experiment.yml`, ClearEx resolves acquisition data and materializes a canonical store first. -- For non-Zarr/N5 acquisition data, materialization target is `data_store.zarr` beside `experiment.yml`. -- For Zarr/N5 acquisition data, ClearEx reuses the source store path in place. -- Canonical stores persist root-attr `spatial_calibration = {schema, stage_axis_map_zyx, theta_mode}`. Missing metadata resolves to the identity mapping `z=+z,y=+y,x=+x`. +- Existing canonical OME-Zarr stores are reused in place. +- Non-canonical acquisition inputs, including TIFF/OME-TIFF, HDF5, NumPy, generic Zarr/N5, and Navigate source layouts, materialize to `data_store.ome.zarr` beside `experiment.yml`. +- Legacy ClearEx `.zarr` / `.n5` stores are not treated as canonical runtime inputs. Migrate them first with `clearex --migrate-store`. +- Canonical stores persist `spatial_calibration = {schema, stage_axis_map_zyx, theta_mode}` inside `clearex/metadata`. Missing metadata resolves to the identity mapping `z=+z,y=+y,x=+x`. - In the setup window, `Spatial Calibration` is configured per listed experiment. 
Draft mappings are tracked per experiment while the dialog is open, existing stores prefill the control, and `Next` writes the resolved mapping to every reused or newly prepared store before analysis selection opens. -- In headless mode, `--stage-axis-map` writes the supplied mapping to materialized experiment stores and existing Zarr/N5 stores before analysis starts. If the flag is omitted, existing store calibration is preserved. -- Deconvolution, particle detection, uSegment3D, and visualization operations run against canonical Zarr/N5 stores. -- Visualization supports multi-volume overlays (for example raw `data` + `results/usegment3d/latest/data`) with per-layer image/labels display controls. +- In headless mode, `--stage-axis-map` writes the supplied mapping to materialized experiment stores and existing canonical OME-Zarr stores before analysis starts. If the flag is omitted, existing store calibration is preserved. +- Deconvolution, particle detection, uSegment3D, and visualization operations run against canonical OME-Zarr stores. +- Visualization supports multi-volume overlays using logical sources and/or public OME image collections (for example source data plus `results/usegment3d/latest`) with per-layer image/labels display controls. - Multiposition visualization placement now resolves world `z/y/x` translations from the store-level spatial calibration. Bindings support `X`, `Y`, `Z`, and Navigate focus axis `F` with sign inversion or `none`; `THETA` remains a rotation of the `z/y` plane about world `x`. - Visualization now probes napari OpenGL renderer info (`vendor`/`renderer`/`version`) and can fail fast when software rendering is detected or GPU rendering cannot be confirmed (`require_gpu_rendering=True`). - MIP export writes TIFF outputs as OME-TIFF (`.tif`) with projection-aware physical pixel calibration (`PhysicalSizeX/Y`) derived from source `voxel_size_um_zyx`. 
-- uSegment3D runs per `(t, p, selected channel)` volume task and writes labels to `results/usegment3d/latest/data`. +- uSegment3D runs per `(t, p, selected channel)` volume task and publishes the latest result as `results/usegment3d/latest`. - GUI channel checkboxes now support selecting multiple channels in one run (`channel_indices`). - With GPU-aware `LocalCluster`, separate channel tasks can execute concurrently across GPUs. - - `input_resolution_level` lets segmentation run on a selected pyramid level (for example `data_pyramid/level_1`). + - `input_resolution_level` lets segmentation run on a selected prepared pyramid level. - `output_reference_space=level0` upsamples labels back to original resolution. - - `save_native_labels=True` (when upsampling) also writes native-resolution labels to `results/usegment3d/latest/data_native`. + - `save_native_labels=True` (when upsampling) also stores native-resolution labels as ClearEx-owned auxiliary artifacts. - GPU awareness: - `gpu=True` requests GPU use for Cellpose/uSegment3D internals. - `require_gpu=True` fails fast if CUDA is unavailable. @@ -270,7 +297,7 @@ clearex --headless --no-dask --file /path/to/data_store.zarr --particle-detectio - save the current ordered experiment list for later reuse. - Selecting an experiment in the setup list loads metadata automatically; double-clicking reloads that entry explicitly. -- Pressing `Next` batch-prepares canonical `data_store.zarr` outputs for every +- Pressing `Next` batch-prepares canonical `data_store.ome.zarr` outputs for every listed experiment that does not already have a complete store, then opens analysis selection for the currently selected experiment. - The setup dialog persists the last-used Zarr save config across sessions. @@ -316,16 +343,25 @@ clearex --headless --no-dask --file /path/to/data_store.zarr --particle-detectio - Visualization parameters include `require_gpu_rendering` (enabled by default). 
Disable only when running intentionally without a GPU-backed OpenGL context. ## Output Layout (Canonical Store) -- Root metadata: - - `spatial_calibration` for store-level world `z/y/x` placement mapping -- Base image data: `data` -- Multiscale pyramid levels: `data_pyramid/level_*` -- Latest analysis outputs: - - `results/deconvolution/latest` - - `results/particle_detection/latest` - - `results/usegment3d/latest` - - optional native-level labels: `results/usegment3d/latest/data_native` - - `results/visualization/latest` -- Provenance: - - run records: `provenance/runs/<run_id>` - - latest output pointers: `provenance/latest_outputs/<operation>` +- Public OME source image collection: + - root `A/1/<field>/<level>` (`TCZYX`) +- Public OME image analysis collections: + - `results/flatfield/latest/A/1/<field>/<level>` + - `results/deconvolution/latest/A/1/<field>/<level>` + - `results/shear_transform/latest/A/1/<field>/<level>` + - `results/registration/latest/A/1/<field>/<level>` + - `results/usegment3d/latest/A/1/<field>/<level>` +- ClearEx metadata and runtime namespaces: + - `clearex/metadata` + - `clearex/provenance/runs/<run_id>` + - `clearex/provenance/latest_outputs/<operation>` + - `clearex/gui_state` + - `clearex/runtime_cache/source/data` + - `clearex/runtime_cache/source/data_pyramid/level_*` + - `clearex/runtime_cache/results/<operation>/latest/data` + - `clearex/runtime_cache/results/<operation>/latest/data_pyramid/level_*` + - `clearex/results/<operation>/latest` +- Migration-only legacy layouts: + - root `data` + - root `data_pyramid` + - `results/<operation>/latest/data` diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 731e846..93190a2 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -11,7 +11,9 @@ You are a computer vision expert in fluorescence microscopy and technical writer - Your task: read code from `src/` and generate or update documentation in `docs/` ## Project knowledge -- **Tech Stack:** Python 3.12, antsypyx, dask, h5py, matplotlib, napari, opencv-python, numpy, pandas, scikit-image, scipy, seaborn +- **Tech Stack:** Python 3.12, antspyx, dask, h5py, matplotlib, napari, numpy, +  ome-zarr-models, 
ome-zarr, bioio-ome-zarr, opencv-python, pandas, + scikit-image, scipy, seaborn, zarr v3 - **File Structure:** - `src/` – Application source code (you READ from here) - `docs/` – All documentation (you WRITE to here) @@ -27,11 +29,19 @@ Build docs: `make html -j 15` Be concise, specific, and value dense Write so that a new developer to this codebase can understand your writing, don’t assume your audience are experts in the topic/area you are writing about. - When runtime behavior changes, update the matching ``docs/source/runtime`` - pages and the affected top-level/module ``README.md`` / ``CODEX.md`` / - ``AGENTS.md`` notes in the same change so CLI flags, store metadata names, - and provenance fields stay aligned. + pages and the affected top-level/module ``README.md`` / ``AGENTS.md`` notes + in the same change so CLI flags, store metadata names, and provenance fields + stay aligned. +- Document OME-Zarr v3 ``*.ome.zarr`` as the canonical ClearEx store format. + Public image examples should use the OME HCS layout, while ClearEx-owned + metadata/provenance/runtime-cache examples should use ``clearex/...`` + paths. +- Do not document legacy root ``data`` / ``data_pyramid`` or + ``results//latest/data`` layouts as the preferred public contract. + If legacy layouts are mentioned, label them explicitly as migration-only and + point readers to ``clearex --migrate-store``. ## Boundaries - ✅ **Always do:** Write new files to `docs/`, follow the style examples -- ⚠️ **Ask first:** Before modifying existing documents in a major way +- ⚠️ **Ask first:** Before reorganizing documentation structure in a major way - 🚫 **Never do:** Modify code in `src/`, edit config files, commit secrets diff --git a/docs/analysis_particle_detection_workflow.rst b/docs/analysis_particle_detection_workflow.rst index 8355afa..f0dbb8d 100644 --- a/docs/analysis_particle_detection_workflow.rst +++ b/docs/analysis_particle_detection_workflow.rst @@ -1,211 +1,116 @@ -.. 
_analysis-workflow-particle-detection-dask--zarr: +.. _analysis-workflow-particle-detection-dask--ome-zarr: -Analysis Workflow: Particle Detection (Dask + Zarr) -=================================================== +Analysis Workflow: Particle Detection (OME-Zarr + Dask) +======================================================= Implemented behavior -------------------- -This implementation adds a runnable analysis path for particle detection -on canonical ClearEx stores (``data_store.zarr``, ``.n5``, -``.ome.zarr``) with CPU-oriented Dask execution and persisted outputs in -``results/``. +This analysis path runs particle detection on canonical ClearEx OME-Zarr stores +(``data_store.ome.zarr`` or another canonical ``*.ome.zarr`` input) with +CPU-oriented Dask execution and persisted outputs in the ClearEx namespace. Dask backend policy by workload ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- **I/O-heavy ingestion/materialization**: local Dask uses - ``processes=False`` (threads). -- **CPU-heavy analysis**: local Dask uses ``processes=True`` (worker - processes). -- Backend mode still follows the GUI-selected backend configuration. +- **I/O-heavy ingestion/materialization**: local Dask uses + ``processes=False`` (threads). +- **CPU-heavy analysis**: local Dask uses ``processes=True`` (worker + processes). +- Backend mode still follows the GUI-selected backend configuration. Default analysis parameter dictionary ------------------------------------- -``WorkflowConfig`` now carries ``analysis_parameters``, keyed by -analysis method, with defaults in -``DEFAULT_ANALYSIS_OPERATION_PARAMETERS``. +``WorkflowConfig`` carries ``analysis_parameters``, keyed by analysis method, +with defaults in ``DEFAULT_ANALYSIS_OPERATION_PARAMETERS``. 
-Current keys include: +All operation dictionaries include: -- ``deconvolution`` -- ``particle_detection`` -- ``registration`` -- ``visualization`` - -All operation dictionaries now include: - -- ``execution_order`` (int): defines run order when multiple routines - are selected. -- ``input_source`` (str): source dataset key/component (default - ``data``). +- ``execution_order`` (int): defines run order when multiple routines are + selected. +- ``input_source`` (str): logical source alias or explicit internal component + path (default ``data``). The ``particle_detection`` defaults include: -- channel selection (``channel_index``) -- chunk/detection profile metadata (``chunk_basis``, - ``detect_2d_per_slice``) -- overlap controls (``use_map_overlap``, ``overlap_zyx``) -- memory hint (``memory_overhead_factor``) -- detection parameters (``bg_sigma``, ``fwhm_px``, - ``sigma_min_factor``, ``sigma_max_factor``, ``threshold``, - ``overlap``, ``exclude_border``) -- optional post-filters (``eliminate_insignificant_particles``, - ``remove_close_particles``, ``min_distance_sigma``) - -GUI updates (second window) ---------------------------- - -The analysis-selection dialog remains split into two halves: - -- **Left panel**: operation selection rows with: - - - enable checkbox, - - execution-order spinner, - - ``Configure`` button. - -- **Right panel**: stacked operation-parameter panel. - - - Only one operation panel is visible at a time. - - Clicking ``Configure`` on one operation hides the previous - operation panel. - - Unselected operations cannot be configured. - -Implemented controls: - -- Per-operation ``Input source`` selector (raw ``data`` or prior - selected upstream output). -- Particle-detection parameter form: - - - channel index, - - preprocessing + detector numeric parameters, - - overlap toggle and ``overlap_zyx``, - - optional significance/proximity filtering toggles, - - minimum-distance setting. 
- -- Parameter-help panel: - - - Updates when hovering/focusing a specific parameter. - - Includes verbose hints for fields such as ``fwhm_px``. - -- Theme consistency: - - - Operation parameter controls use the same dark theme. - - Dropdown item views use dark backgrounds and readable selected - text. +- channel selection (``channel_index``), +- chunk/detection profile metadata (``chunk_basis``, + ``detect_2d_per_slice``), +- overlap controls (``use_map_overlap``, ``overlap_zyx``), +- memory hint (``memory_overhead_factor``), +- detection parameters (``bg_sigma``, ``fwhm_px``, + ``sigma_min_factor``, ``sigma_max_factor``, ``threshold``, + ``overlap``, ``exclude_border``), +- optional post-filters (``eliminate_insignificant_particles``, + ``remove_close_particles``, ``min_distance_sigma``). Execution sequence and upstream input behavior ---------------------------------------------- -- Runtime now resolves selected analyses by ``execution_order`` instead - of fixed hard-coded ordering. -- Per-step ``input_source`` is resolved at runtime: +- Runtime resolves selected analyses by ``execution_order`` instead of fixed + hard-coded ordering. +- Per-step ``input_source`` is resolved at runtime: - - ``data`` maps to canonical ``data`` array. - - Operation keys map to canonical latest component paths. - - Explicit Zarr component paths are also supported. + - ``data`` maps to ``clearex/runtime_cache/source/data`` + - operation keys map to internal runtime-cache result components + - explicit internal component paths are also supported -- Particle detection now reads from configurable ``input_source``. -- If requested component is missing, particle detection falls back to - ``data`` with a warning. +- Particle detection reads from the resolved source component. +- If the requested component is missing, particle detection raises an input + dependency error instead of silently falling back. Particle detection execution workflow ------------------------------------- -1. 
Read configured input component from selected store +1. Read the configured input component from the selected canonical store (``t,p,c,z,y,x``). -2. Select user-configured channel. +2. Select the configured channel. 3. Build chunk tasks at native 3D chunk boundaries. 4. For each chunk: - - preprocess each z-slice (``preprocess(..., bg_sigma=...)``) - - detect blobs per slice (``detect_particles(...)``) - - convert chunk-local detections to global ``(t,p,c,z,y,x)`` - coordinates - - optionally apply significance/proximity filters - - optionally apply core-region masking for overlap mode + - preprocess each z-slice (``preprocess(..., bg_sigma=...)``), + - detect blobs per slice (``detect_particles(...)``), + - convert chunk-local detections to global ``(t,p,c,z,y,x)`` + coordinates, + - optionally apply significance/proximity filters, + - optionally apply core-region masking for overlap mode. 5. Merge and sort detections globally. 6. Save latest outputs under: - - ``results/particle_detection/latest/detections`` (columns: - ``t,p,c,z,y,x,sigma,intensity``) - - ``results/particle_detection/latest/points_tzyx`` (Napari-friendly - points) + - ``clearex/results/particle_detection/latest/detections`` + - ``clearex/results/particle_detection/latest/points_tzyx`` -7. Register latest-output reference and persist provenance. +7. Register the latest-output reference and persist provenance. Output format for Napari ------------------------ Napari-friendly point coordinates are persisted at: -- ``results/particle_detection/latest/points_tzyx`` +- ``clearex/results/particle_detection/latest/points_tzyx`` -Detection table metadata includes column names and points-axis metadata -in Zarr attrs. +Detection table metadata includes column names and points-axis metadata in Zarr +attrs. Provenance integration ---------------------- -- Particle-detection step parameters and run summary are included in - provenance steps. -- Workflow provenance now includes ``analysis_parameters``. 
-- Output records include latest component and detection summary. -- Latest output reference for particle detection is registered in - provenance metadata. +- Particle-detection step parameters and run summary are included in + provenance steps. +- Workflow provenance includes ``analysis_parameters``. +- Output records include latest component and detection summary. +- Latest output reference for particle detection is registered under + ``clearex/provenance/latest_outputs/particle_detection``. Verification ------------ -Unit tests -~~~~~~~~~~ - -Executed: - -.. code:: bash - - uv run --with pytest --with requests python -m pytest -q tests/detect/test_pipeline.py tests/test_workflow.py tests/io/test_cli.py tests/io/test_experiment.py tests/io/test_provenance.py - -Result: - -- ``48 passed`` - -Lint checks -~~~~~~~~~~~ - -Executed: - -.. code:: bash - - uv run ruff check src/clearex/main.py src/clearex/workflow.py src/clearex/gui/app.py src/clearex/io/cli.py src/clearex/io/experiment.py src/clearex/io/provenance.py src/clearex/detect/pipeline.py tests/detect/test_pipeline.py tests/test_workflow.py - -Result: - -- ``All checks passed`` - -Headless real-data smoke test -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Executed: - -.. code:: bash - - uv run python -m clearex.main --headless --no-gui --file /Users/Dean/Desktop/kevin/20260307_lung_mv3_488nm/cell_009/CH00_000000.n5 --particle-detection --dask - -Observed: - -- Analysis backend ran with ``LocalCluster`` in process mode - (``processes=True``). -- Progress updates were emitted for chunk completion. -- Results were written to ``results/particle_detection/latest``. -- Provenance run record was persisted - (``run_id=77d6e00e9ade47b0a524b964a6bc435d`` in this verification - run). 
- -Post-check of stored outputs: +When this workflow changes, validation should cover: -- ``detections`` shape: ``(10663, 8)`` -- ``points_tzyx`` shape: ``(10663, 4)`` -- ``channel_index``: ``0`` +- logical input-source resolution, +- chunk task planning and global coordinate stitching, +- correct write locations in ``clearex/results/particle_detection/latest``, +- provenance latest-output registration, +- headless CLI execution against a canonical ``*.ome.zarr`` store. diff --git a/docs/source/getting-started.rst b/docs/source/getting-started.rst index d525f82..3d23d59 100644 --- a/docs/source/getting-started.rst +++ b/docs/source/getting-started.rst @@ -62,6 +62,9 @@ Run in headless mode against an experiment file: uv run python -m clearex.main --headless --no-gui --file /path/to/experiment.yml --dask +This materializes ``data_store.ome.zarr`` beside ``experiment.yml`` when a +canonical store does not already exist. + Run in headless mode with an explicit Navigate stage-to-world mapping: .. code-block:: bash @@ -74,6 +77,13 @@ Run in headless mode with an explicit Navigate stage-to-world mapping: In the GUI setup flow, the same mapping can be authored through the ``Spatial Calibration`` panel before entering analysis selection. +If you have a legacy ClearEx ``.zarr`` / ``.n5`` store that predates the OME +transition, migrate it before using it as a canonical input: + +.. code-block:: bash + + uv run clearex --migrate-store /path/to/legacy_store.zarr + Documentation Build ------------------- diff --git a/docs/source/runtime/architecture-overview.rst b/docs/source/runtime/architecture-overview.rst index a73dcc5..f81c785 100644 --- a/docs/source/runtime/architecture-overview.rst +++ b/docs/source/runtime/architecture-overview.rst @@ -7,9 +7,12 @@ Design Goals The runtime architecture is built around these constraints: - GUI-first operator workflow, with fully supported headless execution. -- Canonical analysis data layout in ``(t, p, c, z, y, x)``. 
+- Canonical persisted store format is OME-Zarr v3 (``*.ome.zarr``). +- Public image interoperability uses OME-Zarr HCS collections. +- ClearEx internal analysis arrays keep canonical ``(t, p, c, z, y, x)`` + layout for execution kernels and workflow chaining. - Metadata-only spatial calibration for Navigate multiposition placement. -- Deterministic latest-output paths for large derived arrays. +- Deterministic latest-output publication for image and non-image artifacts. - Append-only, FAIR-oriented provenance records. - Shared configuration model between GUI and headless paths. @@ -27,12 +30,13 @@ ClearEx is intentionally split into layers that can evolve independently: 3. Orchestration layer: workflow entrypoint and execution coordinator in ``clearex.main``. 4. Data and metadata layer: - ingestion/canonical store logic in ``clearex.io.experiment`` and provenance - persistence in ``clearex.io.provenance``. + ingestion in ``clearex.io.experiment``, OME publication and migration in + ``clearex.io.ome_store``, and provenance persistence in + ``clearex.io.provenance``. 5. Analysis layer: - analysis routines (flatfield, deconvolution, particle detection, - visualization via ``clearex..pipeline``, with registration hooks - currently under ``clearex.registration``. + analysis routines (flatfield, deconvolution, shear transform, registration, + particle detection, uSegment3D, visualization, MIP export) via + ``clearex..pipeline``. End-to-End Execution Flow ------------------------- @@ -41,26 +45,37 @@ At runtime, control flows through one orchestrator path: 1. Build a ``WorkflowConfig`` from CLI arguments and/or GUI state. 2. Optionally launch GUI and let the operator finalize settings. -3. If input is Navigate ``experiment.yml``, resolve acquisition source data. -4. Materialize or validate canonical Zarr/N5 store. -5. Resolve analysis sequence and per-operation effective inputs. -6. Run selected analyses in sequence. -7. 
Write latest outputs and append one provenance run record. +3. Resolve acquisition source data or target store. +4. Materialize or validate canonical OME-Zarr store state. +5. Resolve analysis sequence and per-operation logical inputs. +6. Run selected analyses against runtime-cache image components. +7. Publish public OME image outputs and append one provenance run record. Operational Invariants ---------------------- These contracts are stable and expected by multiple modules: -- Canonical base image component is ``data``. -- Canonical base image shape is always six-dimensional in +- Canonical public source image collection is the root OME HCS layout: + ``A/1//``. +- Canonical internal source component is + ``clearex/runtime_cache/source/data``. +- Canonical internal image shape is always six-dimensional in ``(t, p, c, z, y, x)`` order. -- Multiscale levels are stored under ``data_pyramid/level_``. -- Root attr ``spatial_calibration`` is the canonical per-store placement - mapping for Navigate multiposition stage coordinates. -- Large analysis outputs are latest-only under ``results//latest``. -- Provenance run history is append-only under ``provenance/runs``. -- Provenance includes hash chaining for tamper-evident verification. +- Internal multiscale source levels are stored under + ``clearex/runtime_cache/source/data_pyramid/level_``. +- Public image-producing analysis outputs are published under + ``results//latest``. +- Internal image-producing analysis arrays live under + ``clearex/runtime_cache/results//latest``. +- ClearEx-owned metadata, provenance, GUI state, and non-image artifacts live + under ``clearex/...``. +- Store-level placement metadata is persisted in + ``clearex/metadata["spatial_calibration"]``. +- Provenance run history is append-only under ``clearex/provenance/runs``. +- Legacy root ``data``, root ``data_pyramid``, and + ``results//latest/data`` layouts are migration-only and are not the + canonical public contract. 
Analysis Composition Model -------------------------- @@ -69,7 +84,8 @@ Selected operations are not hard-coded into one fixed pipeline. Composition is driven by normalized per-operation parameters in ``analysis_parameters``: - ``execution_order`` controls relative ordering between selected operations. -- ``input_source`` controls which prior/latest component an operation reads. +- ``input_source`` controls which logical upstream image source an operation + reads. - ``force_rerun`` lets operators bypass provenance-based dedup logic. This allows one run to execute only one step, or a custom chain of steps, diff --git a/docs/source/runtime/cli-and-execution.rst b/docs/source/runtime/cli-and-execution.rst index 41dd9fd..3b6aae7 100644 --- a/docs/source/runtime/cli-and-execution.rst +++ b/docs/source/runtime/cli-and-execution.rst @@ -16,15 +16,15 @@ Current primary arguments are: - ``--input-resolution-level`` - ``--shear-transform`` - ``--registration`` +- ``--display-pyramid`` - ``--visualization`` - ``--mip-export`` - ``--file`` +- ``--migrate-store`` +- ``--migrate-output`` +- ``--migrate-overwrite`` - ``--dask`` / ``--no-dask`` - ``--chunks`` -- ``--execution-mode`` -- ``--max-workers`` -- ``--memory-per-worker`` -- ``--calibrate`` - ``--stage-axis-map`` - ``--gui`` / ``--no-gui`` - ``--headless`` @@ -56,18 +56,16 @@ The first GUI window is an experiment-list driven setup flow: - The current ordered list can be saved back to a reusable ``.clearex-experiment-list.json`` file. - ``Spatial Calibration`` edits store-level world ``z/y/x`` placement mapping - for the currently selected experiment without rewriting canonical image data. + for the currently selected experiment without rewriting image data. - Spatial-calibration drafts are tracked per experiment while setup is open. -- Existing stores prefill the spatial-calibration control when metadata is - already present. 
-- Pressing ``Next`` batch-prepares canonical stores for every listed - experiment that is missing a complete store, persists the resolved spatial - calibration to every reused or newly prepared store, then opens analysis - selection for the currently selected experiment. -- The setup dialog persists the last-used Zarr save configuration across - sessions. -- ``Rebuild Canonical Store`` forces the listed stores to be rebuilt with the - current chunk and pyramid settings instead of reusing complete stores. +- Existing canonical stores prefill the spatial-calibration control when + metadata is already present. +- Pressing ``Next`` batch-prepares canonical ``data_store.ome.zarr`` stores for + every listed experiment that is missing a complete store, persists the + resolved spatial calibration to every reused or newly prepared store, then + opens analysis selection for the currently selected experiment. +- ``Rebuild Canonical Store`` forces the listed stores to be rebuilt as + canonical OME-Zarr outputs with the current chunk and pyramid settings. .. figure:: ../../screenshots/setup_dialog_experiment_list.png :alt: ClearEx setup dialog showing the themed experiment list pane and metadata panel @@ -92,7 +90,8 @@ In the GUI analysis window: saved GUI state is preferred, and ``Restore Latest Run Parameters`` falls back to the latest completed provenance-backed run for the active store. -Examples: +Examples +-------- .. code-block:: bash @@ -108,9 +107,9 @@ Examples: .. code-block:: bash - # Headless visualization only + # Headless visualization against an existing canonical OME-Zarr store clearex --headless \ - --file /path/to/data_store.zarr \ + --file /path/to/data_store.ome.zarr \ --visualization .. code-block:: bash @@ -121,6 +120,11 @@ Examples: --visualization \ --stage-axis-map z=+x,y=none,x=+y +.. 
code-block:: bash + + # Migrate one legacy ClearEx store + clearex --migrate-store /path/to/legacy_store.zarr + Spatial Calibration ------------------- @@ -140,10 +144,9 @@ GUI and headless flows share the same normalized parser and storage policy: - GUI setup writes the resolved mappings to the listed experiment stores on ``Next``. - ``--stage-axis-map`` writes an explicit override to Navigate-materialized - stores and existing Zarr/N5 stores before analysis starts. + stores and existing canonical OME-Zarr stores before analysis starts. - If no explicit override is supplied, existing store calibration is preserved. -- The mapping changes placement metadata only; canonical ``data`` remains - unchanged. +- The mapping changes placement metadata only; image payloads remain unchanged. Interchangeable Routine Composition ----------------------------------- @@ -152,7 +155,7 @@ In orchestration, routines are composed from normalized ``analysis_parameters`` rather than hard-coded fixed order: - ``execution_order`` decides sequence among selected routines. -- ``input_source`` decides which component each routine reads. +- ``input_source`` decides which logical upstream component each routine reads. - ``force_rerun`` can override provenance-based skip behavior. 
This allows operators to rerun one stage, swap stage order, or run partial
@@ -163,13 +166,19 @@ Input Source Resolution
 
 Runtime source aliases currently include:
 
-- ``data`` -> ``data``
-- ``flatfield`` -> ``results/flatfield/latest/data``
-- ``deconvolution`` -> ``results/deconvolution/latest/data``
-- ``registration`` -> ``results/registration/latest/data``
+- ``data`` -> ``clearex/runtime_cache/source/data``
+- ``flatfield`` -> ``clearex/runtime_cache/results/flatfield/latest/data``
+- ``deconvolution`` -> ``clearex/runtime_cache/results/deconvolution/latest/data``
+- ``shear_transform`` -> ``clearex/runtime_cache/results/shear_transform/latest/data``
+- ``usegment3d`` -> ``clearex/runtime_cache/results/usegment3d/latest/data``
+- ``registration`` -> ``clearex/runtime_cache/results/registration/latest/data``
+
+Public OME image collections at the root and under ``results/<analysis>/latest``
+exist for interoperability and visualization. Analysis kernels should not write
+into those public arrays directly.
 
 When a requested source component does not exist, runtime raises an input
-dependency error instead of silently falling back to ``data``.
+dependency error instead of silently falling back.
 
 Progress and Run Lifecycle
 --------------------------
@@ -177,9 +186,9 @@ Progress and Run Lifecycle
 Execution progresses through these coarse stages:
 
 1. Resolve workflow and inputs.
-2. Materialize canonical store when needed.
+2. Materialize canonical OME-Zarr store when needed.
 3. Execute selected analyses in resolved order.
-4. Persist latest outputs and append provenance run record.
+4. Publish latest outputs and append provenance run record.
 
 GUI execution uses explicit progress callbacks and per-run logging in the
 resolved workflow log directory.
diff --git a/docs/source/runtime/ingestion-and-canonical-store.rst b/docs/source/runtime/ingestion-and-canonical-store.rst index e4ebbd0..f4b65f8 100644 --- a/docs/source/runtime/ingestion-and-canonical-store.rst +++ b/docs/source/runtime/ingestion-and-canonical-store.rst @@ -8,9 +8,8 @@ For Navigate runs, ``experiment.yml``/``experiment.yaml`` is the first-class entrypoint. ``clearex.io.experiment`` parses: - save directory and declared file type, -- timepoints/z steps/channels/positions, -- camera dimensions, -- pixel size metadata, +- timepoints, z steps, channels, and positions, +- camera dimensions and pixel-size metadata, - multiposition metadata (including ``multi_positions.yml`` and its ``X/Y/Z/F/THETA`` stage rows when available). @@ -23,7 +22,7 @@ Source candidates are resolved from ordered search roots: 2. ``Saving.save_directory``, 3. directory containing ``experiment.yml``. -File-type aliases are normalized (for example OME-TIFF/OME-ZARR aliases), and +File-type aliases are normalized (for example OME-TIFF / OME-Zarr aliases), and TIFF discovery prefers primary stack files over MIP preview artifacts. Supported Source Inputs @@ -33,8 +32,9 @@ Materialization supports: - TIFF/OME-TIFF, - H5/HDF5/HDF, -- Zarr/N5 (including nested group layouts), -- NumPy ``.npy`` and ``.npz``. +- NumPy ``.npy`` and ``.npz``, +- generic Zarr / N5 stores, +- canonical OME-Zarr stores. Special collection logic is implemented for: @@ -47,73 +47,95 @@ Canonical Store Path Policy ``resolve_data_store_path`` follows this policy: -- Source already Zarr/N5: reuse source store path in place. -- Source not Zarr/N5: write canonical store to ``data_store.zarr`` beside +- Existing canonical OME-Zarr store: reuse the ``*.ome.zarr`` path in place. +- Non-canonical source input: materialize ``data_store.ome.zarr`` beside ``experiment.yml``. 
+- Legacy ClearEx canonical stores (root ``data`` / ``data_pyramid`` layout):
+  migrate them first with ``clearex --migrate-store`` before treating them as a
+  canonical runtime input.
 
 Canonical Layout Contract
 -------------------------
 
-Base analysis data:
-
-- component: ``data``
-- axis order: ``(t, p, c, z, y, x)``
-
-Pyramid levels:
-
-- ``data_pyramid/level_1``, ``data_pyramid/level_2``, ...
-- per-axis factors normalized from workflow save settings
-
-Store metadata captures source path/component/axes and effective write strategy
-for reproducibility.
+Canonical ClearEx stores have two layers of structure:
+
+Public OME image contract
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Root source data is published as a synthetic single-well HCS collection:
+  ``A/1/<field>/<image>``.
+- Image-producing analyses publish sibling HCS collections under
+  ``results/<analysis>/latest``.
+- Each field image is ``TCZYX`` with OME multiscale metadata and coordinate
+  transforms.
+
+Internal ClearEx execution contract
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- Source runtime array:
+  ``clearex/runtime_cache/source/data``
+- Source internal pyramids:
+  ``clearex/runtime_cache/source/data_pyramid/level_<n>``
+- Image-analysis runtime outputs:
+  ``clearex/runtime_cache/results/<analysis>/latest/data``
+- Image-analysis runtime pyramids:
+  ``clearex/runtime_cache/results/<analysis>/latest/data_pyramid/level_<n>``
+- ClearEx-owned metadata and non-image artifacts:
+  ``clearex/metadata``, ``clearex/provenance``, ``clearex/gui_state``,
+  ``clearex/results/<analysis>/latest``
 
 Store-Level Spatial Calibration
 -------------------------------
 
-Canonical analysis stores also persist optional placement metadata for Navigate
-multiposition datasets in the root attr ``spatial_calibration``.
+Canonical analysis stores persist optional placement metadata for Navigate
+multiposition datasets in ``clearex/metadata["spatial_calibration"]``.
 
 - Schema payload is ``{schema, stage_axis_map_zyx, theta_mode}``.
- Missing metadata resolves to the identity mapping ``z=+z,y=+y,x=+x``. -- Calibration is metadata-only and does not rewrite canonical ``data``. +- Calibration is metadata-only and does not rewrite source image payloads. - GUI setup writes the resolved mapping on ``Next`` for every prepared or reused store in the experiment list. - Headless ``--stage-axis-map`` writes an explicit override after materialization for ``experiment.yml`` inputs and before analysis for - existing Zarr/N5 stores. -- Legacy stores without this attr are backfilled logically as identity, while - stores that already have a mapping keep it unless the operator explicitly - overrides it. + existing canonical OME-Zarr stores. +- Legacy stores without this metadata are backfilled logically as identity + during migration, while stores that already have a mapping keep it unless the + operator explicitly overrides it. Materialization Lifecycle ------------------------- ``materialize_experiment_data_store`` performs: -1. source open + metadata extraction, -2. axis coercion to canonical order, +1. source open and metadata extraction, +2. axis coercion to canonical ``(t, p, c, z, y, x)``, 3. chunk normalization, -4. canonical base data writes, -5. pyramid level materialization, -6. ingestion completion metadata update and store-metadata preservation. +4. internal source-array writes to + ``clearex/runtime_cache/source/data``, +5. internal source-pyramid materialization, +6. stage-to-world translation computation from Navigate stage rows and spatial + calibration, +7. publication of the root public OME HCS source collection, +8. namespaced metadata update for reproducibility and resume checks. -If a store is already complete for expected chunks/pyramid settings, -materialization returns quickly without rewriting data. +If a store is already complete for expected chunks and pyramid settings, +materialization returns quickly without rewriting image data. 
Ingestion Progress and Resume ----------------------------- -Ingestion progress is tracked in store metadata and validated via: - -- ``has_canonical_data_component`` -- ``has_complete_canonical_data_store`` +Ingestion progress is tracked in namespaced store metadata and validated via +completion checks over the runtime-cache source component and public OME +metadata. -Progress records include completion counters for base/pyramid regions. This +Progress records include completion counters for base and pyramid regions. This enables robust completion checks and resume-aware writes for interrupted runs. Operational Rule for Downstream Analyses ---------------------------------------- -After canonical ``data`` is established, downstream analyses treat base source -data as immutable and write derived outputs under ``results//latest``. +After canonical source data is established, downstream analyses should resolve +logical input aliases to internal runtime-cache components. New code should not +treat root arrays or legacy ``results/.../data`` paths as the canonical public +interface. diff --git a/docs/source/runtime/module-map.rst b/docs/source/runtime/module-map.rst index 467b8f9..3bf22ad 100644 --- a/docs/source/runtime/module-map.rst +++ b/docs/source/runtime/module-map.rst @@ -15,19 +15,19 @@ Core Orchestration - Current role * - Entrypoint - ``clearex.main`` - - GUI/headless launch, ingestion + analysis orchestration, provenance - persistence. + - GUI/headless launch, ingestion + analysis orchestration, OME output + publication, and provenance persistence. * - Shared runtime schema - ``clearex.workflow`` - Typed workflow config, operation parameter normalization, execution - sequence resolution, Dask backend config/serialization. + sequence resolution, and logical input-source mapping. * - GUI - ``clearex.gui.app``, ``clearex.gui.spacing`` - - Setup dialog, backend/Zarr settings dialogs, analysis selection and - parameter collection, workflow progress UI. 
+ - Setup dialog, backend / OME-Zarr settings dialogs, analysis selection, + parameter collection, and workflow progress UI. * - CLI and logging - ``clearex.io.cli``, ``clearex.io.log`` - - Command-line parser and runtime logger setup. + - Command-line parser, migration entrypoints, and runtime logger setup. Data and Metadata ----------------- @@ -41,15 +41,20 @@ Data and Metadata - Current role * - Experiment ingestion - ``clearex.io.experiment`` - - Navigate experiment parsing, source discovery, canonical Zarr/N5 - materialization, pyramid generation, completion checks. + - Navigate experiment parsing, source discovery, canonical OME-Zarr + materialization, runtime-cache generation, and completion checks. + * - OME store helpers + - ``clearex.io.ome_store`` + - OME-Zarr path helpers, namespaced metadata helpers, public HCS + publication, and legacy-store migration. * - Provenance - ``clearex.io.provenance`` - Append-only run records, latest-output references, history summaries, - hash-chain verification. + and hash-chain verification. * - Data reading - ``clearex.io.read`` - - Reader abstraction used for metadata loading and non-Navigate inputs. + - Reader abstraction used for metadata loading and non-Navigate inputs, + with OME-aware array selection. Analysis Modules ---------------- @@ -65,23 +70,35 @@ The following operations are wired into ``clearex.main``: - Runtime status * - Flatfield correction - ``clearex.flatfield.pipeline`` - - Integrated and executable; writes latest outputs in canonical results - namespace. + - Integrated and executable; writes runtime-cache image outputs and + publishes public OME results. * - Deconvolution - ``clearex.deconvolution.pipeline`` - - Integrated and executable with canonical-store inputs. + - Integrated and executable with OME-Zarr runtime inputs and public OME + output publication. + * - Shear transform + - ``clearex.shear.pipeline`` + - Integrated and executable with runtime-cache output publication. 
+ * - Registration + - ``clearex.registration.pipeline`` + - Integrated and executable; writes fused runtime-cache outputs, publishes + public OME results, and stores auxiliary transforms in ``clearex/results``. + * - uSegment3D + - ``clearex.usegment3d.pipeline`` + - Integrated and executable with runtime-cache image outputs and public + OME publication. * - Particle detection - ``clearex.detect.pipeline`` - - Integrated and executable with canonical-store inputs. - * - Visualization + - Integrated and executable; writes metadata/table outputs under + ``clearex/results``. + * - Display pyramid / visualization - ``clearex.visualization.pipeline`` - - Integrated and executable; supports napari launch behavior and optional - overlays, interactive keyframe capture, and persisted movie-ready - keyframe manifests. - * - Registration - - Registration modules under ``clearex.registration`` - - Selectable in workflow config, but canonical 6D-store integration in - ``main.py`` is currently marked as not integrated and is skipped. + - Integrated and executable; prepares internal display pyramids, + launches napari, and persists visualization metadata. + * - MIP export + - ``clearex.mip_export.pipeline`` + - Integrated and executable; exports files outside the store and records + metadata in ``clearex/results``. 
Supporting Algorithm Packages
-----------------------------

diff --git a/docs/source/runtime/provenance.rst b/docs/source/runtime/provenance.rst
index bab8146..bee7b67 100644
--- a/docs/source/runtime/provenance.rst
+++ b/docs/source/runtime/provenance.rst
@@ -14,24 +14,30 @@ ClearEx provenance is designed to answer:
 Storage Layout
 --------------
 
-Within a canonical Zarr/N5 store:
+Within a canonical OME-Zarr store:
 
-- run records: ``provenance/runs/``
-- latest output references: ``provenance/latest_outputs/``
+- run records: ``clearex/provenance/runs/``
+- latest output references: ``clearex/provenance/latest_outputs/``
+- public image collections: root source HCS collection and
+  ``results/<analysis>/latest`` for image-producing analyses
+- internal image outputs:
+  ``clearex/runtime_cache/results/<analysis>/latest``
+- ClearEx-owned non-image latest outputs:
+  ``clearex/results/<analysis>/latest``
 
-Run records are append-only. Large analysis outputs remain latest-only under
-``results/<analysis>/latest`` to control storage growth.
+Run records are append-only. Large image outputs remain latest-only in the
+runtime-cache/public-output split to control storage growth.
Run Record Content ------------------ ``persist_run_provenance`` stores: -- run identifiers/index/status/timestamps, +- run identifiers, index, status, and timestamps, - input summary and input fingerprint hash, - normalized workflow settings, - effective Dask backend payload and chunk/pyramid settings, -- effective spatial-calibration payload/text/explicitness, +- effective spatial-calibration payload and canonical text form, - selected analyses and per-analysis parameters, - ordered step records and output references, - software metadata (package version, git commit/branch/dirty), @@ -67,8 +73,10 @@ Spatial Placement Reproducibility Store-level Navigate placement metadata is part of the reproducibility record: -- workflow provenance stores the effective ``spatial_calibration`` payload, - canonical text form, and whether it was explicitly supplied by the operator; +- canonical store metadata persists ``spatial_calibration`` in + ``clearex/metadata``, +- workflow provenance stores the effective payload, canonical text form, and + whether it was explicitly supplied by the operator, - visualization latest metadata stores the effective spatial calibration used for multiposition placement. @@ -87,4 +95,4 @@ execution: - if required outputs are missing, the operation is re-run. Current runtime applies this matching logic to flatfield, deconvolution, -particle detection, and registration steps. +particle detection, registration, and other latest-only analysis steps. 
diff --git a/docs/zarr_materialization_workflow.rst b/docs/zarr_materialization_workflow.rst index 3911357..a884e2c 100644 --- a/docs/zarr_materialization_workflow.rst +++ b/docs/zarr_materialization_workflow.rst @@ -1,185 +1,95 @@ -Zarr Materialization Workflow and Verification -============================================== +OME-Zarr Materialization and Migration Workflow +=============================================== Implemented workflow -------------------- -The ingestion path for Navigate ``experiment.yml`` now materializes -source data into a canonical 6D Zarr ``data`` array and writes -configured resolution-pyramid levels with Dask-parallel writes. +The ingestion path for Navigate ``experiment.yml`` materializes source data +into a canonical OME-Zarr v3 store and preserves ClearEx's internal execution +arrays alongside public OME image collections. 1. Load and normalize experiment metadata from ``experiment.yml``. 2. Resolve source data path from acquisition outputs. -3. Open source directly with format-specific, Dask-compatible loaders: - - - TIFF/OME-TIFF via ``tifffile`` + ``da.from_zarr(...)`` - - H5/HDF5 via ``h5py`` + ``da.from_array(...)`` - - Zarr/N5 via ``da.from_zarr(...)`` - - NPY/NPZ via ``numpy`` + ``da.from_array(...)`` - -4. Infer/normalize axis order and coerce to canonical - ``(t, p, c, z, y, x)``. -5. Materialize base ``data`` using GUI-configured chunks. -6. Build downsampled levels in ``data_pyramid/level_`` according to - GUI pyramid factors. -7. Persist canonical store metadata and source provenance fields in Zarr - attributes. - -Pyramid downsampling behavior +3. Open source directly with format-specific loaders: + + - TIFF/OME-TIFF + - H5/HDF5 + - generic Zarr/N5 + - NumPy ``.npy`` / ``.npz`` + - canonical OME-Zarr + +4. Infer and normalize axis order, coercing to canonical + ``(t, p, c, z, y, x)`` for internal execution. +5. 
Materialize the internal source array at
+   ``clearex/runtime_cache/source/data`` using GUI-configured chunks.
+6. Build internal source pyramid levels at
+   ``clearex/runtime_cache/source/data_pyramid/level_<n>`` according to GUI
+   pyramid factors.
+7. Persist namespaced store metadata in ``clearex/metadata``.
+8. Compute multiposition translations from Navigate stage rows plus the
+   effective spatial calibration.
+9. Publish the public root OME HCS source collection
+   ``A/1/<field>/<image>``.
+
+Canonical store path behavior
 -----------------------------
 
-- Pyramid levels are generated in the same materialization run,
-  immediately after base ``data`` write.
-- Writes run through Dask and Zarr for chunk-parallel persistence.
-- Levels are generated with stride-based decimation (nearest-neighbor)
-  for speed and dtype preservation.
-- Metadata is stored at root and base-array attrs:
+- Existing canonical OME-Zarr stores are reused in place.
+- Non-canonical inputs materialize to ``data_store.ome.zarr`` beside
+  ``experiment.yml``.
+- Legacy ClearEx ``.zarr`` / ``.n5`` stores are not reused as canonical inputs
+  directly. They must be migrated.
 
-  - ``data_pyramid_levels``
-  - ``data_pyramid_factors_tpczyx``
-  - ``data_pyramid_shapes_tpczyx``
-  - ``data.attrs["pyramid_levels"]``
-  - ``data.attrs["pyramid_factors_tpczyx"]``
+Public vs internal layout
+-------------------------
 
-GUI workflow (two-step)
------------------------
+Public OME contract
+~~~~~~~~~~~~~~~~~~~
 
-The GUI now runs in two windows:
+- Root source image collection: ``A/1/<field>/<image>``
+- Public image-analysis collections: ``results/<analysis>/latest``
 
-1. **Setup window**
+Internal ClearEx contract
+~~~~~~~~~~~~~~~~~~~~~~~~~
 
-   - User selects ``experiment.yml``.
-   - User configures Dask backend and Zarr save settings.
-   - User loads and reviews image metadata.
-   - User clicks **Next**.
+- Source runtime array: ``clearex/runtime_cache/source/data`` +- Source runtime pyramid: ``clearex/runtime_cache/source/data_pyramid/level_*`` +- Image-analysis runtime outputs: + ``clearex/runtime_cache/results//latest/data`` +- Non-image artifacts and metadata: + ``clearex/results//latest`` +- Provenance: + ``clearex/provenance/runs/`` -2. **Analysis window** - - - Opens after canonical store readiness is confirmed. - - User selects analysis operations (deconvolution, particle - detection, registration, visualization). - -Setup-window behavior on **Next**: - -- If target canonical store already exists, setup proceeds directly to - analysis selection. -- If target store does not exist, GUI creates it first and shows a - styled progress dialog with stage updates. -- After store creation completes, setup closes and analysis-selection - window opens. - -Store path behavior -------------------- - -- If source is already Zarr/N5, the same store path is reused (no - duplicate store path is created). -- If source is non-Zarr, output store is written next to - ``experiment.yml`` as: - - - ``data_store.zarr`` - -Dask backend behavior ---------------------- - -- Runtime now uses the configured GUI backend as before. -- Local cluster startup for this I/O-heavy workflow is now - thread-oriented (``processes=False``). -- Materialization compute executes on the active Dask client when - available. -- For source graphs that cannot be serialized to distributed workers - (for example some lock-backed TIFF/HDF inputs), execution - automatically falls back to local threaded compute. - -Safety for in-place Zarr conversion ------------------------------------ - -When source and destination are the same store and source component is -already ``data``, conversion stages into a temporary component and then -swaps into ``data`` to avoid read/write self-conflicts. - -Automated verification +Legacy-store migration ---------------------- -Unit tests -~~~~~~~~~~ - -Executed: - -.. 
code:: bash - - uv run --with pytest --with requests python -m pytest -q tests/test_workflow.py tests/io/test_cli.py tests/io/test_experiment.py tests/io/test_provenance.py - -Result: ``40 passed`` - -Added coverage validates: - -- Non-Zarr source writes to ``data_store.zarr`` in ``experiment.yml`` - directory. -- Existing Zarr source reuses same store path. -- Same-component (``data``) in-place Zarr rewrite path works. -- Canonical shape/chunk expectations and value integrity. -- Downsampled pyramid levels are written and have expected - shapes/values. - -Lint checks -~~~~~~~~~~~ - -Executed: - -.. code:: bash - - ruff check src/clearex/main.py src/clearex/workflow.py src/clearex/gui/app.py src/clearex/io/cli.py src/clearex/io/experiment.py src/clearex/io/provenance.py - -Result: ``All checks passed`` - -Real-data run (provided dataset) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Dataset root: - -- ``/Users/Dean/Desktop/kevin/20260307_lung_mv3_488nm/`` +Use the explicit migration command to upgrade pre-OME ClearEx stores: -Materialization run on representative cells (``cell_006`` to -``cell_010``) across TIFF, OME-TIFF, H5, N5, and OME-Zarr with chunks -``(1, 1, 1, 8, 128, 128)`` and pyramid factors -``((1,), (1,), (1,), (1, 2, 4), (1, 2, 4), (1, 2, 4))``: +.. 
code-block:: bash -- ``cell_006`` TIFF -> ``data_store.zarr``, shape - ``(1, 1, 1, 100, 2048, 2048)``, pyramid shapes - ``[(1, 1, 1, 50, 1024, 1024), (1, 1, 1, 25, 512, 512)]``, elapsed - ``6.63s`` -- ``cell_007`` OME-TIFF -> ``data_store.zarr``, shape - ``(1, 1, 1, 100, 2048, 2048)``, pyramid shapes - ``[(1, 1, 1, 50, 1024, 1024), (1, 1, 1, 25, 512, 512)]``, elapsed - ``6.27s`` -- ``cell_008`` H5 -> ``data_store.zarr``, shape - ``(1, 1, 1, 100, 2048, 2048)``, pyramid shapes - ``[(1, 1, 1, 50, 1024, 1024), (1, 1, 1, 25, 512, 512)]``, elapsed - ``29.76s`` -- ``cell_009`` N5 -> same store ``CH00_000000.n5``, shape - ``(1, 1, 1, 100, 512, 512)``, pyramid shapes - ``[(1, 1, 1, 50, 256, 256), (1, 1, 1, 25, 128, 128)]``, elapsed - ``0.69s`` -- ``cell_010`` OME-Zarr -> same store ``CH00_000000.ome.zarr``, shape - ``(2, 1, 2, 100, 512, 512)``, pyramid shapes - ``[(2, 1, 2, 50, 256, 256), (2, 1, 2, 25, 128, 128)]``, elapsed - ``1.46s`` + clearex --migrate-store /path/to/legacy_store.zarr -This confirms output-path policy, canonical layout, and actual persisted -downsample pyramid levels on heterogeneous acquisition formats. +Optional destination and overwrite flags are available: -Headless workflow smoke test -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. code-block:: bash -Executed: + clearex --migrate-store /path/to/legacy_store.n5 \ + --migrate-output /path/to/migrated_store.ome.zarr \ + --migrate-overwrite -.. code:: bash +Migration copies legacy source/runtime arrays into the new namespaced layout, +copies provenance and auxiliary analysis artifacts, and republishes public OME +image collections. - uv run python -m clearex.main --headless --no-gui --file /Users/Dean/Desktop/kevin/20260307_lung_mv3_488nm/cell_001/experiment.yml --dask +Verification expectations +------------------------- -Result: +When this workflow changes, validation should cover: -- Workflow completed successfully. 
-- Source TIFF was materialized to - ``/Users/Dean/Desktop/kevin/20260307_lung_mv3_488nm/cell_001/data_store.zarr``. -- Provenance run record was persisted in the same store. +- canonical path selection to ``data_store.ome.zarr``, +- internal source-array and pyramid completeness, +- public OME root metadata and HCS publication, +- preservation of spatial calibration / position translations, +- migration of legacy source data and representative analysis outputs, +- OME-aware reader selection of the public collection. diff --git a/src/clearex/AGENTS.md b/src/clearex/AGENTS.md index 2f9baa9..65f1e64 100644 --- a/src/clearex/AGENTS.md +++ b/src/clearex/AGENTS.md @@ -22,13 +22,29 @@ This directory contains the runtime orchestration surface for ClearEx. ## Runtime Invariants -- Canonical analysis image layout is `(t, p, c, z, y, x)`. -- Canonical source array component is `data`. -- Analysis outputs use `results//latest/...` (latest-only replacement). +- Canonical persisted stores are OME-Zarr v3 `*.ome.zarr` objects. +- Canonical analysis image layout remains `(t, p, c, z, y, x)` for ClearEx + internal execution arrays. +- Public source data is exposed as a single-well OME-Zarr HCS collection at the + store root (`A/1//` with `TCZYX` arrays). +- Public image-producing analysis outputs are exposed as OME-Zarr HCS + collections under `results//latest`. +- ClearEx internal execution arrays live under + `clearex/runtime_cache/source/...` and + `clearex/runtime_cache/results//latest/...`. +- ClearEx-owned metadata and non-image artifacts live under namespaced paths: + `clearex/metadata`, `clearex/provenance`, `clearex/gui_state`, and + `clearex/results//latest`. +- Workflow input aliases such as `data`, `flatfield`, `deconvolution`, + `shear_transform`, `usegment3d`, and `registration` are logical names that + resolve to runtime-cache components. New runtime code must not hard-code old + public array paths. 
- Provenance records are append-only and include workflow + runtime parameters. -- Root store attr `spatial_calibration` is the canonical store-level - stage-to-world axis mapping for Navigate multiposition placement; missing - attrs mean identity mapping. +- Store-level spatial calibration is persisted in `clearex/metadata` and + missing values resolve to the identity mapping. +- Legacy root `data`, root `data_pyramid`, and + `results//latest/data` layouts are migration-only and must not be + reintroduced as canonical outputs. ## Dask Workload Policy @@ -74,7 +90,8 @@ This directory contains the runtime orchestration surface for ClearEx. - GUI runtime controls now expose channel checkboxes and emit `channel_indices`, - headless CLI supports `--channel-indices` with `all` for full-channel runs, - headless CLI supports `--input-resolution-level` for pyramid-level selection, - - latest output is persisted to `results/usegment3d/latest/data`, + - image output is written to the runtime cache and published as the public + OME collection `results/usegment3d/latest`, - provenance references include GPU/tiling configuration, resolution/output-space metadata, and selected views. - Runtime uses optional dependency loading (`u-Segment3D`) and supports `require_gpu` fail-fast behavior when CUDA is unavailable. @@ -88,8 +105,8 @@ This directory contains the runtime orchestration surface for ClearEx. - per-layer multiscale policies are now `inherit`, `require`, and `off`, - legacy saved `auto_build` values are normalized to `inherit`, - reusable display pyramids are prepared explicitly via the - `display_pyramid` analysis task under - `results/display_pyramid/by_component/...`. + `display_pyramid` analysis task and summarized under + `clearex/results/display_pyramid/latest`. ## Recent Runtime Updates (2026-03-20) @@ -97,8 +114,7 @@ This directory contains the runtime orchestration surface for ClearEx. 
- `WorkflowConfig` now carries `SpatialCalibrationConfig`, - canonical text form is `z=...,y=...,x=...`, - allowed bindings are `+/-x`, `+/-y`, `+/-z`, `+/-f`, and `none`, - - the root store attr `spatial_calibration` persists schema, mapping, and - `theta_mode`, + - `clearex/metadata` persists schema, mapping, and `theta_mode`, - missing attrs resolve to identity instead of requiring backfilled config. - Setup flow now exposes a lightweight `Spatial Calibration` control per experiment: @@ -106,7 +122,7 @@ This directory contains the runtime orchestration surface for ClearEx. - existing stores prefill the current mapping, - `Next` writes the resolved mapping to every reused or newly prepared store. - Headless workflows now accept `--stage-axis-map` for Navigate - `experiment.yml` inputs and existing Zarr/N5 stores. + `experiment.yml` inputs and existing canonical OME-Zarr stores. - Visualization position affines now derive world `z/y/x` translations from the stored calibration: - Navigate `F` is available as a placement source, @@ -133,6 +149,21 @@ This directory contains the runtime orchestration surface for ClearEx. - Detailed operational guidance for this area now lives in `src/clearex/visualization/README.md`. +## Recent Runtime Updates (2026-03-23) + +- ClearEx adopted OME-Zarr v3 as the canonical store contract: + - materialization now targets `data_store.ome.zarr`, + - public source data is published as a root OME HCS collection, + - public image-analysis outputs are published as OME HCS collections under + `results//latest`, + - internal execution arrays moved under `clearex/runtime_cache/...`, + - store metadata, provenance, GUI state, and non-image artifacts moved under + `clearex/...`. +- The runtime now refuses legacy canonical ClearEx stores as direct canonical + inputs and requires migration via `clearex --migrate-store`. +- Reader selection now prefers validated/public OME metadata instead of + “largest array wins” heuristics. 
+ ## Recent Runtime Updates (2026-03-22) - Registration pipeline (`pipeline.py`) performance optimizations: @@ -172,12 +203,23 @@ This directory contains the runtime orchestration surface for ClearEx. - Operation order is driven by `analysis_parameters[]["execution_order"]`. - Per-step input source comes from `analysis_parameters[]["input_source"]`. - `workflow.resolve_analysis_execution_sequence(...)` is the canonical order resolver. +- Input-source UI and workflow defaults should present logical aliases rather + than raw internal component paths whenever possible. ## Implementation Rules - Keep parsing/normalization centralized in `workflow.py`; avoid duplicating logic in GUI/runtime. - Keep function signatures type hinted. - Use numpydoc docstrings for new/changed functions. +- New image-producing analyses must: + - write executable arrays into `clearex/runtime_cache/results//latest`, + - publish a public OME image collection under `results//latest`, + - keep auxiliary arrays / metadata under `clearex/results//latest`. +- New metadata/table-only analyses must write to `clearex/results/...` and + must not allocate fake public image collections. +- Readers, GUI discovery, and visualization helpers must prefer public OME + metadata and OME coordinate transforms. Do not reintroduce root-array + heuristics as the canonical path. ## Ongoing GUI And Docs Hygiene diff --git a/src/clearex/detect/README.md b/src/clearex/detect/README.md index 15408de..b5ec7d9 100644 --- a/src/clearex/detect/README.md +++ b/src/clearex/detect/README.md @@ -10,16 +10,19 @@ This folder contains particle-detection logic and chunk-parallel analysis execut ## Particle Pipeline Contract - Input is a canonical 6D component `(t, p, c, z, y, x)`. -- `input_source` parameter selects the source component path (default: `data`). +- `input_source` parameter selects a logical source alias or explicit internal + component path (default: `data`). - Channel selection is done with `channel_index`. 
- Chunk tasks run in parallel and return global-coordinate rows. ## Output Contract -- Latest result root: `results/particle_detection/latest` +- Latest result root: `clearex/results/particle_detection/latest` - Datasets: - `detections` columns: `t,p,c,z,y,x,sigma,intensity` - `points_tzyx` for Napari points rendering +- Particle detection is a metadata/table output, not a public OME image + collection. Keep it in the ClearEx namespace. - Latest-output reference must be registered in provenance metadata. ## Performance Notes diff --git a/src/clearex/flatfield/README.md b/src/clearex/flatfield/README.md index 0a6594a..7a8f98a 100644 --- a/src/clearex/flatfield/README.md +++ b/src/clearex/flatfield/README.md @@ -1,7 +1,7 @@ # Flatfield Agent Notes This folder owns BaSiCPy-driven flatfield correction for canonical ClearEx -stores. +OME-Zarr stores. ## Key File @@ -14,7 +14,8 @@ stores. ## Input Contract - Source must be canonical 6D `(t, p, c, z, y, x)`. -- `input_source` defaults to `data`. +- `input_source` defaults to the logical alias `data`, which resolves to the + runtime-cache source component. - Core params: - `fit_mode`: `tiled` or `full_volume`. - `fit_tile_shape_yx`: tile size for tiled fitting. @@ -25,14 +26,21 @@ stores. 
## Output Contract -- Latest output root: `results/flatfield/latest` -- Datasets: - - `data` (corrected output, float32) - - `flatfield_pcyx` (float32) - - `darkfield_pcyx` (float32) - - `baseline_pctz` (float32) +- Public latest image root: `results/flatfield/latest` +- Internal image data: + - `clearex/runtime_cache/results/flatfield/latest/data` (corrected output, + float32) + - `clearex/runtime_cache/results/flatfield/latest/data_pyramid/level_*` +- ClearEx-owned auxiliary artifacts: + - `clearex/results/flatfield/latest/flatfield_pcyx` + - `clearex/results/flatfield/latest/darkfield_pcyx` + - `clearex/results/flatfield/latest/baseline_pctz` + - `clearex/results/flatfield/latest/checkpoint` - Storage policy is latest-only for large arrays. - `run_id` is backfilled later by main workflow/provenance path. +- The public OME collection is published from the runtime-cache data after the + flatfield write completes. New code must not treat the public collection as + the executable write target. ## Correction Formula @@ -64,7 +72,7 @@ stores. ## Resume / Checkpoint Contract -- Checkpoint group: `results/flatfield/latest/checkpoint` +- Checkpoint group: `clearex/results/flatfield/latest/checkpoint` - Schema guard: `clearex.flatfield.resume.v1` - Resume is allowed only when all are compatible: - source component, shape, chunks @@ -79,13 +87,13 @@ stores. ## Pyramid Materialization - Builds multiscale corrected output under - `results/flatfield/latest/data_pyramid/level_*`. -- Factors are resolved from source/root attrs when available; otherwise base - level only. + `clearex/runtime_cache/results/flatfield/latest/data_pyramid/level_*`. +- Factors are resolved from source-component attrs / store metadata when + available; otherwise base level only. - Uses Dask array slicing/rechunk/to_zarr. -- Writes pyramid metadata to both: - - `results/flatfield/latest/data` attrs - - `results/flatfield/latest` attrs (`data_pyramid_*`). 
+- Writes pyramid metadata to the runtime-cache image attrs and the + `clearex/results/flatfield/latest` attrs. The public OME collection is a + published view, not the metadata source of truth. ## Provenance / Latest Output Reference @@ -101,7 +109,7 @@ stores. issues appear, consider bounded in-flight scheduling. - Keep transform writes non-overlapping and preserve checkpoint update semantics. - Preserve chunk-probe resume guards; they protect against malformed checkpoint - chunks in N5/Zarr stores. + chunks in migrated/legacy stores. ## Validation diff --git a/src/clearex/gui/README.md b/src/clearex/gui/README.md index 3b66e40..477163a 100644 --- a/src/clearex/gui/README.md +++ b/src/clearex/gui/README.md @@ -11,19 +11,20 @@ This folder owns the PyQt6 UX in `app.py`. - drag and drop of experiments, folders, or `.clearex-experiment-list.json` files - Add/remove experiment entries from the list and persist the list for reuse - Auto-load metadata when the current list selection changes - - Configure Dask backend and Zarr save options + - Configure Dask backend and OME-Zarr save options - Configure `Spatial Calibration` for the currently selected experiment: - map world `z/y/x` to Navigate stage `X/Y/Z/F` or `none`, - prefill from the target store when available, - otherwise keep a per-experiment draft while setup remains open - Persist the last-used Zarr save config across sessions - Display image metadata - - On `Next`, batch-materialize only missing/incomplete canonical stores for + - On `Next`, batch-materialize only missing/incomplete canonical OME-Zarr + stores for every listed experiment, persist the resolved spatial calibration for every reused/new store, then continue with the currently selected experiment - - `Rebuild Canonical Store` forces the listed stores to be rebuilt with the - current GUI chunking and pyramid settings + - `Rebuild Canonical Store` forces the listed stores to be rebuilt as + `.ome.zarr` outputs with the current GUI chunking and 
pyramid settings - Analysis window (`AnalysisSelectionDialog`): - Top `Analysis Scope` panel: - choose the active `experiment.yml` from the loaded setup list, @@ -52,7 +53,7 @@ This folder owns the PyQt6 UX in `app.py`. - Per-operation `Input source` options depend on selected upstream operations and execution order. - `Visualization` is treated as a terminal/view step; it should not be offered as an upstream image source for later operations. - Visualization placement should come from the active target store's persisted - `spatial_calibration`, not from one-off GUI-only state. + `clearex/metadata["spatial_calibration"]`, not from one-off GUI-only state. - Visualization configuration currently exposes: - `position_index` for multiposition datasets - multiscale loading toggle @@ -76,6 +77,19 @@ This folder owns the PyQt6 UX in `app.py`. - selected background and selected text colors - Keep parameter cards, help panels, and popups visually aligned with title/label palette. +## Canonical Store UX Rules + +- Treat `.ome.zarr` as the only canonical store suffix in labels, tooltips, + examples, and validation messages. +- Present logical analysis inputs (`data`, `flatfield`, `deconvolution`, + `shear_transform`, `usegment3d`, `registration`) instead of raw internal + component paths wherever possible. +- If the UI shows store structure for debugging, distinguish: + - public OME image collections at the root and under `results//latest` + - internal ClearEx execution/artifact paths under `clearex/...` +- Do not teach users that root `data`, root `data_pyramid`, or + `results//latest/data` are the canonical public contract. + ## Branding Assets and Layout - Branding assets live in `src/clearex/gui/`: diff --git a/src/clearex/io/README.md b/src/clearex/io/README.md index 4bcadf7..cf3b3ed 100644 --- a/src/clearex/io/README.md +++ b/src/clearex/io/README.md @@ -5,25 +5,41 @@ This folder contains ingestion, CLI, logging, and provenance logic. 
## Key Files - `experiment.py`: Navigate `experiment.yml` parsing, source resolution, canonical store materialization. +- `ome_store.py`: OME-Zarr v3 path helpers, publication helpers, metadata namespace helpers, and legacy-store migration. - `provenance.py`: run record persistence and latest-output references. - `cli.py`: headless flags and parser behavior. - `log.py`: logger setup and output location. ## Data-Store Rules -- For non-Zarr/N5 sources: create `data_store.zarr` beside `experiment.yml`. -- For Zarr/N5 sources: reuse existing store path (no duplicate copy path). -- Canonical base array component is `data` with shape `(t, p, c, z, y, x)`. -- Root store attr `spatial_calibration` is the canonical store-level - stage-to-world mapping for Navigate multiposition placement metadata. -- Missing `spatial_calibration` attrs resolve to identity +- Canonical stores are OME-Zarr v3 directories ending in `*.ome.zarr`. +- For non-canonical sources: create `data_store.ome.zarr` beside + `experiment.yml`. +- Reuse an input store in place only when it is already a canonical OME-Zarr + store. Generic Zarr/N5 inputs are source formats, not canonical outputs. +- Canonical public source data is a single-well OME HCS collection at the store + root. +- Canonical internal source array is + `clearex/runtime_cache/source/data` with shape `(t, p, c, z, y, x)`. +- Canonical metadata/provenance namespaces are: + - `clearex/metadata` + - `clearex/provenance` + - `clearex/gui_state` + - `clearex/results` +- Store-level stage-to-world mapping is persisted in + `clearex/metadata["spatial_calibration"]`. +- Missing `spatial_calibration` metadata resolves to identity (`z=+z,y=+y,x=+x`). +- Legacy root `data`, root `data_pyramid`, and old `results/.../data` layouts + are migration-only. Direct canonical runtime use must go through + `clearex --migrate-store`. ## Materialization Rules - Use GUI/Workflow chunk and pyramid configuration as the source of truth. 
- Prefer chunk-parallel, non-overlapping writes. -- Preserve metadata required for downstream analysis/provenance. +- Preserve metadata required for downstream analysis/provenance and publish the + public OME source collection after runtime-cache source writes succeed. - For Navigate TIFF acquisitions written as `Position*/CH*.tiff`, materialization must assemble the full file collection into one source array over `(t, p, c, z, y, x)` before canonical write. @@ -40,25 +56,35 @@ This folder contains ingestion, CLI, logging, and provenance logic. 3. raw `pixel_size` fallback. - In collection mode, store-level `source_data_path` should point at the acquisition directory (not only the first TIFF file) for clearer provenance. -- If `data_store.zarr` was generated by older single-file TIFF logic, rerun - materialization from `experiment.yml` to rebuild correct `p`/`c` dimensions. +- If a legacy `data_store.zarr` was generated by older single-file TIFF logic, + migrate or rebuild it as `data_store.ome.zarr` before relying on it as a + canonical store. - Materialization and store initialization must preserve an existing `spatial_calibration` mapping when present and backfill identity for legacy - stores without rewriting canonical `data`. + stores without rewriting canonical source data. - Updating spatial calibration is metadata-only; never rewrite the canonical source array to express placement changes. ## Headless Spatial Calibration Override - CLI accepts `--stage-axis-map "z=+x,y=none,x=+y"` for Navigate - `experiment.yml` inputs and existing Zarr/N5 stores. + `experiment.yml` inputs and existing canonical OME-Zarr stores. - For `experiment.yml` inputs, explicit overrides are written after store materialization. -- For existing Zarr/N5 stores, explicit overrides update the root attr before - analysis starts. +- For existing canonical OME-Zarr stores, explicit overrides update + `clearex/metadata` before analysis starts. 
- When `--stage-axis-map` is omitted, existing store calibration must be preserved rather than overwritten with identity. +## Migration Rules + +- `clearex --migrate-store ` is the required path for upgrading + old ClearEx `.zarr` / `.n5` stores into canonical `.ome.zarr`. +- Migration copies legacy source arrays into the runtime cache, copies + namespaced artifacts, publishes public OME image collections, and preserves + legacy metadata in `clearex/metadata`. +- New code should not silently accept legacy stores as canonical runtime inputs. + ## Dask Client Defaults - `create_dask_client(...)` local-mode startup defaults to `dashboard_address=":0"`. @@ -73,7 +99,14 @@ This folder contains ingestion, CLI, logging, and provenance logic. ## Provenance Rules - Provenance is append-only. -- Large analysis outputs use latest-only storage under `results//latest`. +- Run records live under `clearex/provenance/runs/`. +- Latest output references live under + `clearex/provenance/latest_outputs/`. +- Large image outputs use latest-only internal storage under + `clearex/runtime_cache/results//latest` and are republished as + public OME collections under `results//latest`. +- Metadata/table-only outputs use latest-only storage under + `clearex/results//latest`. - Persist effective backend config and analysis parameters. - Persist effective spatial calibration in workflow provenance so historical runs preserve the placement rule they used. diff --git a/src/clearex/mip_export/README.md b/src/clearex/mip_export/README.md index 20cea94..d3a9e67 100644 --- a/src/clearex/mip_export/README.md +++ b/src/clearex/mip_export/README.md @@ -1,7 +1,7 @@ # MIP Export Agent Notes This folder owns maximum-intensity projection (MIP) export from canonical -ClearEx stores. +ClearEx OME-Zarr stores. ## Key Files @@ -14,7 +14,8 @@ ClearEx stores. ## Input Contract - Source data must be canonical 6D `(t, p, c, z, y, x)`. -- `input_source` selects component path (default: `data`). 
+- `input_source` selects a logical source alias or explicit internal image + component (default: `data`). - `position_mode`: - `per_position`: one file per `(projection, p, t, c)`. - `multi_position`: one file per `(projection, t, c)` with leading `p` axis. @@ -64,11 +65,13 @@ ClearEx stores. ## Output + Provenance Contract -- Latest analysis metadata path: `results/mip_export/latest`. +- Latest analysis metadata path: `clearex/results/mip_export/latest`. - Large projection files are stored outside the analysis store in a `latest` output directory (configured or auto-generated). - `register_latest_output_reference(...)` must be called with analysis key - `mip_export` and component `results/mip_export/latest`. + `mip_export` and component `clearex/results/mip_export/latest`. +- MIP export is metadata-only inside the OME-Zarr store; it does not publish a + public OME image collection under `results/mip_export/latest`. ## Failure Patterns diff --git a/src/clearex/registration/README.md b/src/clearex/registration/README.md index b699556..60c9e3f 100644 --- a/src/clearex/registration/README.md +++ b/src/clearex/registration/README.md @@ -8,7 +8,33 @@ The registration module provides tools for aligning images using combined linear - `linear.py` - Linear/affine registration functions - `nonlinear.py` - Nonlinear/deformable registration functions - `common.py` - Shared utilities (transform I/O, cropping, etc.) -- `pipeline.py` - Chunked tile-registration workflow for canonical 6D analysis stores (Dask + Zarr) +- `pipeline.py` - Chunked tile-registration workflow for canonical 6D OME-Zarr + stores (Dask + Zarr v3) + +## ClearEx Runtime Contract + +These notes are mandatory when editing `pipeline.py` and related runtime +integration: + +- Canonical runtime inputs are OME-Zarr v3 `*.ome.zarr` stores. 
+- `input_source` is a logical workflow alias by default (`data`, + `flatfield`, `deconvolution`, `shear_transform`, `usegment3d`, + `registration`) and resolves to a ClearEx runtime-cache image component. +- Registration writes fused image data to + `clearex/runtime_cache/results/registration/latest/data`. +- Registration publishes its public image result as the OME collection + `results/registration/latest`. +- Registration auxiliary artifacts stay under + `clearex/results/registration/latest`: + - `affines_tpx44` + - `blend_weights` + - `edges_pe2` + - `pairwise_affines_tex44` + - `edge_status_te` + - `edge_residual_te` + - `transformed_bboxes_tpx6` +- Do not reintroduce `results/registration/latest/data` as the canonical write + target. That legacy path is migration-only. ## Quick Start @@ -443,7 +469,7 @@ parameters are passed through the `parameters` dict to `_source_subvolume_for_overlap` to compute the minimal source Zarr slice that covers each output region, dramatically reducing I/O for large tiles. - **Cached blend weights.** The blend weight volume is pre-computed once and - stored as `results/registration/latest/blend_weights_zyx` in the analysis + stored under `clearex/results/registration/latest/blend_weights` in the analysis store. Fusion workers lazily load it instead of recomputing per tile per chunk. @@ -466,4 +492,3 @@ drop-in GPU replacement via `cupyx.scipy.ndimage.affine_transform` is marked as a TODO for future integration. This would benefit both pairwise registration and fusion. The deconvolution subsystem already supports GPU-pinned `LocalCluster` workers; registration would reuse the same backend. - diff --git a/src/clearex/visualization/README.md b/src/clearex/visualization/README.md index 3606904..9fec742 100644 --- a/src/clearex/visualization/README.md +++ b/src/clearex/visualization/README.md @@ -44,15 +44,21 @@ display pyramids for one selected source component. 
### Storage contract - Latest task metadata is stored at: - - `results/display_pyramid/latest` -- Prepared levels are written under: - - `results/display_pyramid/by_component//level_n` + - `clearex/results/display_pyramid/latest` +- Prepared levels are written as ClearEx-owned helper arrays adjacent to the + selected internal source component: + - base-source helper levels use source-adjacent pyramid naming + - derived-source helper levels use `_pyramid/level_n` - Source-component attrs store lookup metadata: - `display_pyramid_levels` - `display_pyramid_factors_tpczyx` - Root attrs store lookup metadata: - `display_pyramid_levels_by_component` +These helper levels are not public OME image collections. They are internal +visualization aids that should remain separate from the canonical OME source +and analysis image collections. + Compatibility attrs from older visualization-driven behavior are still written and still read: @@ -165,8 +171,8 @@ When display metadata must be regenerated, use the sampled pipeline from ## Multiposition Policy -Multiposition rendering continues to use store-level `spatial_calibration` and -Navigate position metadata for affine placement. +Multiposition rendering continues to use store-level spatial calibration from +`clearex/metadata` and Navigate position metadata for affine placement. Default image blending has changed: @@ -180,7 +186,7 @@ This avoids stripe artifacts from overlapping multiposition regions. 
Visualization metadata is stored at: -- `results/visualization/latest` +- `clearex/results/visualization/latest` The latest metadata must include: @@ -221,3 +227,6 @@ When editing this area: - update tests with behavior changes in the same change set - preserve compatibility reads for older stored visualization pyramid attrs until migration is intentional and explicit +- do not document display-pyramid helper arrays as the canonical public image + contract; public OME collections remain the root source image and + `results//latest` image outputs From ff4dd88048719ca541a65a083d2fa5974d757772 Mon Sep 17 00:00:00 2001 From: "Kevin M. Dean" Date: Mon, 23 Mar 2026 05:39:07 -0500 Subject: [PATCH 03/10] Add TensorStore-backed Navigate BDV N5 support Introduce TensorStore-based handling for Navigate BDV N5 sources and document that legacy .n5 is source-only (route via experiment.yml). Key changes: add tensorstore dependency (pyproject.toml, uv.lock), implement TensorStore N5 adapters and helpers in src/clearex/io/experiment.py (adapter, open-as-dask, N5 enumeration/summarization, BDV XML parsing, load_navigate_experiment_source_image_info), update GUI to use the new loader, and update docs to describe the required experiment.yml+XML workflow and that raw zarr.read on .n5 is unsupported. Tests were added/adjusted to create real N5 fixtures and verify behavior (tests/io/test_experiment.py). Overall this enforces canonical materialization into *.ome.zarr and keeps Dask ingestion parallelized on zarr>=3 by using TensorStore for N5 reads. 
--- AGENTS.md | 9 + README.md | 6 +- docs/AGENTS.md | 7 +- docs/source/getting-started.rst | 5 + .../runtime/ingestion-and-canonical-store.rst | 11 +- docs/zarr_materialization_workflow.rst | 7 +- pyproject.toml | 1 + src/clearex/AGENTS.md | 10 + src/clearex/gui/README.md | 6 + src/clearex/gui/app.py | 7 +- src/clearex/io/README.md | 13 +- src/clearex/io/experiment.py | 584 ++++++++++++++---- tests/io/test_experiment.py | 185 +++++- uv.lock | 35 ++ 14 files changed, 728 insertions(+), 158 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 441f067..58549b4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -56,6 +56,15 @@ workflow behavior for ClearEx lives in `src/clearex/AGENTS.md`. ``.zarr`` / ``.n5`` as migration-only inputs. Do not describe them as the preferred runtime contract and do not introduce new fixtures or examples that rely on them as canonical. +- Treat legacy ``.n5`` as source-only unless the path is a Navigate BDV + acquisition reached through ``experiment.yml``. +- For Navigate BDV ``.n5`` sources, documentation and examples must describe + TensorStore-backed reads of ``setup*/timepoint*/s0`` plus companion XML + ``ViewSetup`` metadata. Do not describe raw ``zarr.open_group(...)`` / + ``da.from_zarr(...)`` reads on ``.n5`` paths as the supported contract. +- Do not document bare direct ``--file /path/to/source.n5`` usage as a + supported runtime workflow in phase 1. The supported path is + ``experiment.yml`` materialization into canonical ``*.ome.zarr``. - Public microscopy-facing image data must use the OME-Zarr HCS contract. ClearEx-owned execution caches, provenance, GUI state, and non-image artifacts belong under the namespaced ``clearex/`` tree. diff --git a/README.md b/README.md index bb8d839..b18bcf0 100755 --- a/README.md +++ b/README.md @@ -9,9 +9,10 @@ ClearEx is an open source Python package for scalable analytics of cleared and e ## Current Functionality - GUI-first entrypoint with headless fallback. - Headless CLI for scripted runs. 
-- Input support for TIFF/OME-TIFF, Zarr/N5, HDF5 (`.h5/.hdf5/.hdf`), and NumPy (`.npy/.npz`). +- Input support for TIFF/OME-TIFF, generic Zarr, HDF5 (`.h5/.hdf5/.hdf`), NumPy (`.npy/.npz`), and Navigate BDV N5 acquisitions through `experiment.yml`. - Navigate experiment ingestion from `experiment.yml` / `experiment.yaml`. - Canonical persisted store format is OME-Zarr v3 (`*.ome.zarr`). +- Legacy `.n5` remains a source-only input format. Navigate BDV N5 materialization requires companion `*.xml` metadata and now reads `setup*/timepoint*/s0` datasets through TensorStore so Dask ingestion stays parallelized on `zarr>=3`. - Public microscopy-facing image data is published as OME-Zarr HCS collections, while ClearEx execution caches and non-image artifacts live under namespaced `clearex/...` groups. - Internal analysis image layout remains `(t, p, c, z, y, x)` for runtime-cache arrays and analysis kernels. - Store-level spatial calibration for Navigate multiposition data is persisted in `clearex/metadata` and applied to physical placement metadata without rewriting image data. @@ -247,7 +248,8 @@ clearex --migrate-store /path/to/legacy_store.zarr ## Runtime Behavior Notes - If `--file` points to Navigate `experiment.yml`, ClearEx resolves acquisition data and materializes a canonical store first. - Existing canonical OME-Zarr stores are reused in place. -- Non-canonical acquisition inputs, including TIFF/OME-TIFF, HDF5, NumPy, generic Zarr/N5, and Navigate source layouts, materialize to `data_store.ome.zarr` beside `experiment.yml`. +- Non-canonical acquisition inputs, including TIFF/OME-TIFF, HDF5, NumPy, generic Zarr, Navigate BDV N5 sources, and other Navigate source layouts, materialize to `data_store.ome.zarr` beside `experiment.yml`. +- Bare direct source `.n5` runtime input is not a supported phase-1 workflow. For N5 acquisitions, point `--file` at Navigate `experiment.yml` so ClearEx can resolve BDV XML metadata and materialize canonical `.ome.zarr`. 
- Legacy ClearEx `.zarr` / `.n5` stores are not treated as canonical runtime inputs. Migrate them first with `clearex --migrate-store`. - Canonical stores persist `spatial_calibration = {schema, stage_axis_map_zyx, theta_mode}` inside `clearex/metadata`. Missing metadata resolves to the identity mapping `z=+z,y=+y,x=+x`. - In the setup window, `Spatial Calibration` is configured per listed experiment. Draft mappings are tracked per experiment while the dialog is open, existing stores prefill the control, and `Next` writes the resolved mapping to every reused or newly prepared store before analysis selection opens. diff --git a/docs/AGENTS.md b/docs/AGENTS.md index 93190a2..188ef92 100644 --- a/docs/AGENTS.md +++ b/docs/AGENTS.md @@ -12,7 +12,7 @@ You are a computer vision expert in fluorescence microscopy and technical writer ## Project knowledge - **Tech Stack:** Python 3.12, antsypyx, dask, h5py, matplotlib, napari, numpy, - ome-zarr-models, ome-zarr, bioio-ome-zarr, opencv-python, pandas, + ome-zarr-models, ome-zarr, bioio-ome-zarr, tensorstore, opencv-python, pandas, scikit-image, scipy, seaborn, zarr v3 - **File Structure:** - `src/` – Application source code (you READ from here) @@ -40,6 +40,11 @@ Write so that a new developer to this codebase can understand your writing, don ``results//latest/data`` layouts as the preferred public contract. If legacy layouts are mentioned, label them explicitly as migration-only and point readers to ``clearex --migrate-store``. +- Document legacy ``.n5`` as source-only except for Navigate BDV acquisition + input routed through ``experiment.yml``. +- When documenting Navigate BDV ``.n5`` ingestion, describe TensorStore-backed + reads of ``setup*/timepoint*/s0`` plus companion XML metadata; do not teach + raw Zarr API reads on ``.n5`` as the supported path. 
## Boundaries - ✅ **Always do:** Write new files to `docs/`, follow the style examples diff --git a/docs/source/getting-started.rst b/docs/source/getting-started.rst index 3d23d59..dd70207 100644 --- a/docs/source/getting-started.rst +++ b/docs/source/getting-started.rst @@ -65,6 +65,11 @@ Run in headless mode against an experiment file: This materializes ``data_store.ome.zarr`` beside ``experiment.yml`` when a canonical store does not already exist. +For Navigate BDV ``.n5`` acquisitions, point ClearEx at ``experiment.yml`` +rather than the raw ``.n5`` directory. ClearEx uses the companion BDV XML plus +TensorStore-backed reads of ``setup*/timepoint*/s0`` to materialize the +canonical OME-Zarr store. + Run in headless mode with an explicit Navigate stage-to-world mapping: .. code-block:: bash diff --git a/docs/source/runtime/ingestion-and-canonical-store.rst b/docs/source/runtime/ingestion-and-canonical-store.rst index f4b65f8..11586b2 100644 --- a/docs/source/runtime/ingestion-and-canonical-store.rst +++ b/docs/source/runtime/ingestion-and-canonical-store.rst @@ -33,7 +33,8 @@ Materialization supports: - TIFF/OME-TIFF, - H5/HDF5/HDF, - NumPy ``.npy`` and ``.npz``, -- generic Zarr / N5 stores, +- generic Zarr stores, +- Navigate BDV N5 acquisitions routed through ``experiment.yml``, - canonical OME-Zarr stores. Special collection logic is implemented for: @@ -42,6 +43,14 @@ Special collection logic is implemented for: dimensions), - Navigate BDV H5/N5 setup collections (mapped with companion XML metadata). +Navigate BDV N5 sources are source-only and are not opened through Zarr APIs. +ClearEx reads ``setup*/timepoint*/s0`` datasets through TensorStore so Dask +ingestion remains parallelized on ``zarr>=3``. Standalone bare ``.n5`` runtime +input remains unsupported in this phase; use ``experiment.yml`` materialization +to convert the source into canonical ``*.ome.zarr``. 
+If stale legacy ClearEx groups such as ``data`` or ``results`` exist inside the +source ``.n5`` tree, they are ignored for source selection. + Canonical Store Path Policy --------------------------- diff --git a/docs/zarr_materialization_workflow.rst b/docs/zarr_materialization_workflow.rst index a884e2c..27ea067 100644 --- a/docs/zarr_materialization_workflow.rst +++ b/docs/zarr_materialization_workflow.rst @@ -14,10 +14,15 @@ arrays alongside public OME image collections. - TIFF/OME-TIFF - H5/HDF5 - - generic Zarr/N5 + - generic Zarr + - Navigate BDV N5 through ``experiment.yml`` plus companion XML - NumPy ``.npy`` / ``.npz`` - canonical OME-Zarr + Navigate BDV ``.n5`` is read through TensorStore-backed access to + ``setup*/timepoint*/s0`` datasets. Raw Zarr API access to ``.n5`` is not the + supported runtime path on ``zarr>=3``. + 4. Infer and normalize axis order, coercing to canonical ``(t, p, c, z, y, x)`` for internal execution. 5. Materialize the internal source array at diff --git a/pyproject.toml b/pyproject.toml index c56e68b..b0f1f17 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "scikit-image", "scipy<1.13", "seaborn", + "tensorstore", "tifffile==2025.1.10", "zarr>=3.1.1,<4.0", ] diff --git a/src/clearex/AGENTS.md b/src/clearex/AGENTS.md index 65f1e64..75727e6 100644 --- a/src/clearex/AGENTS.md +++ b/src/clearex/AGENTS.md @@ -45,6 +45,13 @@ This directory contains the runtime orchestration surface for ClearEx. - Legacy root `data`, root `data_pyramid`, and `results//latest/data` layouts are migration-only and must not be reintroduced as canonical outputs. +- Legacy `.n5` inputs are source-only. For Navigate BDV N5, agents must use + TensorStore-backed reads from `setup*/timepoint*/s0` plus companion XML + `ViewSetup` metadata; do not reintroduce `zarr.open_group(...)` or + `da.from_zarr(...)` on raw `.n5` paths. +- Bare standalone `.n5` runtime input remains unsupported in phase 1. 
If the + source is a Navigate N5 acquisition, route it through `experiment.yml` + materialization into canonical `.ome.zarr`. ## Dask Workload Policy @@ -63,6 +70,9 @@ This directory contains the runtime orchestration surface for ClearEx. `(t, p, c, z, y, x)` data instead of materializing only the first TIFF file. - Navigate BDV `H5`/`N5` ingestion now stacks setup collections using companion XML `ViewSetup` metadata so channels/positions are preserved across formats. +- Navigate BDV `N5` now reads through TensorStore because `zarr>=3` no longer + provides `N5Store`; legacy ClearEx groups inside source `.n5` trees must be + ignored during source selection. - Napari multiposition affine translations are now treated in world-space microns, with scale preferring stored `voxel_size_um_zyx` attrs. diff --git a/src/clearex/gui/README.md b/src/clearex/gui/README.md index 477163a..f03332e 100644 --- a/src/clearex/gui/README.md +++ b/src/clearex/gui/README.md @@ -11,6 +11,9 @@ This folder owns the PyQt6 UX in `app.py`. - drag and drop of experiments, folders, or `.clearex-experiment-list.json` files - Add/remove experiment entries from the list and persist the list for reuse - Auto-load metadata when the current list selection changes + - for Navigate BDV ``file_type: N5``, source metadata must come from the + Navigate experiment context plus TensorStore-backed BDV source summary, + not from sending the raw ``.n5`` path through the generic Zarr reader - Configure Dask backend and OME-Zarr save options - Configure `Spatial Calibration` for the currently selected experiment: - map world `z/y/x` to Navigate stage `X/Y/Z/F` or `none`, @@ -81,6 +84,9 @@ This folder owns the PyQt6 UX in `app.py`. - Treat `.ome.zarr` as the only canonical store suffix in labels, tooltips, examples, and validation messages. +- Treat source `.n5` as Navigate acquisition input only. GUI messaging should + point users to `experiment.yml` materialization, not direct raw `.n5` + runtime opening. 
- Present logical analysis inputs (`data`, `flatfield`, `deconvolution`, `shear_transform`, `usegment3d`, `registration`) instead of raw internal component paths wherever possible. diff --git a/src/clearex/gui/app.py b/src/clearex/gui/app.py index 91ec529..66a2f70 100644 --- a/src/clearex/gui/app.py +++ b/src/clearex/gui/app.py @@ -61,6 +61,7 @@ infer_zyx_shape, is_navigate_experiment_file, load_navigate_experiment, + load_navigate_experiment_source_image_info, load_store_spatial_calibration, materialize_experiment_data_store, resolve_data_store_path, @@ -6680,8 +6681,10 @@ def _load_experiment_context( experiment_path, experiment, source_data_path = ( self._resolve_experiment_source_context(path=path) ) - _, info = self._opener.open( - path=str(source_data_path), + info = load_navigate_experiment_source_image_info( + experiment=experiment, + source_path=source_data_path, + opener=self._opener, prefer_dask=True, chunks=self._chunks, ) diff --git a/src/clearex/io/README.md b/src/clearex/io/README.md index cf3b3ed..8b42532 100644 --- a/src/clearex/io/README.md +++ b/src/clearex/io/README.md @@ -16,7 +16,11 @@ This folder contains ingestion, CLI, logging, and provenance logic. - For non-canonical sources: create `data_store.ome.zarr` beside `experiment.yml`. - Reuse an input store in place only when it is already a canonical OME-Zarr - store. Generic Zarr/N5 inputs are source formats, not canonical outputs. + store. Generic Zarr inputs and legacy `.n5` inputs are source formats, not + canonical outputs. +- Legacy `.n5` is source-only. First-class `.n5` support currently means + Navigate BDV acquisition input routed through `experiment.yml`, not bare + direct `.n5` runtime input. - Canonical public source data is a single-well OME HCS collection at the store root. - Canonical internal source array is @@ -46,9 +50,16 @@ This folder contains ingestion, CLI, logging, and provenance logic. 
- For Navigate BDV `H5`/`N5` outputs, materialization must parse companion `*.xml` `ViewSetup` entries and map `setup -> (tile=position, channel)` so canonical channel/position axes are preserved. +- For Navigate BDV `.n5`, readers must use TensorStore-backed access to + `setup*/timepoint*/s0` datasets so ingestion stays compatible with + `zarr>=3` and remains Dask-parallel. Do not use `zarr.open_group(...)` or + `da.from_zarr(...)` on raw `.n5` paths. - Navigate BDV storage layout uses setup-major indexing `setup = channel * positions + position`; loader fallback should respect this rule when XML metadata is unavailable. +- Legacy ClearEx groups such as `data`, `data_pyramid`, `results`, and + `provenance` inside source `.n5` trees are stale source-side artifacts and + must be ignored for source discovery/materialization. - XY pixel-size inference should use active microscope camera profile (from `MicroscopeState.microscope_name`) and prefer: 1. `fov_x / img_x_pixels` or `fov_y / img_y_pixels`, diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index aecba19..3ce22dc 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -58,10 +58,19 @@ import dask.array as da import h5py import numpy as np +import numpy.typing as npt import tifffile import zarr from dask.delayed import delayed +try: + import tensorstore as ts + + HAS_TENSORSTORE = True +except Exception: # pragma: no cover - optional dependency import guard + ts = None + HAS_TENSORSTORE = False + # Local Imports from clearex.io.ome_store import ( CLEAREX_PROVENANCE_GROUP, @@ -110,6 +119,41 @@ _SPATIAL_CALIBRATION_ATTR = "spatial_calibration" +@dataclass(frozen=True) +class _TensorStoreN5ArrayAdapter: + """Expose one TensorStore N5 dataset through Dask's array protocol. + + Parameters + ---------- + source_path : str + N5 root path on disk. + component : str + Dataset component path within the N5 root. + shape : tuple[int, ...] + Dataset shape in source axis order. 
+ dtype : numpy.dtype + NumPy dtype of the dataset. + """ + + source_path: str + component: str + shape: tuple[int, ...] + dtype: npt.DTypeLike + + @property + def ndim(self) -> int: + """Return dataset dimensionality.""" + return len(self.shape) + + def __getitem__(self, item: Any) -> np.ndarray[Any, Any]: + """Read one N5 selection as a NumPy array.""" + dataset = _open_tensorstore_n5_dataset( + Path(self.source_path), + component=self.component, + ) + return np.asarray(dataset[item].read().result(), dtype=np.dtype(self.dtype)) + + def _is_zarr_like_path(path: Path) -> bool: """Return whether a path is a Zarr or N5 directory store. @@ -126,6 +170,87 @@ def _is_zarr_like_path(path: Path) -> bool: return path.is_dir() and path.suffix.lower() in {".zarr", ".n5"} +def _require_tensorstore_for_n5() -> None: + """Raise a clear error when TensorStore-backed N5 support is unavailable.""" + if HAS_TENSORSTORE: + return + raise ImportError( + "TensorStore is required for Navigate BDV N5 ingestion with zarr>=3. " + "Install the 'tensorstore' dependency to read .n5 sources." 
+ ) + + +def _tensorstore_n5_spec(*, source_path: Path, component: str) -> dict[str, Any]: + """Build a TensorStore N5 spec for one dataset component.""" + return { + "driver": "n5", + "kvstore": {"driver": "file", "path": str(source_path)}, + "path": str(component).strip("/"), + } + + +def _open_tensorstore_n5_dataset(source_path: Path, *, component: str) -> Any: + """Open one N5 dataset with TensorStore.""" + _require_tensorstore_for_n5() + return ts.open(_tensorstore_n5_spec(source_path=source_path, component=component)).result() + + +def _load_n5_attributes(source_path: Path, *, component: str) -> dict[str, Any]: + """Load ``attributes.json`` for one N5 dataset component.""" + attributes_path = source_path / component / "attributes.json" + try: + payload = json.loads(attributes_path.read_text()) + except Exception: + return {} + return payload if isinstance(payload, dict) else {} + + +def _normalize_n5_chunks( + *, + shape: tuple[int, ...], + attributes: dict[str, Any], +) -> tuple[int, ...]: + """Resolve Dask chunk sizes for one N5 dataset.""" + raw_chunks = attributes.get("blockSize") + if not isinstance(raw_chunks, (list, tuple)) or len(raw_chunks) != len(shape): + return tuple(int(size) for size in shape) + normalized: list[int] = [] + for dim_size, chunk_size in zip(shape, raw_chunks, strict=False): + try: + parsed = int(chunk_size) + except (TypeError, ValueError): + parsed = int(dim_size) + normalized.append(max(1, min(int(dim_size), parsed))) + return tuple(normalized) + + +def _open_tensorstore_n5_as_dask( + source_path: Path, + *, + component: str, +) -> tuple[da.Array, dict[str, Any]]: + """Open one N5 dataset as a lazy Dask array via TensorStore.""" + dataset = _open_tensorstore_n5_dataset(source_path, component=component) + shape = tuple(int(size) for size in dataset.shape) + dtype = np.dtype(dataset.dtype.numpy_dtype) + attributes = _load_n5_attributes(source_path, component=component) + chunks = _normalize_n5_chunks(shape=shape, 
attributes=attributes) + adapter = _TensorStoreN5ArrayAdapter( + source_path=str(source_path), + component=component, + shape=shape, + dtype=dtype, + ) + array = da.from_array( + adapter, + chunks=chunks, + asarray=False, + fancy=False, + meta=np.empty((0,) * len(shape), dtype=dtype), + ) + return array, attributes + + def has_canonical_data_component(zarr_path: Union[str, Path]) -> bool: """Return whether a store contains canonical 6D runtime-cache source data. @@ -918,6 +1043,12 @@ def _open_source_as_dask( suffix = source_path.suffix.lower() meta: dict[str, Any] = {"source_path": str(source_path)} + if suffix == ".n5" and _is_zarr_like_path(source_path): + raise ValueError( + "Standalone N5 ingestion is not supported. Use Navigate " + "experiment.yml-driven BDV N5 materialization instead." + ) + if _is_zarr_like_path(source_path): array, component, axes = _collect_largest_zarr_array(source_path) source_array = ( @@ -984,6 +1115,91 @@ def _collect_datasets(group: h5py.Group) -> None: raise ValueError(f"Unsupported source format for ingestion: {source_path}") +def _candidate_bdv_xml_paths(path: Path) -> list[Path]: + """Return candidate BDV XML sidecar paths for one source path.""" + candidates: list[Path] = [] + candidates.append(path.with_suffix(".xml")) + candidates.append(path.parent / f"{path.name}.xml") + + lower_name = path.name.lower() + for token in (".ome.zarr", ".zarr", ".n5", ".hdf5", ".hdf", ".h5"): + if not lower_name.endswith(token): + continue + stem = path.name[: -len(token)] + if stem: + candidates.append(path.parent / f"{stem}.xml") + break + + ordered: list[Path] = [] + seen: set[str] = set() + for candidate in candidates: + key = str(candidate) + if key in seen: + continue + seen.add(key) + ordered.append(candidate) + return ordered + + +def _bdv_loader_format_matches_source_suffix(*, suffix: str, image_format: str) -> bool: + """Return whether a BDV XML loader format matches the source suffix.""" + normalized = 
str(image_format).strip().lower() + if suffix in {".h5", ".hdf5", ".hdf"}: + return normalized in {"bdv.hdf5", "bdv.h5", "bdv.hdf"} + if suffix == ".n5": + return normalized == "bdv.n5" or normalized.startswith("bdv.n5.") + if suffix == ".zarr": + if normalized in { + "bdv.zarr", + "bdv.ome.zarr", + "bdv.ngff", + "bdv.omezarr", + "bdv.omengff", + "ome.zarr", + "ome-zarr", + "ome.ngff", + "ome-ngff", + "ngff", + "zarr", + }: + return True + return ( + normalized.startswith("bdv.zarr.") + or normalized.startswith("bdv.ome.zarr.") + or normalized.startswith("bdv.ngff.") + or normalized.startswith("ome.zarr.") + or normalized.startswith("ome.ngff.") + ) + return False + + +def _load_navigate_bdv_xml_root(source_path: Path) -> Optional[ET.Element]: + """Load the companion BDV XML root when present and compatible.""" + suffix = source_path.suffix.lower() + if suffix not in {".h5", ".hdf5", ".hdf", ".n5", ".zarr"}: + return None + + for xml_path in _candidate_bdv_xml_paths(source_path): + if not xml_path.exists(): + continue + try: + root = ET.fromstring(xml_path.read_text()) + except Exception: + continue + + image_loader = root.find("SequenceDescription/ImageLoader") + if image_loader is None: + continue + image_format = str(image_loader.attrib.get("format", "")) + if not _bdv_loader_format_matches_source_suffix( + suffix=suffix, + image_format=image_format, + ): + continue + return root + return None + + def _parse_navigate_bdv_setup_index_map( source_path: Path, ) -> Optional[dict[int, tuple[int, int]]]: @@ -1007,140 +1223,168 @@ def _parse_navigate_bdv_setup_index_map( Parsing is best-effort and falls back to ``None``. """ - def _candidate_bdv_xml_paths(path: Path) -> list[Path]: - """Return candidate BDV XML sidecar paths for one source path. + root = _load_navigate_bdv_xml_root(source_path) + if root is None: + return None - Parameters - ---------- - path : pathlib.Path - Source BDV container path. 
+ raw_entries: list[tuple[int, int, int]] = [] + for view_setup in root.findall("SequenceDescription/ViewSetups/ViewSetup"): + setup_text = view_setup.findtext("id") + channel_text = view_setup.findtext("attributes/channel") + tile_text = view_setup.findtext("attributes/tile") + if setup_text is None or channel_text is None or tile_text is None: + continue + try: + setup_index = int(setup_text) + channel_index = int(channel_text) + tile_index = int(tile_text) + except ValueError: + continue + raw_entries.append((setup_index, channel_index, tile_index)) - Returns - ------- - list[pathlib.Path] - Ordered XML candidate paths, deduplicated while preserving order. - """ - candidates: list[Path] = [] - candidates.append(path.with_suffix(".xml")) - candidates.append(path.parent / f"{path.name}.xml") + if not raw_entries: + return None - lower_name = path.name.lower() - for token in (".ome.zarr", ".zarr", ".n5", ".hdf5", ".hdf", ".h5"): - if not lower_name.endswith(token): - continue - stem = path.name[: -len(token)] - if stem: - candidates.append(path.parent / f"{stem}.xml") - break + channel_values = sorted({entry[1] for entry in raw_entries}) + tile_values = sorted({entry[2] for entry in raw_entries}) + channel_lookup = {value: idx for idx, value in enumerate(channel_values)} + tile_lookup = {value: idx for idx, value in enumerate(tile_values)} - ordered: list[Path] = [] - seen: set[str] = set() - for candidate in candidates: - key = str(candidate) - if key in seen: - continue - seen.add(key) - ordered.append(candidate) - return ordered + return { + int(setup_index): ( + int(tile_lookup[tile_index]), + int(channel_lookup[channel_index]), + ) + for setup_index, channel_index, tile_index in raw_entries + } - def _loader_format_matches_source_suffix(*, suffix: str, image_format: str) -> bool: - """Return whether a BDV XML loader format matches the source suffix. - Parameters - ---------- - suffix : str - Source path suffix. 
- image_format : str - XML ``ImageLoader`` format value. +@dataclass(frozen=True) +class _NavigateBdvN5Entry: + """Describe one BDV N5 dataset component.""" - Returns - ------- - bool - ``True`` when ``image_format`` is compatible with ``suffix``. - """ - normalized = str(image_format).strip().lower() - if suffix in {".h5", ".hdf5", ".hdf"}: - return normalized in {"bdv.hdf5", "bdv.h5", "bdv.hdf"} - if suffix == ".n5": - return normalized == "bdv.n5" or normalized.startswith("bdv.n5.") - if suffix == ".zarr": - if normalized in { - "bdv.zarr", - "bdv.ome.zarr", - "bdv.ngff", - "bdv.omezarr", - "bdv.omengff", - "ome.zarr", - "ome-zarr", - "ome.ngff", - "ome-ngff", - "ngff", - "zarr", - }: - return True - return ( - normalized.startswith("bdv.zarr.") - or normalized.startswith("bdv.ome.zarr.") - or normalized.startswith("bdv.ngff.") - or normalized.startswith("ome.zarr.") - or normalized.startswith("ome.ngff.") - ) - return False + time_index: int + setup_index: int + component: str + source_shape: tuple[int, ...] + source_chunks: tuple[int, ...] 
- suffix = source_path.suffix.lower() - if suffix not in {".h5", ".hdf5", ".hdf", ".n5", ".zarr"}: - return None - xml_candidates = _candidate_bdv_xml_paths(source_path) - for xml_path in xml_candidates: - if not xml_path.exists(): - continue +def _iter_navigate_bdv_n5_entries(source_path: Path) -> list[_NavigateBdvN5Entry]: + """Enumerate BDV-style N5 datasets from the filesystem layout.""" + entries: list[_NavigateBdvN5Entry] = [] + for attributes_path in sorted(source_path.rglob("attributes.json")): + component_path = attributes_path.parent try: - root = ET.fromstring(xml_path.read_text()) - except Exception: + relative_component = component_path.relative_to(source_path) + except ValueError: continue - - image_loader = root.find("SequenceDescription/ImageLoader") - if image_loader is None: + component = relative_component.as_posix() + match = re.match(r"^setup(\d+)/timepoint(\d+)/s(\d+)$", component) + if match is None or int(match.group(3)) != 0: continue - image_format = str(image_loader.attrib.get("format", "")) - if not _loader_format_matches_source_suffix( - suffix=suffix, - image_format=image_format, - ): + attributes = _load_n5_attributes(source_path, component=component) + dimensions = attributes.get("dimensions") + if not isinstance(dimensions, (list, tuple)) or not dimensions: continue + shape = tuple(int(size) for size in dimensions) + chunks = _normalize_n5_chunks(shape=shape, attributes=attributes) + entries.append( + _NavigateBdvN5Entry( + time_index=int(match.group(2)), + setup_index=int(match.group(1)), + component=component, + source_shape=shape, + source_chunks=chunks, + ) + ) + return entries - raw_entries: list[tuple[int, int, int]] = [] - for view_setup in root.findall("SequenceDescription/ViewSetups/ViewSetup"): - setup_text = view_setup.findtext("id") - channel_text = view_setup.findtext("attributes/channel") - tile_text = view_setup.findtext("attributes/tile") - if setup_text is None or channel_text is None or tile_text is None: - 
continue - try: - setup_index = int(setup_text) - channel_index = int(channel_text) - tile_index = int(tile_text) - except ValueError: - continue - raw_entries.append((setup_index, channel_index, tile_index)) - if not raw_entries: - continue +def summarize_navigate_bdv_n5_image_info( + *, + experiment: "NavigateExperiment", + source_path: Path, +) -> Optional[ImageInfo]: + """Summarize a Navigate BDV N5 collection as canonical ``ImageInfo``.""" + if source_path.suffix.lower() != ".n5" or not _is_zarr_like_path(source_path): + return None - channel_values = sorted({entry[1] for entry in raw_entries}) - tile_values = sorted({entry[2] for entry in raw_entries}) - channel_lookup = {value: idx for idx, value in enumerate(channel_values)} - tile_lookup = {value: idx for idx, value in enumerate(tile_values)} + entries = _iter_navigate_bdv_n5_entries(source_path) + if len(entries) <= 1: + return None - return { - int(setup_index): ( - int(tile_lookup[tile_index]), - int(channel_lookup[channel_index]), + setup_map = _parse_navigate_bdv_setup_index_map(source_path) + inferred_positions = max(1, int(experiment.multiposition_count)) + time_indices: set[int] = set() + position_indices: set[int] = set() + channel_indices: set[int] = set() + base_shape: Optional[tuple[int, ...]] = None + + for entry in entries: + if setup_map is not None and entry.setup_index not in setup_map: + continue + if setup_map is not None: + position_index, channel_index = setup_map[entry.setup_index] + else: + channel_index = int(entry.setup_index // inferred_positions) + position_index = int(entry.setup_index % inferred_positions) + time_indices.add(int(entry.time_index)) + position_indices.add(int(position_index)) + channel_indices.add(int(channel_index)) + if base_shape is None: + base_shape = entry.source_shape + elif entry.source_shape != base_shape: + raise ValueError( + "Navigate BDV N5 collection has inconsistent source shapes: " + f"expected {base_shape}, got {entry.source_shape} at " + 
f"timepoint={entry.time_index}, setup={entry.setup_index}." ) - for setup_index, channel_index, tile_index in raw_entries - } - return None + + if not time_indices or not position_indices or not channel_indices or base_shape is None: + return None + + sample_entry = min( + entries, + key=lambda item: (item.time_index, item.setup_index, item.component), + ) + sample_dataset = _open_tensorstore_n5_dataset( + source_path, + component=sample_entry.component, + ) + dtype = np.dtype(sample_dataset.dtype.numpy_dtype) + x_size, y_size, z_size = (int(size) for size in base_shape) + metadata: dict[str, Any] = { + "source_component": sample_entry.component, + "source_layout": "navigate_bdv_n5", + "positions": len(position_indices), + "channels": len(channel_indices), + } + if ( + experiment.z_step_um is not None + and experiment.xy_pixel_size_um is not None + and experiment.z_step_um > 0 + and experiment.xy_pixel_size_um > 0 + ): + metadata["voxel_size_um_zyx"] = [ + float(experiment.z_step_um), + float(experiment.xy_pixel_size_um), + float(experiment.xy_pixel_size_um), + ] + return ImageInfo( + path=source_path, + shape=( + len(time_indices), + len(position_indices), + len(channel_indices), + z_size, + y_size, + x_size, + ), + dtype=dtype, + axes="TPCZYX", + metadata=metadata, + ) def _open_navigate_bdv_collection_as_dask( @@ -1257,6 +1501,52 @@ def _collect_h5_entries(name: str, obj: Any) -> None: f"t={time_index}, setup={setup_index}." 
) arrays_by_index[key] = source_array + elif is_n5: + entries = [ + (entry.time_index, entry.setup_index, entry.component, ("x", "y", "z")) + for entry in _iter_navigate_bdv_n5_entries(source_path) + ] + if len(entries) <= 1: + return None + + for time_index, setup_index, component, source_axes in sorted( + entries, key=lambda item: (item[0], item[1], item[2]) + ): + if setup_map is not None and setup_index not in setup_map: + continue + if setup_map is not None: + position_index, channel_index = setup_map[setup_index] + else: + channel_index = int(setup_index // inferred_positions) + position_index = int(setup_index % inferred_positions) + + key = (int(time_index), int(position_index), int(channel_index)) + if key in arrays_by_index: + continue + + source_array, _ = _open_tensorstore_n5_as_dask( + source_path, + component=component, + ) + normalized_axes = tuple(source_axes) + source_shape = tuple(int(size) for size in source_array.shape) + if base_axes is None: + base_axes = normalized_axes + base_shape = source_shape + else: + if normalized_axes != base_axes: + raise ValueError( + "Navigate BDV N5 collection has inconsistent source axes: " + f"expected {base_axes}, got {normalized_axes} at " + f"t={time_index}, setup={setup_index}." + ) + if source_shape != base_shape: + raise ValueError( + "Navigate BDV N5 collection has inconsistent source shapes: " + f"expected {base_shape}, got {source_shape} at " + f"t={time_index}, setup={setup_index}." 
+ ) + arrays_by_index[key] = source_array else: root = zarr.open_group(str(source_path), mode="r") group_attrs = dict(getattr(root, "attrs", {})) @@ -1378,6 +1668,68 @@ def _walk(group_node: Any, prefix: str = "") -> None: return source_array, source_axes, metadata +def load_navigate_experiment_source_image_info( + *, + experiment: "NavigateExperiment", + source_path: Path, + opener: Any = None, + prefer_dask: bool = True, + chunks: Optional[Union[int, tuple[int, ...]]] = None, +) -> ImageInfo: + """Load source metadata for one Navigate experiment input path. + + Parameters + ---------- + experiment : NavigateExperiment + Parsed experiment metadata. + source_path : pathlib.Path + Resolved acquisition source path. + opener : object, optional + Existing ``ImageOpener``-compatible instance for non-N5 sources. + prefer_dask : bool, default=True + Forwarded to ``ImageOpener.open`` when non-N5 metadata is read. + chunks : int or tuple[int, ...], optional + Forwarded to ``ImageOpener.open`` when non-N5 metadata is read. + + Returns + ------- + ImageInfo + Metadata summary for the resolved acquisition source. + + Raises + ------ + ValueError + If Navigate ``file_type: N5`` does not resolve to a BDV-style N5 + source layout. + """ + resolved_source = Path(source_path).expanduser().resolve() + if ( + _normalize_file_type(experiment.file_type) == "N5" + and resolved_source.suffix.lower() == ".n5" + ): + info = summarize_navigate_bdv_n5_image_info( + experiment=experiment, + source_path=resolved_source, + ) + if info is None: + raise ValueError( + "Navigate file_type=N5 currently requires a BDV-style N5 source " + "with companion XML metadata." 
+ ) + return info + + if opener is None: + from clearex.io.read import ImageOpener + + opener = ImageOpener() + _, info = opener.open( + path=str(resolved_source), + prefer_dask=prefer_dask, + chunks=chunks, + ) + return info + + def _parse_navigate_tiff_indices(path: Path) -> tuple[int, int, int]: """Parse ``(time, position, channel)`` indices from a Navigate TIFF path. diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index 244e139..a6e6249 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -25,6 +25,7 @@ # POSSIBILITY OF SUCH DAMAGE. # Standard Library Imports +from contextlib import ExitStack from pathlib import Path import json @@ -46,6 +47,7 @@ initialize_analysis_store, is_navigate_experiment_file, load_navigate_experiment, + load_navigate_experiment_source_image_info, load_store_spatial_calibration, materialize_experiment_data_store, resolve_data_store_path, @@ -53,6 +55,7 @@ save_store_spatial_calibration, write_zyx_block, ) +from clearex.io.ome_store import SOURCE_CACHE_COMPONENT from clearex.io.read import ImageInfo from clearex.workflow import SpatialCalibrationConfig @@ -150,6 +153,55 @@ def _write_bdv_xml( path.write_text(xml) +def _tensorstore_module(): + """Import TensorStore for real N5 fixture creation.""" + return pytest.importorskip("tensorstore") + + +def _write_real_n5_dataset( + root_path: Path, + *, + component: str, + data_xyz: np.ndarray, + block_size_xyz: tuple[int, int, int], +) -> None: + """Create one real N5 dataset using TensorStore.""" + ts = _tensorstore_module() + spec = { + "driver": "n5", + "kvstore": {"driver": "file", "path": str(root_path)}, + "path": component, + "metadata": { + "blockSize": [int(v) for v in block_size_xyz], + "compression": { + "type": "blosc", + "cname": "lz4", + "clevel": 5, + "shuffle": 1, + "blocksize": 0, + }, + "dataType": np.dtype(data_xyz.dtype).name, + "dimensions": [int(v) for v in data_xyz.shape], + }, + "create": True, + "delete_existing": True, + 
} + dataset = ts.open(spec).result() + dataset[...] = data_xyz + + +def _write_legacy_n5_group( + root_path: Path, + *, + component: str, + schema: str = "clearex.analysis_store.v1", +) -> None: + """Write a stale legacy ClearEx group marker into an N5 tree.""" + group_path = root_path / component + group_path.mkdir(parents=True, exist_ok=True) + (group_path / "attributes.json").write_text(json.dumps({"schema": schema})) + + def test_normalize_gpu_device_ids_deduplicates_and_strips() -> None: values = [0, "1", " 1 ", "", "2", "0", " "] assert experiment_module._normalize_gpu_device_ids(values) == ["0", "1", "2"] @@ -1137,43 +1189,29 @@ def test_materialize_experiment_data_store_stacks_bdv_n5_setups( experiment = load_navigate_experiment(experiment_path) source_path = tmp_path / "CH00_000000.n5" - source_root = zarr.open_group(str(source_path), mode="w") expected_blocks = { (0, 0): np.full((2, 3, 4), fill_value=11, dtype=np.uint16), (1, 0): np.full((2, 3, 4), fill_value=21, dtype=np.uint16), (0, 1): np.full((2, 3, 4), fill_value=31, dtype=np.uint16), (1, 1): np.full((2, 3, 4), fill_value=41, dtype=np.uint16), } - source_root.create_dataset( - "setup0/timepoint0/s0", - data=expected_blocks[(0, 0)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup1/timepoint0/s0", - data=expected_blocks[(1, 0)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup2/timepoint0/s0", - data=expected_blocks[(0, 1)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup3/timepoint0/s0", - data=expected_blocks[(1, 1)], - chunks=(1, 3, 4), - overwrite=True, - ) - source_root.create_dataset( - "setup99/timepoint0/s0", - data=np.zeros((2, 3, 4), dtype=np.uint16), - chunks=(1, 3, 4), - overwrite=True, - ) + for setup_index, block in { + 0: expected_blocks[(0, 0)], + 1: expected_blocks[(1, 0)], + 2: expected_blocks[(0, 1)], + 3: expected_blocks[(1, 1)], + 99: np.zeros((2, 3, 4), dtype=np.uint16), + }.items(): 
+ _write_real_n5_dataset( + source_path, + component=f"setup{setup_index}/timepoint0/s0", + data_xyz=np.transpose(block, (2, 1, 0)), + block_size_xyz=(4, 3, 1), + ) + _write_legacy_n5_group(source_path, component="data") + _write_legacy_n5_group(source_path, component="data_pyramid") + _write_legacy_n5_group(source_path, component="results") + _write_legacy_n5_group(source_path, component="provenance") _write_bdv_xml( tmp_path / "CH00_000000.xml", @@ -1195,17 +1233,96 @@ def test_materialize_experiment_data_store_stacks_bdv_n5_setups( ) root = zarr.open_group(str(materialized.store_path), mode="r") - assert tuple(root["data"].shape) == (1, 2, 2, 2, 3, 4) - assert root.attrs["source_data_path"] == str(source_path.resolve()) + assert tuple(root[SOURCE_CACHE_COMPONENT].shape) == (1, 2, 2, 2, 3, 4) + assert root["clearex/metadata"].attrs["source_data_path"] == str(source_path.resolve()) + assert materialized.source_image_info.shape == (1, 2, 2, 4, 3, 2) + assert materialized.source_image_info.axes == "TPCXYZ" + assert root["A/1/0/0"].shape == (1, 2, 2, 3, 4) for position_index in range(2): for channel_index in range(2): - loaded = np.array(root["data"][0, position_index, channel_index, :, :, :]) + loaded = np.array( + root[SOURCE_CACHE_COMPONENT][ + 0, position_index, channel_index, :, :, : + ] + ) assert np.array_equal( loaded, expected_blocks[(position_index, channel_index)] ) +def test_load_navigate_experiment_source_image_info_summarizes_bdv_n5( + tmp_path: Path, +) -> None: + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment( + experiment_path, + save_directory=tmp_path, + file_type="N5", + is_multiposition=True, + ) + _write_multipositions_sidecar(tmp_path / "multi_positions.yml", count=2) + experiment = load_navigate_experiment(experiment_path) + + source_path = tmp_path / "CH00_000000.n5" + for setup_index, fill_value in enumerate((11, 21, 31, 41)): + canonical_block = np.full((2, 3, 4), fill_value=fill_value, dtype=np.uint16) + 
_write_real_n5_dataset( + source_path, + component=f"setup{setup_index}/timepoint0/s0", + data_xyz=np.transpose(canonical_block, (2, 1, 0)), + block_size_xyz=(4, 3, 1), + ) + + _write_bdv_xml( + tmp_path / "CH00_000000.xml", + loader_format="bdv.n5", + data_file_name=source_path.name, + setup_channel_tile={ + 0: (0, 0), + 1: (0, 1), + 2: (1, 0), + 3: (1, 1), + }, + ) + + class _FailingOpener: + def open(self, *args, **kwargs): + raise AssertionError("ImageOpener.open should not be used for BDV N5.") + + info = load_navigate_experiment_source_image_info( + experiment=experiment, + source_path=source_path, + opener=_FailingOpener(), + ) + + assert info.path == source_path.resolve() + assert info.shape == (1, 2, 2, 2, 3, 4) + assert info.dtype == np.dtype(np.uint16) + assert info.axes == "TPCZYX" + assert info.metadata is not None + assert info.metadata["source_layout"] == "navigate_bdv_n5" + assert info.metadata["source_component"] == "setup0/timepoint0/s0" + assert info.metadata["positions"] == 2 + assert info.metadata["channels"] == 2 + + +def test_open_source_as_dask_rejects_standalone_n5_source(tmp_path: Path) -> None: + source_path = tmp_path / "standalone.n5" + _write_real_n5_dataset( + source_path, + component="setup0/timepoint0/s0", + data_xyz=np.zeros((4, 3, 2), dtype=np.uint16), + block_size_xyz=(4, 3, 1), + ) + + with ExitStack() as exit_stack, pytest.raises(ValueError, match="Standalone N5"): + experiment_module._open_source_as_dask( + source_path, + exit_stack=exit_stack, + ) + + def test_materialize_experiment_data_store_stacks_bdv_ome_zarr_setups( tmp_path: Path, ): diff --git a/uv.lock b/uv.lock index 088aa38..c74d610 100644 --- a/uv.lock +++ b/uv.lock @@ -616,6 +616,7 @@ dependencies = [ { name = "scikit-image" }, { name = "scipy" }, { name = "seaborn" }, + { name = "tensorstore" }, { name = "tifffile" }, { name = "zarr" }, ] @@ -703,6 +704,7 @@ requires-dist = [ { name = "sphinx-issues", marker = "extra == 'docs'" }, { name = "sphinx-rtd-theme", 
marker = "extra == 'docs'" }, { name = "sphinx-toolbox", marker = "extra == 'docs'" }, + { name = "tensorstore" }, { name = "tifffile", specifier = "==2025.1.10" }, { name = "ty", marker = "extra == 'dev'", specifier = ">=0.0.1a32" }, { name = "u-segment3d", marker = "extra == 'usegment3d'", specifier = ">=0.1.4,<0.2" }, @@ -2099,6 +2101,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/f7/4a5e785ec9fbd65146a27b6b70b6cdc161a66f2024e4b04ac06a67f5578b/mistune-3.2.0-py3-none-any.whl", hash = "sha256:febdc629a3c78616b94393c6580551e0e34cc289987ec6c35ed3f4be42d0eee1", size = 53598, upload-time = "2025-12-23T11:36:33.211Z" }, ] +[[package]] +name = "ml-dtypes" +version = "0.5.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/b8/3c70881695e056f8a32f8b941126cf78775d9a4d7feba8abcb52cb7b04f2/ml_dtypes-0.5.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a174837a64f5b16cab6f368171a1a03a27936b31699d167684073ff1c4237dac", size = 676927, upload-time = "2025-11-17T22:31:48.182Z" }, + { url = "https://files.pythonhosted.org/packages/54/0f/428ef6881782e5ebb7eca459689448c0394fa0a80bea3aa9262cba5445ea/ml_dtypes-0.5.4-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a7f7c643e8b1320fd958bf098aa7ecf70623a42ec5154e3be3be673f4c34d900", size = 5028464, upload-time = "2025-11-17T22:31:50.135Z" }, + { url = "https://files.pythonhosted.org/packages/3a/cb/28ce52eb94390dda42599c98ea0204d74799e4d8047a0eb559b6fd648056/ml_dtypes-0.5.4-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:9ad459e99793fa6e13bd5b7e6792c8f9190b4e5a1b45c63aba14a4d0a7f1d5ff", size = 5009002, upload-time = "2025-11-17T22:31:52.001Z" }, + { url = "https://files.pythonhosted.org/packages/f5/f0/0cfadd537c5470378b1b32bd859cf2824972174b51b873c9d95cfd7475a5/ml_dtypes-0.5.4-cp312-cp312-win_amd64.whl", hash = "sha256:c1a953995cccb9e25a4ae19e34316671e4e2edaebe4cf538229b1fc7109087b7", size = 212222, upload-time = "2025-11-17T22:31:53.742Z" }, + { url = "https://files.pythonhosted.org/packages/16/2e/9acc86985bfad8f2c2d30291b27cd2bb4c74cea08695bd540906ed744249/ml_dtypes-0.5.4-cp312-cp312-win_arm64.whl", hash = "sha256:9bad06436568442575beb2d03389aa7456c690a5b05892c471215bfd8cf39460", size = 160793, upload-time = "2025-11-17T22:31:55.358Z" }, +] + [[package]] name = "more-itertools" version = "10.8.0" @@ -4201,6 +4219,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/be/5d2d47b1fb58943194fb59dcf222f7c4e35122ec0ffe8c36e18b5d728f0b/tblib-3.2.2-py3-none-any.whl", hash = "sha256:26bdccf339bcce6a88b2b5432c988b266ebbe63a4e593f6b578b1d2e723d2b76", size = 12893, upload-time = "2025-11-12T12:21:14.407Z" }, ] +[[package]] +name = "tensorstore" +version = "0.1.82" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ml-dtypes" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cd/9b/43aedb544937f214dd7c665a7edf1b8b74f2f55d53ebd351c0ce69acf81a/tensorstore-0.1.82.tar.gz", hash = "sha256:ccfceffb7611fc61330f6da24b8b0abd9251d480ac8a5bac5a1729f9ed0c3a9f", size = 7160364, upload-time = "2026-03-13T00:22:16.888Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0d/c3/5ab0b99487b2596bdc0ebd3a569e50415949a63bad90b18e6476de91a7bb/tensorstore-0.1.82-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:f0ac091bd47ea6f051fe11230ad2642c254b46a8fabdd5184b0600556b5529ed", size = 16570668, upload-time = "2026-03-13T00:21:36.386Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/95/92b00a4b2e6192528a9c5bac9f53007acf4aa5d54943b9e114bedb72b2da/tensorstore-0.1.82-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8cae7d0c9b2fa0653f90b147daaf9ed04664cab7d297b9772efcfa088da26cab", size = 14904517, upload-time = "2026-03-13T00:21:38.464Z" }, + { url = "https://files.pythonhosted.org/packages/46/7e/c9c8ad65ee4015787e32d31bcf8278fcb27109e809f8334a64285bd73028/tensorstore-0.1.82-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:34c491ea3c6c1904d4618bfe40020bd83aaeb19d52a266ea0f6919eb3fdc64c4", size = 19344428, upload-time = "2026-03-13T00:21:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/f9/8a/590bb60a190d414abd2f83dd5b5148722d0c5d310a73e21b7a60ab98cf00/tensorstore-0.1.82-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d4182300d8ffa172e961e79c6bd89e38ce6bc5cd3abf1a7dacb22c2396ce40b7", size = 20964954, upload-time = "2026-03-13T00:21:42.515Z" }, + { url = "https://files.pythonhosted.org/packages/43/1c/34e6e97426e1718106e9cb74d3045992bdea3ee368f9ea4ea25b809bdba8/tensorstore-0.1.82-cp312-cp312-win_amd64.whl", hash = "sha256:6369809d01edf66cd487cde5c94f57138167c09561f3d906020fd53c72687f92", size = 13393361, upload-time = "2026-03-13T00:21:44.443Z" }, +] + [[package]] name = "terminado" version = "0.18.1" From 3502034668f390e0f95b428b75f5b5b4274a0703 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Mon, 23 Mar 2026 08:21:38 -0500 Subject: [PATCH 04/10] io: harden canonical reuse and Navigate N5 ingestion guards - add OME-model based validation helper for public source collections and use it when ingestion_progress metadata is stale/incomplete so valid canonical stores are reused instead of re-ingested - add Navigate multiposition sidecar fallback: when is_multiposition=true, resolve multi_positions.yml from both Saving.save_directory and the local experiment.yml directory to handle remote Windows save paths on Linux - filter BDV N5 setup 
discovery to include only setup*/timepoint*/s0 components with persisted chunk payload files (exclude attributes-only placeholder setups that read as implicit zeros) - extend io tests for stale progress acceptance/rejection, multiposition sidecar fallback, and placeholder setup exclusion; keep existing BDV N5 coverage - update io README materialization rules to document canonical reuse fallback, multiposition sidecar precedence, and placeholder setup filtering --- src/clearex/io/README.md | 12 +++ src/clearex/io/experiment.py | 82 +++++++++++++++-- src/clearex/io/ome_store.py | 143 +++++++++++++++++++++++++---- tests/io/test_experiment.py | 172 ++++++++++++++++++++++++++++++++--- 4 files changed, 370 insertions(+), 39 deletions(-) diff --git a/src/clearex/io/README.md b/src/clearex/io/README.md index 8b42532..84c4f03 100644 --- a/src/clearex/io/README.md +++ b/src/clearex/io/README.md @@ -30,6 +30,9 @@ This folder contains ingestion, CLI, logging, and provenance logic. - `clearex/provenance` - `clearex/gui_state` - `clearex/results` +- Canonical-store reuse checks validate the public OME source collection with + `ome-zarr-models` before deciding whether an existing `data_store.ome.zarr` + can be reused. - Store-level stage-to-world mapping is persisted in `clearex/metadata["spatial_calibration"]`. - Missing `spatial_calibration` metadata resolves to identity @@ -57,6 +60,8 @@ This folder contains ingestion, CLI, logging, and provenance logic. - Navigate BDV storage layout uses setup-major indexing `setup = channel * positions + position`; loader fallback should respect this rule when XML metadata is unavailable. +- Navigate BDV `.n5` setup discovery must ignore placeholder setup datasets + that only contain `attributes.json` and no persisted chunk payload files. - Legacy ClearEx groups such as `data`, `data_pyramid`, `results`, and `provenance` inside source `.n5` trees are stale source-side artifacts and must be ignored for source discovery/materialization. 
@@ -67,6 +72,10 @@ This folder contains ingestion, CLI, logging, and provenance logic. 3. raw `pixel_size` fallback. - In collection mode, store-level `source_data_path` should point at the acquisition directory (not only the first TIFF file) for clearer provenance. +- When `MicroscopeState.is_multiposition` is true, infer position count from + `multi_positions.yml` before trusting `experiment.yml` counters; check both + `Saving.save_directory` and the local `experiment.yml` directory so remote + Windows save paths still resolve local sidecars. - If a legacy `data_store.zarr` was generated by older single-file TIFF logic, migrate or rebuild it as `data_store.ome.zarr` before relying on it as a canonical store. @@ -75,6 +84,9 @@ This folder contains ingestion, CLI, logging, and provenance logic. stores without rewriting canonical source data. - Updating spatial calibration is metadata-only; never rewrite the canonical source array to express placement changes. +- If ingestion-progress metadata is stale (for example `status=in_progress` + after an interrupted run) but runtime-cache arrays and public OME metadata + validate, treat the store as reusable instead of rematerializing source data. 
## Headless Spatial Calibration Override diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index 3ce22dc..f49ead0 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -86,6 +86,7 @@ load_store_spatial_calibration as load_namespaced_store_spatial_calibration, publish_source_collection_from_cache, save_store_spatial_calibration as save_namespaced_store_spatial_calibration, + store_has_valid_public_source_collection, source_cache_component, update_store_metadata, ) @@ -192,7 +193,9 @@ def _tensorstore_n5_spec(*, source_path: Path, component: str) -> dict[str, Any] def _open_tensorstore_n5_dataset(source_path: Path, *, component: str) -> Any: """Open one N5 dataset with TensorStore.""" _require_tensorstore_for_n5() - return ts.open(_tensorstore_n5_spec(source_path=source_path, component=component)).result() + return ts.open( + _tensorstore_n5_spec(source_path=source_path, component=component) + ).result() def _load_n5_attributes(source_path: Path, *, component: str) -> dict[str, Any]: @@ -205,6 +208,39 @@ def _load_n5_attributes(source_path: Path, *, component: str) -> dict[str, Any]: return payload if isinstance(payload, dict) else {} +def _n5_component_has_persisted_chunks(source_path: Path, *, component: str) -> bool: + """Return whether an N5 component has at least one persisted chunk file. + + Parameters + ---------- + source_path : pathlib.Path + Root N5 directory path. + component : str + Dataset component path inside ``source_path``. + + Returns + ------- + bool + ``True`` when the component contains at least one file other than + ``attributes.json``. + + Notes + ----- + Some legacy acquisition runs emit setup scaffolding with only + ``attributes.json`` and no chunk payload files. These placeholders read back + as implicit zeros and should not be treated as acquired data. 
+ """ + component_path = source_path / component + if not component_path.is_dir(): + return False + + for _, _, filenames in os.walk(component_path): + for filename in filenames: + if filename != "attributes.json": + return True + return False + + def _normalize_n5_chunks( *, shape: tuple[int, ...], @@ -499,7 +535,10 @@ def _expected_pyramid_components( """ return [ SOURCE_CACHE_COMPONENT, - *[source_cache_component(level_index=idx) for idx in range(1, len(level_factors))], + *[ + source_cache_component(level_index=idx) + for idx in range(1, len(level_factors)) + ], ] @@ -747,10 +786,16 @@ def has_complete_canonical_data_store( record = _read_ingestion_progress_record(root) if record is None: return True - return _ingestion_record_is_complete( + if _ingestion_record_is_complete( record=record, required_components=required_components, - ) + ): + return True + + # Recovery path: if ingestion-progress metadata is stale/incomplete but the + # canonical runtime-cache structure and public OME source collection are + # already valid, prefer reusing the existing canonical store. + return store_has_valid_public_source_collection(root) def _normalize_axis_token(token: Any) -> Optional[str]: @@ -1287,6 +1332,8 @@ def _iter_navigate_bdv_n5_entries(source_path: Path) -> list[_NavigateBdvN5Entry dimensions = attributes.get("dimensions") if not isinstance(dimensions, (list, tuple)) or not dimensions: continue + if not _n5_component_has_persisted_chunks(source_path, component=component): + continue shape = tuple(int(size) for size in dimensions) chunks = _normalize_n5_chunks(shape=shape, attributes=attributes) entries.append( @@ -1341,7 +1388,12 @@ def summarize_navigate_bdv_n5_image_info( f"timepoint={entry.time_index}, setup={entry.setup_index}." 
) - if not time_indices or not position_indices or not channel_indices or base_shape is None: + if ( + not time_indices + or not position_indices + or not channel_indices + or base_shape is None + ): return None sample_entry = min( @@ -4733,6 +4785,7 @@ def _infer_multiposition_count( raw: Dict[str, Any], state: Dict[str, Any], save_directory: Path, + experiment_directory: Path, ) -> int: """Infer position count from sidecar metadata and experiment payload. @@ -4744,6 +4797,8 @@ def _infer_multiposition_count( ``MicroscopeState`` mapping. save_directory : pathlib.Path Acquisition save directory. + experiment_directory : pathlib.Path + Directory containing ``experiment.yml`` and sidecar metadata. Returns ------- @@ -4754,9 +4809,15 @@ def _infer_multiposition_count( # Navigate records detailed position lists in the sidecar file. if is_multiposition: - rows = _load_multiposition_rows(save_directory=save_directory) - if rows is not None: - return max(1, len(rows)) + seen: set[str] = set() + for directory in (save_directory, experiment_directory): + identity = _search_directory_identity(directory) + if identity in seen: + continue + seen.add(identity) + rows = _load_multiposition_rows(save_directory=directory) + if rows is not None: + return max(1, len(rows)) fallback = raw.get("MultiPositions", []) if isinstance(fallback, list) and fallback: @@ -5037,6 +5098,7 @@ def load_navigate_experiment(path: Union[str, Path]) -> NavigateExperiment: raw=raw, state=state, save_directory=save_directory, + experiment_directory=experiment_path.parent.resolve(), ) microscope_name = ( str(state.get("microscope_name")) @@ -5487,7 +5549,9 @@ def initialize_analysis_store( "axes": ["t", "p", "c", "z", "y", "x"], "storage_policy": "latest_only", "chunk_shape_tpczyx": existing_chunks, - "configured_chunks_tpczyx": [int(chunk) for chunk in requested_chunks], + "configured_chunks_tpczyx": [ + int(chunk) for chunk in requested_chunks + ], "resolution_pyramid_factors_tpczyx": pyramid_payload, 
"voxel_size_um_zyx": voxel_size_um_zyx, } diff --git a/src/clearex/io/ome_store.py b/src/clearex/io/ome_store.py index a315ed0..66b8c37 100644 --- a/src/clearex/io/ome_store.py +++ b/src/clearex/io/ome_store.py @@ -183,7 +183,9 @@ def load_store_metadata(path_or_root: str | Path | zarr.Group) -> dict[str, Any] return {"schema": STORE_METADATA_SCHEMA} -def update_store_metadata(path_or_root: str | Path | zarr.Group, **payload: Any) -> dict[str, Any]: +def update_store_metadata( + path_or_root: str | Path | zarr.Group, **payload: Any +) -> dict[str, Any]: """Merge and persist ClearEx namespaced store metadata.""" root = ( path_or_root @@ -209,6 +211,66 @@ def store_has_public_ome_metadata(path_or_root: str | Path | zarr.Group) -> bool return isinstance(payload, Mapping) +def store_has_valid_public_source_collection( + path_or_root: str | Path | zarr.Group, +) -> bool: + """Return whether the root source HCS collection validates via OME models. + + Parameters + ---------- + path_or_root : str or pathlib.Path or zarr.Group + Store path or opened root group. + + Returns + ------- + bool + ``True`` when root/well/image OME metadata validates and every + declared multiscale dataset path exists. 
+ """ + root = ( + path_or_root + if isinstance(path_or_root, zarr.Group) + else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="r") + ) + well_path = f"{PUBLIC_WELL_ROW}/{PUBLIC_WELL_COLUMN}" + try: + root_ome = HCSAttrs.model_validate(getattr(root, "attrs", {}).get("ome")) + well_group = get_node(root, well_path) + well_ome = WellAttrs.model_validate(getattr(well_group, "attrs", {}).get("ome")) + except Exception: + return False + + declared_wells = tuple( + str(entry.path).strip() for entry in root_ome.plate.wells if entry is not None + ) + if well_path not in declared_wells: + return False + + for image in well_ome.well.images: + image_token = str(image.path).strip() + if not image_token: + return False + image_component = f"{well_path}/{image_token}" + try: + image_group = get_node(root, image_component) + image_ome = ImageAttrs.model_validate( + getattr(image_group, "attrs", {}).get("ome") + ) + except Exception: + return False + if not image_ome.multiscales: + return False + for multiscale in image_ome.multiscales: + for dataset in multiscale.datasets: + dataset_path = str(dataset.path).strip() + if not dataset_path: + return False + if dataset_path not in image_group: + return False + + return True + + def is_legacy_clearex_store(path_or_root: str | Path | zarr.Group) -> bool: """Return whether a store still follows the legacy pre-OME ClearEx layout.""" root = ( @@ -229,7 +291,9 @@ def is_legacy_clearex_store(path_or_root: str | Path | zarr.Group) -> bool: return any(bool(marker) for marker in legacy_markers) -def load_store_spatial_calibration(path_or_root: str | Path | zarr.Group) -> SpatialCalibrationConfig: +def load_store_spatial_calibration( + path_or_root: str | Path | zarr.Group, +) -> SpatialCalibrationConfig: """Load store-level spatial calibration from the namespaced metadata group.""" metadata = load_store_metadata(path_or_root) return spatial_calibration_from_dict(metadata.get("spatial_calibration")) @@ -348,9 +412,18 @@ 
def _binding_value(row: Mapping[str, Any], binding: str) -> float: row = rows[position_index] if position_index < len(rows) else reference translations.append( [ - float(_binding_value(row, stage_axis_map["z"]) - _binding_value(reference, stage_axis_map["z"])), - float(_binding_value(row, stage_axis_map["y"]) - _binding_value(reference, stage_axis_map["y"])), - float(_binding_value(row, stage_axis_map["x"]) - _binding_value(reference, stage_axis_map["x"])), + float( + _binding_value(row, stage_axis_map["z"]) + - _binding_value(reference, stage_axis_map["z"]) + ), + float( + _binding_value(row, stage_axis_map["y"]) + - _binding_value(reference, stage_axis_map["y"]) + ), + float( + _binding_value(row, stage_axis_map["x"]) + - _binding_value(reference, stage_axis_map["x"]) + ), ] ) return translations @@ -380,18 +453,22 @@ def _level_component_paths(cache_root: str, root: zarr.Group) -> list[str]: return components -def _level_downsample_factors(level_array: Any, *, level_index: int) -> tuple[int, int, int, int, int, int]: +def _level_downsample_factors( + level_array: Any, *, level_index: int +) -> tuple[int, int, int, int, int, int]: payload = getattr(level_array, "attrs", {}).get("downsample_factors_tpczyx") if isinstance(payload, (tuple, list)) and len(payload) == 6: try: return tuple(int(value) for value in payload) # type: ignore[return-value] except Exception: pass - fallback = 2**max(0, int(level_index)) + fallback = 2 ** max(0, int(level_index)) return (1, 1, 1, fallback, fallback, fallback) -def _set_hcs_group_attrs(collection_group: zarr.Group, *, name: str, field_count: int) -> None: +def _set_hcs_group_attrs( + collection_group: zarr.Group, *, name: str, field_count: int +) -> None: payload = HCSAttrs( version="0.5", plate=Plate( @@ -412,7 +489,9 @@ def _set_hcs_group_attrs(collection_group: zarr.Group, *, name: str, field_count collection_group.attrs["ome"] = _model_payload(payload) -def _set_well_group_attrs(well_group: zarr.Group, *, field_paths: 
Sequence[str]) -> None: +def _set_well_group_attrs( + well_group: zarr.Group, *, field_paths: Sequence[str] +) -> None: payload = WellAttrs( version="0.5", well=WellMeta(images=[WellImage(path=str(path)) for path in field_paths]), @@ -456,7 +535,13 @@ def _set_image_group_attrs( {"type": "scale", "scale": scale}, { "type": "translation", - "translation": [0.0, 0.0, translation[0], translation[1], translation[2]], + "translation": [ + 0.0, + 0.0, + translation[0], + translation[1], + translation[2], + ], }, ], ) @@ -544,9 +629,11 @@ def publish_image_collection_from_cache( image_group, level_count=len(level_arrays), voxel_size_um_zyx=voxel_size_um_zyx, - position_translation_zyx_um=translations[position_index] - if position_index < len(translations) - else (0.0, 0.0, 0.0), + position_translation_zyx_um=( + translations[position_index] + if position_index < len(translations) + else (0.0, 0.0, 0.0) + ), level_factors_tpczyx=level_factors, ) for level_index, level_array in enumerate(level_arrays): @@ -558,11 +645,31 @@ def publish_image_collection_from_cache( int(level_array.shape[5]), ) level_chunks_tczyx = ( - int(level_array.chunks[0]) if level_array.chunks is not None else level_shape_tczyx[0], - int(level_array.chunks[2]) if level_array.chunks is not None else level_shape_tczyx[1], - int(level_array.chunks[3]) if level_array.chunks is not None else level_shape_tczyx[2], - int(level_array.chunks[4]) if level_array.chunks is not None else level_shape_tczyx[3], - int(level_array.chunks[5]) if level_array.chunks is not None else level_shape_tczyx[4], + ( + int(level_array.chunks[0]) + if level_array.chunks is not None + else level_shape_tczyx[0] + ), + ( + int(level_array.chunks[2]) + if level_array.chunks is not None + else level_shape_tczyx[1] + ), + ( + int(level_array.chunks[3]) + if level_array.chunks is not None + else level_shape_tczyx[2] + ), + ( + int(level_array.chunks[4]) + if level_array.chunks is not None + else level_shape_tczyx[3] + ), + ( + 
int(level_array.chunks[5]) + if level_array.chunks is not None + else level_shape_tczyx[4] + ), ) if str(level_index) in image_group: del image_group[str(level_index)] diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index a6e6249..da63fa5 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -55,7 +55,11 @@ save_store_spatial_calibration, write_zyx_block, ) -from clearex.io.ome_store import SOURCE_CACHE_COMPONENT +from clearex.io.ome_store import ( + SOURCE_CACHE_COMPONENT, + SOURCE_CACHE_PYRAMID_ROOT, + source_cache_component, +) from clearex.io.read import ImageInfo from clearex.workflow import SpatialCalibrationConfig @@ -190,6 +194,25 @@ def _write_real_n5_dataset( dataset[...] = data_xyz +def _strip_n5_component_payload_files(root_path: Path, *, component: str) -> None: + """Remove chunk payload files while preserving ``attributes.json``.""" + component_path = root_path / component + if not component_path.exists(): + return + for path in component_path.rglob("*"): + if path.is_file() and path.name != "attributes.json": + path.unlink() + for path in sorted( + (entry for entry in component_path.rglob("*") if entry.is_dir()), + key=lambda entry: len(entry.parts), + reverse=True, + ): + try: + path.rmdir() + except OSError: + continue + + def _write_legacy_n5_group( root_path: Path, *, @@ -610,6 +633,33 @@ def test_load_uses_multi_positions_sidecar_when_multiposition_enabled(tmp_path: assert experiment.multiposition_count == 24 +def test_load_uses_experiment_dir_multi_positions_sidecar_when_save_directory_is_windows_path( + tmp_path: Path, +) -> None: + experiment_path = tmp_path / "experiment.yml" + payload = { + "Saving": { + "save_directory": r"E:\acquisition\remote_only", + "file_type": "N5", + }, + "MicroscopeState": { + "timepoints": 1, + "number_z_steps": 2, + "is_multiposition": True, + "multiposition_count": 1, + "channels": {"channel_1": {"is_selected": True, "laser": "488nm"}}, + }, + "CameraParameters": 
{"img_x_pixels": 8, "img_y_pixels": 8}, + "MultiPositions": [[0.0, 0.0, 0.0]], + } + experiment_path.write_text(json.dumps(payload, indent=2)) + _write_multipositions_sidecar(tmp_path / "multi_positions.yml", count=2) + + experiment = load_navigate_experiment(experiment_path) + + assert experiment.multiposition_count == 2 + + def test_load_infers_xy_pixel_size_from_zoom_and_binning(tmp_path: Path): experiment_path = tmp_path / "experiment.yml" payload = { @@ -926,7 +976,7 @@ def test_has_complete_canonical_data_store_rejects_missing_expected_pyramid( ) -def test_has_complete_canonical_data_store_requires_completed_progress_record( +def test_has_complete_canonical_data_store_accepts_stale_progress_when_public_ome_is_valid( tmp_path: Path, ): experiment_path = tmp_path / "experiment.yml" @@ -958,8 +1008,53 @@ def test_has_complete_canonical_data_store_requires_completed_progress_record( root = zarr.open_group(str(materialized.store_path), mode="a") progress = dict(root.attrs["ingestion_progress"]) progress["status"] = "in_progress" + progress["base_progress"] = { + "total_regions": int(progress.get("base_progress", {}).get("total_regions", 1)), + "completed_regions": 0, + } root.attrs["ingestion_progress"] = progress + assert ( + has_complete_canonical_data_store( + materialized.store_path, + expected_chunks_tpczyx=(1, 1, 1, 1, 2, 2), + expected_pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), + ) + is True + ) + + +def test_has_complete_canonical_data_store_rejects_stale_progress_when_public_ome_is_invalid( + tmp_path: Path, +): + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment( + experiment_path, save_directory=tmp_path, file_type="TIFF" + ) + experiment = load_navigate_experiment(experiment_path) + + source_data = np.arange(24, dtype=np.uint16).reshape(2, 3, 4) + source_path = tmp_path / "source.npy" + np.save(source_path, source_data) + + materialized = materialize_experiment_data_store( + experiment=experiment, + 
source_path=source_path, + chunks=(1, 1, 1, 1, 2, 2), + pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), + ) + + root = zarr.open_group(str(materialized.store_path), mode="a") + progress = dict(root.attrs["ingestion_progress"]) + progress["status"] = "in_progress" + progress["base_progress"] = { + "total_regions": int(progress.get("base_progress", {}).get("total_regions", 1)), + "completed_regions": 0, + } + root.attrs["ingestion_progress"] = progress + # Break OME-root validation to ensure stale/incomplete progress is rejected. + root.attrs["ome"] = {"version": "0.5"} + assert ( has_complete_canonical_data_store( materialized.store_path, @@ -999,10 +1094,12 @@ def test_materialize_experiment_data_store_reuses_complete_store_by_default_and_ root = zarr.open_group(str(initial.store_path), mode="r") assert reused.store_path == initial.store_path - assert tuple(root["data"].chunks) == (1, 1, 1, 1, 2, 2) - assert root.attrs["data_pyramid_levels"] == ["data"] - if "data_pyramid" in root: - assert list(root["data_pyramid"].array_keys()) == [] + assert tuple(root[SOURCE_CACHE_COMPONENT].chunks) == (1, 1, 1, 1, 2, 2) + assert root[SOURCE_CACHE_COMPONENT].attrs["pyramid_levels"] == [ + SOURCE_CACHE_COMPONENT + ] + if SOURCE_CACHE_PYRAMID_ROOT in root: + assert list(root[SOURCE_CACHE_PYRAMID_ROOT].array_keys()) == [] rebuilt = materialize_experiment_data_store( experiment=experiment, @@ -1013,8 +1110,11 @@ def test_materialize_experiment_data_store_reuses_complete_store_by_default_and_ ) rebuilt_root = zarr.open_group(str(rebuilt.store_path), mode="r") - assert tuple(rebuilt_root["data"].chunks) == (1, 1, 1, 2, 3, 4) - assert rebuilt_root.attrs["data_pyramid_levels"] == ["data", "data_pyramid/level_1"] + assert tuple(rebuilt_root[SOURCE_CACHE_COMPONENT].chunks) == (1, 1, 1, 2, 3, 4) + assert rebuilt_root[SOURCE_CACHE_COMPONENT].attrs["pyramid_levels"] == [ + SOURCE_CACHE_COMPONENT, + source_cache_component(level_index=1), + ] def 
test_materialize_experiment_data_store_handles_same_component_rewrite( @@ -1234,7 +1334,9 @@ def test_materialize_experiment_data_store_stacks_bdv_n5_setups( root = zarr.open_group(str(materialized.store_path), mode="r") assert tuple(root[SOURCE_CACHE_COMPONENT].shape) == (1, 2, 2, 2, 3, 4) - assert root["clearex/metadata"].attrs["source_data_path"] == str(source_path.resolve()) + assert root["clearex/metadata"].attrs["source_data_path"] == str( + source_path.resolve() + ) assert materialized.source_image_info.shape == (1, 2, 2, 4, 3, 2) assert materialized.source_image_info.axes == "TPCXYZ" assert root["A/1/0/0"].shape == (1, 2, 2, 3, 4) @@ -1242,15 +1344,61 @@ def test_materialize_experiment_data_store_stacks_bdv_n5_setups( for position_index in range(2): for channel_index in range(2): loaded = np.array( - root[SOURCE_CACHE_COMPONENT][ - 0, position_index, channel_index, :, :, : - ] + root[SOURCE_CACHE_COMPONENT][0, position_index, channel_index, :, :, :] ) assert np.array_equal( loaded, expected_blocks[(position_index, channel_index)] ) +def test_materialize_experiment_data_store_skips_n5_setups_without_persisted_chunks( + tmp_path: Path, +) -> None: + experiment_path = tmp_path / "experiment.yml" + _write_minimal_experiment( + experiment_path, + save_directory=tmp_path, + file_type="N5", + is_multiposition=True, + ) + _write_multipositions_sidecar(tmp_path / "multi_positions.yml", count=2) + experiment = load_navigate_experiment(experiment_path) + + source_path = tmp_path / "CH00_000000.n5" + expected_blocks = { + (0, 0): np.full((2, 3, 4), fill_value=13, dtype=np.uint16), + (1, 0): np.full((2, 3, 4), fill_value=23, dtype=np.uint16), + } + for setup_index, block in { + 0: expected_blocks[(0, 0)], + 1: expected_blocks[(1, 0)], + 2: np.zeros((2, 3, 4), dtype=np.uint16), + }.items(): + _write_real_n5_dataset( + source_path, + component=f"setup{setup_index}/timepoint0/s0", + data_xyz=np.transpose(block, (2, 1, 0)), + block_size_xyz=(4, 3, 1), + ) + 
_strip_n5_component_payload_files(source_path, component="setup2/timepoint0/s0") + + # Do not provide XML here: fallback setup indexing should still ignore + # placeholder setups without persisted chunks. + materialized = materialize_experiment_data_store( + experiment=experiment, + source_path=source_path, + chunks=(1, 1, 1, 2, 2, 2), + pyramid_factors=((1,), (1,), (1,), (1,), (1,), (1,)), + ) + + root = zarr.open_group(str(materialized.store_path), mode="r") + assert tuple(root[SOURCE_CACHE_COMPONENT].shape) == (1, 2, 1, 2, 3, 4) + + for position_index in range(2): + loaded = np.array(root[SOURCE_CACHE_COMPONENT][0, position_index, 0, :, :, :]) + assert np.array_equal(loaded, expected_blocks[(position_index, 0)]) + + def test_load_navigate_experiment_source_image_info_summarizes_bdv_n5( tmp_path: Path, ) -> None: From e9c6c679298e27da95f2b6e23a54e79ebee0b5c8 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Mon, 23 Mar 2026 10:43:58 -0500 Subject: [PATCH 05/10] runtime: always rerun mip_export and serialize unsafe source-aligned writes Stop provenance dedup from skipping mip_export runs by removing mip_export from the dedup operation set in _run_workflow. This ensures MIP export executes even when a matching provenance run exists, which avoids stale latest-output reuse during validation/export iterations. Fix a correctness bug in source-aligned canonical ingestion writes: when z_batch_depth does not align to the target z chunk size, concurrent region submissions can race on read-modify-write chunk updates and zero out sub-chunk ranges. Add _source_aligned_writes_require_serial_submission(...) and force regions_per_submission=1 for this misaligned case while preserving existing concurrency when writes are chunk-aligned. Plumb target chunk metadata into _write_dask_array_source_aligned_plane_batches and cover behavior with regression tests for serial-submission detection and serialized batch execution under misalignment. 
Add a workflow regression test that asserts mip_export is executed (not skipped) even with a matching completed provenance run. --- src/clearex/io/experiment.py | 42 ++++++++++++++++++++ src/clearex/main.py | 1 - tests/io/test_experiment.py | 69 ++++++++++++++++++++++++++++++++ tests/test_main.py | 77 ++++++++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+), 1 deletion(-) diff --git a/src/clearex/io/experiment.py b/src/clearex/io/experiment.py index f49ead0..92098eb 100644 --- a/src/clearex/io/experiment.py +++ b/src/clearex/io/experiment.py @@ -2774,6 +2774,31 @@ def _estimate_source_aligned_submission_batch_count( ) +def _source_aligned_writes_require_serial_submission( + *, + z_batch_depth: int, + target_chunks_tpczyx: CanonicalShapeTpczyx, +) -> bool: + """Return whether source-aligned writes must serialize region submissions. + + Parameters + ---------- + z_batch_depth : int + Source-aligned z-planes per write region. + target_chunks_tpczyx : tuple[int, int, int, int, int, int] + Target chunk shape in canonical ``(t, p, c, z, y, x)`` order. + + Returns + ------- + bool + ``True`` when source-aligned z regions do not land on target z-chunk + boundaries and concurrent submissions could clobber partially-written + chunks. + """ + target_chunk_z = max(1, int(target_chunks_tpczyx[3])) + return int(max(1, int(z_batch_depth))) % int(target_chunk_z) != 0 + + def _write_numpy_region( block: np.ndarray, *, @@ -2931,6 +2956,7 @@ def _write_dask_array_source_aligned_plane_batches( shape_tpczyx: CanonicalShapeTpczyx, z_batch_depth: int, dtype_itemsize: int, + target_chunks_tpczyx: Optional[CanonicalShapeTpczyx] = None, client: Optional["Client"] = None, progress_callback: Optional[ProgressCallback] = None, progress_start: int = 0, @@ -2955,6 +2981,9 @@ def _write_dask_array_source_aligned_plane_batches( Full output shape. z_batch_depth : int Number of z-planes per source-aligned write region. 
+ target_chunks_tpczyx : tuple[int, int, int, int, int, int], optional + Target canonical chunk shape. When provided, submission concurrency is + constrained to avoid concurrent partial writes into the same z chunk. dtype_itemsize : int Bytes per array element. client : dask.distributed.Client, optional @@ -3022,6 +3051,18 @@ def _write_dask_array_source_aligned_plane_batches( worker_count=detected_worker_count, worker_memory_limit_bytes=detected_worker_memory_limit_bytes, ) + if ( + target_chunks_tpczyx is not None + and _source_aligned_writes_require_serial_submission( + z_batch_depth=z_batch_depth, + target_chunks_tpczyx=target_chunks_tpczyx, + ) + ): + # Concurrent source-aligned writes can race when z-regions split target + # z chunks (read-modify-write overlap). Serialize submissions in this + # case to preserve correctness. + regions_per_submission = 1 + remaining_regions = int(total_regions - start_region) total_batches = max(1, math.ceil(remaining_regions / regions_per_submission)) completed_regions = int(start_region) @@ -3998,6 +4039,7 @@ def _write_canonical_component( component=component, shape_tpczyx=canonical_shape, z_batch_depth=source_aligned_z_batch_depth, + target_chunks_tpczyx=normalized_chunks, dtype_itemsize=int(source_dtype.itemsize), client=write_client, progress_callback=progress_callback, diff --git a/src/clearex/main.py b/src/clearex/main.py index 3281580..6ad6190 100644 --- a/src/clearex/main.py +++ b/src/clearex/main.py @@ -215,7 +215,6 @@ def run_usegment3d_analysis(*, zarr_path, parameters, client, progress_callback) "particle_detection", "usegment3d", "registration", - "mip_export", } ) _ANALYSIS_PROVENANCE_REQUIRED_COMPONENTS: Dict[str, tuple[str, ...]] = { diff --git a/tests/io/test_experiment.py b/tests/io/test_experiment.py index da63fa5..83464f8 100644 --- a/tests/io/test_experiment.py +++ b/tests/io/test_experiment.py @@ -1732,3 +1732,72 @@ def test_estimate_source_plane_batch_depth_respects_worker_memory_limit(): assert depth 
< 256 assert depth <= 64 + + +def test_source_aligned_writes_require_serial_submission_when_chunk_misaligned() -> ( + None +): + assert ( + experiment_module._source_aligned_writes_require_serial_submission( + z_batch_depth=128, + target_chunks_tpczyx=(1, 1, 1, 256, 256, 256), + ) + is True + ) + assert ( + experiment_module._source_aligned_writes_require_serial_submission( + z_batch_depth=256, + target_chunks_tpczyx=(1, 1, 1, 256, 256, 256), + ) + is False + ) + + +def test_write_source_aligned_batches_serializes_when_chunk_misaligned( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + store_path = tmp_path / "source_aligned_serialized.zarr" + root = zarr.open_group(str(store_path), mode="w") + root.create_dataset( + SOURCE_CACHE_COMPONENT, + shape=(1, 1, 1, 6, 4, 4), + chunks=(1, 1, 1, 4, 2, 2), + dtype="uint16", + overwrite=True, + ) + + source_data = np.arange(1 * 1 * 1 * 6 * 4 * 4, dtype=np.uint16).reshape( + (1, 1, 1, 6, 4, 4) + ) + source = da.from_array(source_data, chunks=(1, 1, 1, 1, 4, 4)) + compute_batch_sizes: list[int] = [] + original_compute = experiment_module._compute_dask_graph + + monkeypatch.setattr( + experiment_module, + "_estimate_source_aligned_submission_batch_count", + lambda **kwargs: 8, + ) + + def _capture_compute(graph, *, client=None): + compute_batch_sizes.append(len(list(graph))) + return original_compute(graph, client=client) + + monkeypatch.setattr(experiment_module, "_compute_dask_graph", _capture_compute) + + experiment_module._write_dask_array_source_aligned_plane_batches( + array=source, + store_path=store_path, + component=SOURCE_CACHE_COMPONENT, + shape_tpczyx=(1, 1, 1, 6, 4, 4), + z_batch_depth=2, + dtype_itemsize=int(np.dtype(source_data.dtype).itemsize), + target_chunks_tpczyx=(1, 1, 1, 4, 2, 2), + ) + + reloaded = np.asarray( + zarr.open_group(str(store_path), mode="r")[SOURCE_CACHE_COMPONENT] + ) + assert np.array_equal(reloaded, source_data) + assert compute_batch_sizes == [1, 1, 1] diff --git 
a/tests/test_main.py b/tests/test_main.py index ac0dd5e..d621507 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1483,6 +1483,83 @@ def _fake_deconvolution(*, zarr_path, parameters, client, progress_callback): assert called["value"] is True +def test_run_workflow_does_not_skip_mip_export_on_matching_provenance( + tmp_path: Path, monkeypatch +) -> None: + store_path = tmp_path / "analysis_store_mip_match.zarr" + root = main_module.zarr.open_group(str(store_path), mode="w") + root.create_dataset( + name=main_module.SOURCE_CACHE_COMPONENT, + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + root.require_group(main_module.analysis_auxiliary_root("mip_export")) + + workflow = WorkflowConfig( + file=str(store_path), + prefer_dask=True, + mip_export=True, + analysis_parameters={ + "mip_export": { + "input_source": "data", + "position_mode": "per_position", + "export_format": "ome-tiff", + "output_directory": "", + "force_rerun": False, + } + }, + ) + + persist_run_provenance( + zarr_path=store_path, + workflow=workflow, + image_info=ImageInfo( + path=store_path, + shape=(1, 1, 1, 2, 2, 2), + dtype=np.uint16, + axes=["t", "p", "c", "z", "y", "x"], + ), + steps=[{"name": "mip_export", "parameters": {}}], + repo_root=tmp_path, + ) + + called = {"value": False} + + def _fake_configure_dask_backend(*, workflow, logger, exit_stack, workload="io"): + del workflow, logger, exit_stack, workload + return None + + def _fake_mip_export(*, zarr_path, parameters, client, progress_callback): + del zarr_path, parameters, client, progress_callback + called["value"] = True + return SimpleNamespace( + component=main_module.analysis_auxiliary_root("mip_export"), + source_component="data", + output_directory=str(tmp_path / "mip_output"), + export_format="ome-tiff", + position_mode="per_position", + task_count=3, + exported_files=3, + projections=("xy", "xz", "yz"), + ) + + monkeypatch.setattr( + main_module, 
"_configure_dask_backend", _fake_configure_dask_backend + ) + monkeypatch.setattr(main_module, "run_mip_export_analysis", _fake_mip_export) + monkeypatch.setattr(main_module, "is_navigate_experiment_file", lambda path: False) + monkeypatch.setattr(main_module, "is_legacy_clearex_store", lambda path: False) + + main_module._run_workflow( + workflow=workflow, + logger=_test_logger("clearex.test.main.mip_force_execution"), + ) + + assert called["value"] is True + + def test_run_workflow_skips_matching_provenance_usegment3d( tmp_path: Path, monkeypatch ) -> None: From 8fe0ec762b36e280c8eb0c6e72c215569cd932ef Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Mon, 23 Mar 2026 11:29:06 -0500 Subject: [PATCH 06/10] mip_export: fix multi-position TIFF z-resample writeback corruption Fix a data-loss bug in _resample_axis_linear_to_uint16 that affected multi-position TIFF outputs when resampling the z axis. For non-leading resample axes (for example axes p,z,x where z is axis 1), reshaping the moved destination could produce a non-shared copy, so interpolation writes never reached the destination buffer and outputs remained zero-filled after resample rewrite. Detect when the reshaped destination does not share memory with the moved destination view and explicitly copy the interpolated payload back into dst_moved before returning. Preserve existing blockwise interpolation behavior and memory budgeting. Add regression coverage for this path by validating non-leading-axis writeback and multi-position TIFF xz/yz resampled outputs. Update touched mip_export tests to use zarr create_array for zarr v3 compatibility and refresh legacy component-path assertions to the current clearex namespaced contract. 
--- src/clearex/mip_export/pipeline.py | 6 ++ tests/mip_export/test_pipeline.py | 105 +++++++++++++++++++++++++---- 2 files changed, 97 insertions(+), 14 deletions(-) diff --git a/src/clearex/mip_export/pipeline.py b/src/clearex/mip_export/pipeline.py index 4bb932c..0553762 100644 --- a/src/clearex/mip_export/pipeline.py +++ b/src/clearex/mip_export/pipeline.py @@ -470,6 +470,7 @@ def _resample_axis_linear_to_uint16( dst_moved = np.moveaxis(dst, axis_index, 0) src_flat = src_moved.reshape(src_len, -1) dst_flat = dst_moved.reshape(dst_len, -1) + dst_requires_copy_back = not np.shares_memory(dst_flat, dst_moved) if dst_flat.size == 0: return @@ -477,6 +478,8 @@ def _resample_axis_linear_to_uint16( if src_len <= 1: repeated = np.repeat(src_flat[:1, :], repeats=dst_len, axis=0) dst_flat[:, :] = _to_uint16(repeated) + if dst_requires_copy_back: + dst_moved[...] = dst_flat.reshape(dst_moved.shape) return sample_positions = np.linspace( @@ -504,6 +507,9 @@ def _resample_axis_linear_to_uint16( ) dst_flat[:, start:stop] = _to_uint16(interpolated) + if dst_requires_copy_back: + dst_moved[...] 
= dst_flat.reshape(dst_moved.shape) + def _resample_tiff_projection_z_axis_if_needed( *, diff --git a/tests/mip_export/test_pipeline.py b/tests/mip_export/test_pipeline.py index 91d2b9c..13867a0 100644 --- a/tests/mip_export/test_pipeline.py +++ b/tests/mip_export/test_pipeline.py @@ -82,11 +82,10 @@ def test_run_mip_export_analysis_writes_uint16_ome_tiff_outputs_with_calibration data = np.arange(1 * 2 * 1 * 3 * 4 * 5, dtype=np.float32).reshape( (1, 2, 1, 3, 4, 5) ) - root.create_dataset( + root.create_array( name="data", data=data, chunks=(1, 1, 1, 3, 4, 5), - dtype="float32", overwrite=True, ) root["data"].attrs["voxel_size_um_zyx"] = [5.0, 2.0, 3.0] @@ -102,7 +101,7 @@ def test_run_mip_export_analysis_writes_uint16_ome_tiff_outputs_with_calibration client=None, ) - assert summary.component == "results/mip_export/latest" + assert summary.component == "clearex/results/mip_export/latest" assert summary.export_format == "ome-tiff" assert summary.position_mode == "per_position" assert summary.exported_files == 6 @@ -156,19 +155,19 @@ def test_run_mip_export_analysis_writes_uint16_ome_tiff_outputs_with_calibration assert yz_pixels["PhysicalSizeY"] == "2.0" latest_attrs = dict( - zarr.open_group(str(store_path), mode="r")["results"]["mip_export"][ - "latest" - ].attrs + zarr.open_group(str(store_path), mode="r")["clearex"]["results"][ + "mip_export" + ]["latest"].attrs ) assert latest_attrs["exported_files"] == 6 assert latest_attrs["export_format"] == "ome-tiff" assert latest_attrs["voxel_size_um_zyx"] == [5.0, 2.0, 3.0] latest_ref_attrs = dict( - zarr.open_group(str(store_path), mode="r")["provenance"]["latest_outputs"][ - "mip_export" - ].attrs + zarr.open_group(str(store_path), mode="r")["clearex"]["provenance"][ + "latest_outputs" + ]["mip_export"].attrs ) - assert latest_ref_attrs["component"] == "results/mip_export/latest" + assert latest_ref_attrs["component"] == "clearex/results/mip_export/latest" def 
test_run_mip_export_analysis_writes_multi_position_zarr_outputs( @@ -177,11 +176,10 @@ def test_run_mip_export_analysis_writes_multi_position_zarr_outputs( store_path = tmp_path / "mip_zarr_store.zarr" root = zarr.open_group(str(store_path), mode="w") data = np.arange(2 * 3 * 2 * 4 * 3 * 5, dtype=np.uint16).reshape((2, 3, 2, 4, 3, 5)) - root.create_dataset( + root.create_array( name="data", data=data, chunks=(1, 1, 1, 2, 3, 5), - dtype="uint16", overwrite=True, ) @@ -219,6 +217,86 @@ def test_run_mip_export_analysis_writes_multi_position_zarr_outputs( assert list(yz_root["data"].attrs["axes"]) == ["p", "z", "y"] +def test_run_mip_export_analysis_writes_multi_position_tiff_outputs_with_resampled_z( + tmp_path: Path, +) -> None: + store_path = tmp_path / "mip_multi_tiff_store.zarr" + root = zarr.open_group(str(store_path), mode="w") + data = np.arange(1 * 2 * 1 * 3 * 4 * 5, dtype=np.float32).reshape( + (1, 2, 1, 3, 4, 5) + ) + root.create_array( + name="data", + data=data, + chunks=(1, 1, 1, 2, 2, 2), + overwrite=True, + ) + root["data"].attrs["voxel_size_um_zyx"] = [5.0, 2.0, 3.0] + + summary = run_mip_export_analysis( + zarr_path=store_path, + parameters={ + "input_source": "data", + "position_mode": "multi_position", + "export_format": "tiff", + "output_directory": str(tmp_path / "mip_multi_tiff_outputs"), + }, + client=None, + ) + + output_directory = Path(summary.output_directory) + xz_path = output_directory / "mip_xz_t0000_c0000.tif" + yz_path = output_directory / "mip_yz_t0000_c0000.tif" + assert xz_path.exists() + assert yz_path.exists() + + xz = np.asarray(tifffile.imread(str(xz_path))) + yz = np.asarray(tifffile.imread(str(yz_path))) + expected_source = data[0, :, 0, :, :, :] + xz_expected = np.max(expected_source, axis=2).astype(np.uint16) + yz_expected = np.max(expected_source, axis=3).astype(np.uint16) + + assert tuple(xz.shape) == ( + 2, + pipeline._resampled_axis_length( + axis_length=int(xz_expected.shape[1]), + source_spacing_um=5.0, + 
target_spacing_um=3.0, + ), + 5, + ) + assert tuple(yz.shape) == ( + 2, + pipeline._resampled_axis_length( + axis_length=int(yz_expected.shape[1]), + source_spacing_um=5.0, + target_spacing_um=2.0, + ), + 4, + ) + assert int(xz.max()) > 0 + assert int(yz.max()) > 0 + np.testing.assert_array_equal(xz[:, 0, :], xz_expected[:, 0, :]) + np.testing.assert_array_equal(xz[:, -1, :], xz_expected[:, -1, :]) + np.testing.assert_array_equal(yz[:, 0, :], yz_expected[:, 0, :]) + np.testing.assert_array_equal(yz[:, -1, :], yz_expected[:, -1, :]) + + +def test_resample_axis_linear_to_uint16_writes_nonleading_axis() -> None: + source = np.arange(2 * 3 * 4, dtype=np.uint16).reshape((2, 3, 4)) + destination = np.zeros((2, 7, 4), dtype=np.uint16) + + pipeline._resample_axis_linear_to_uint16( + source=source, + destination=destination, + axis=1, + ) + + assert int(destination.max()) > 0 + np.testing.assert_array_equal(destination[:, 0, :], source[:, 0, :]) + np.testing.assert_array_equal(destination[:, -1, :], source[:, -1, :]) + + @pytest.mark.parametrize( ("projection", "expected_axis"), [("xy", 0), ("xz", 1), ("yz", 2)] ) @@ -304,11 +382,10 @@ def test_run_mip_export_analysis_distributed_writes_expected_outputs( store_path = tmp_path / "mip_distributed_store.zarr" root = zarr.open_group(str(store_path), mode="w") data = np.arange(1 * 2 * 1 * 4 * 6 * 8, dtype=np.uint16).reshape((1, 2, 1, 4, 6, 8)) - root.create_dataset( + root.create_array( name="data", data=data, chunks=(1, 1, 1, 2, 3, 4), - dtype="uint16", overwrite=True, ) From 44db5181286e138c49c33de439a5614fa72441a8 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Mon, 23 Mar 2026 12:46:00 -0500 Subject: [PATCH 07/10] registration: load multiposition stage metadata from namespaced store attrs Fix registration startup failures on canonical OME-Zarr v3 stores where source_experiment/stage_rows/spatial_calibration are persisted under clearex/metadata instead of root attrs. 
run_registration_analysis now merges root attrs with namespaced store metadata before resolving spatial calibration and stage rows. Extend _parse_multiposition_stage_rows to accept list-of-dict payloads (x/y/z/theta/f or uppercase variants), and make _load_stage_rows prefer embedded stage_rows and navigate_experiment metadata before falling back to source_experiment sidecar loading. Add regression coverage for dict-row parsing and for run_registration_analysis progressing past the multiposition stage-metadata gate when metadata is provided via clearex/metadata. --- src/clearex/registration/pipeline.py | 37 ++++++++++++- tests/registration/test_pipeline.py | 82 +++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 4 deletions(-) diff --git a/src/clearex/registration/pipeline.py b/src/clearex/registration/pipeline.py index 4ca2f6b..5925faf 100644 --- a/src/clearex/registration/pipeline.py +++ b/src/clearex/registration/pipeline.py @@ -35,6 +35,7 @@ analysis_auxiliary_root, analysis_cache_data_component, analysis_cache_root, + load_store_metadata, public_analysis_root, ) from clearex.io.provenance import register_latest_output_reference @@ -181,6 +182,20 @@ def _parse_multiposition_stage_rows(payload: Any) -> list[dict[str, float]]: parsed: list[dict[str, float]] = [] for row in rows: + if isinstance(row, Mapping): + parsed.append( + { + "x": _safe_float(row.get("x", row.get("X")), default=0.0), + "y": _safe_float(row.get("y", row.get("Y")), default=0.0), + "z": _safe_float(row.get("z", row.get("Z")), default=0.0), + "theta": _safe_float( + row.get("theta", row.get("THETA")), default=0.0 + ), + "f": _safe_float(row.get("f", row.get("F")), default=0.0), + } + ) + continue + if not isinstance(row, (list, tuple)): continue @@ -204,6 +219,16 @@ def _value(field: str, fallback_index: int) -> float: def _load_stage_rows(root_attrs: Mapping[str, Any]) -> list[dict[str, float]]: """Load multiposition stage rows from experiment metadata.""" + parsed = 
_parse_multiposition_stage_rows(root_attrs.get("stage_rows")) + if parsed: + return parsed + + navigate_payload = root_attrs.get("navigate_experiment") + if isinstance(navigate_payload, Mapping): + parsed = _parse_multiposition_stage_rows(navigate_payload.get("MultiPositions")) + if parsed: + return parsed + source_experiment = root_attrs.get("source_experiment") if not isinstance(source_experiment, str): return [] @@ -2070,10 +2095,18 @@ def run_registration_analysis( ) root_attrs = dict(root.attrs) + try: + store_metadata = load_store_metadata(root) + except Exception: + store_metadata = {} + merged_metadata = dict(root_attrs) + if isinstance(store_metadata, Mapping): + merged_metadata.update(dict(store_metadata)) + spatial_calibration = spatial_calibration_from_dict( - root_attrs.get("spatial_calibration") + merged_metadata.get("spatial_calibration") ) - stage_rows = _load_stage_rows(root_attrs) + stage_rows = _load_stage_rows(merged_metadata) if len(positions) > 1 and len(stage_rows) < len(positions): raise ValueError( "registration requires multiposition stage metadata when more than one position is present." 
diff --git a/tests/registration/test_pipeline.py b/tests/registration/test_pipeline.py index 07f827c..0ee9639 100644 --- a/tests/registration/test_pipeline.py +++ b/tests/registration/test_pipeline.py @@ -135,6 +135,31 @@ def test_build_edge_specs_only_keeps_overlapping_neighbors() -> None: ] +def test_parse_multiposition_stage_rows_accepts_mapping_rows() -> None: + parsed = registration_pipeline._parse_multiposition_stage_rows( + [ + {"x": 1.5, "y": -2.0, "z": 3.0, "theta": 4.0, "f": 5.0}, + {"X": 2.5, "Y": -1.0, "Z": 1.0, "THETA": 8.0, "F": 9.0}, + ] + ) + + assert len(parsed) == 2 + assert parsed[0] == { + "x": pytest.approx(1.5), + "y": pytest.approx(-2.0), + "z": pytest.approx(3.0), + "theta": pytest.approx(4.0), + "f": pytest.approx(5.0), + } + assert parsed[1] == { + "x": pytest.approx(2.5), + "y": pytest.approx(-1.0), + "z": pytest.approx(1.0), + "theta": pytest.approx(8.0), + "f": pytest.approx(9.0), + } + + def test_resolve_source_components_for_level_uses_requested_pyramid( tmp_path: Path, ) -> None: @@ -420,6 +445,61 @@ def _fake_pairwise(**kwargs): assert len(progress_percents) >= 5 +def test_run_registration_analysis_uses_namespaced_stage_metadata( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + store_path = tmp_path / "registration_namespaced_metadata_store.zarr" + root = zarr.open_group(str(store_path), mode="w") + data = root.create_dataset( + name="data", + shape=(1, 2, 1, 3, 3, 4), + chunks=(1, 1, 1, 3, 3, 4), + dtype="uint16", + overwrite=True, + ) + data[:] = np.uint16(1) + + metadata_group = root.require_group("clearex/metadata") + metadata_group.attrs.update( + { + "schema": "clearex.store_metadata.v1", + "spatial_calibration": { + "schema": SPATIAL_CALIBRATION_SCHEMA, + "stage_axis_map_zyx": {"z": "+z", "y": "+y", "x": "+x"}, + "theta_mode": "rotate_zy_about_x", + }, + "stage_rows": [ + {"x": 0.0, "y": 0.0, "z": 0.0, "theta": 0.0, "f": 0.0}, + {"x": 4.0, "y": 0.0, "z": 0.0, "theta": 0.0, "f": 0.0}, + ], + } + ) + + def 
_raise_after_stage_metadata(*args, **kwargs): + del args, kwargs + raise RuntimeError("sentinel-after-stage-metadata") + + monkeypatch.setattr( + registration_pipeline, + "_build_nominal_transforms_xyz", + _raise_after_stage_metadata, + ) + + with pytest.raises(RuntimeError, match="sentinel-after-stage-metadata"): + registration_pipeline.run_registration_analysis( + zarr_path=store_path, + parameters={ + "input_source": "data", + "registration_channel": 0, + "registration_type": "rigid", + "input_resolution_level": 0, + "anchor_mode": "central", + "blend_mode": "feather", + }, + client=None, + ) + + def test_source_subvolume_for_overlap_returns_tighter_slices() -> None: """_source_subvolume_for_overlap should return slices narrower than the full tile.""" transform = np.eye(4, dtype=np.float64) @@ -587,5 +667,3 @@ def test_memory_estimate_positive(self): # Should be dominated by source tile size (~337 GiB) # but with only 4× source voxels it's much more reasonable assert est < 2_000_000_000_000 # < 2 TiB sanity check - - From bb1f3b18b5ff8c80f57f4162ee4b38092e02a7be Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Mon, 23 Mar 2026 12:52:04 -0500 Subject: [PATCH 08/10] registration: make output writes zarr-v3 compatible Fix registration runtime failures caused by zarr v3 AsyncGroup.create_dataset requiring explicit shape for data payload writes. Replace registration output writes that used create_dataset(data=...) with create_array(...) so blend-weight profiles and edge/affine metadata arrays are written through the v3-compatible API. Also align registration test expectations with the canonical OME-Zarr v3 layout by asserting auxiliary artifacts under clearex/results/... and fused runtime data under clearex/runtime_cache/results/... . Validated with targeted registration tests that cover run_registration_analysis output materialization and namespaced stage-metadata handling. 
--- src/clearex/registration/pipeline.py | 35 +++++++++++++++++----------- tests/registration/test_pipeline.py | 4 ++-- 2 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/clearex/registration/pipeline.py b/src/clearex/registration/pipeline.py index 5925faf..55fa7e9 100644 --- a/src/clearex/registration/pipeline.py +++ b/src/clearex/registration/pipeline.py @@ -1954,7 +1954,7 @@ def _prepare_output_group( if auxiliary_root in root: del root[auxiliary_root] latest = root.require_group(cache_root) - latest.create_dataset( + latest.create_array( name="data", shape=output_shape_tpczyx, chunks=output_chunks_tpczyx, @@ -1983,7 +1983,7 @@ def _prepare_output_group( "data_component": analysis_cache_data_component("registration"), } ) - auxiliary_group.create_dataset( + auxiliary_group.create_array( name="affines_tpx44", shape=(output_shape_tpczyx[0], int(root[source_component].shape[1]), 4, 4), dtype=np.float64, @@ -2001,12 +2001,21 @@ def _prepare_output_group( overlap_zyx=overlap_zyx, ) blend_group = auxiliary_group.create_group("blend_weights", overwrite=True) - blend_group.create_dataset(name="profile_z", data=prof_z, dtype=np.float32, - overwrite=True) - blend_group.create_dataset(name="profile_y", data=prof_y, dtype=np.float32, - overwrite=True) - blend_group.create_dataset(name="profile_x", data=prof_x, dtype=np.float32, - overwrite=True) + blend_group.create_array( + name="profile_z", + data=np.asarray(prof_z, dtype=np.float32), + overwrite=True, + ) + blend_group.create_array( + name="profile_y", + data=np.asarray(prof_y, dtype=np.float32), + overwrite=True, + ) + blend_group.create_array( + name="profile_x", + data=np.asarray(prof_x, dtype=np.float32), + overwrite=True, + ) return ( public_analysis_root("registration"), @@ -2410,7 +2419,7 @@ def run_registration_analysis( ) write_root = zarr.open_group(str(zarr_path), mode="a") latest_group = write_root[analysis_auxiliary_root("registration")] - latest_group.create_dataset( + 
latest_group.create_array( name="edges_pe2", data=( np.asarray( @@ -2425,22 +2434,22 @@ def run_registration_analysis( ), overwrite=True, ) - latest_group.create_dataset( + latest_group.create_array( name="pairwise_affines_tex44", data=correction_affines_tex44, overwrite=True, ) - latest_group.create_dataset( + latest_group.create_array( name="edge_status_te", data=edge_status_te, overwrite=True, ) - latest_group.create_dataset( + latest_group.create_array( name="edge_residual_te", data=edge_residual_te, overwrite=True, ) - latest_group.create_dataset( + latest_group.create_array( name="transformed_bboxes_tpx6", data=transformed_bboxes_tpx6, overwrite=True, diff --git a/tests/registration/test_pipeline.py b/tests/registration/test_pipeline.py index 0ee9639..a81f66b 100644 --- a/tests/registration/test_pipeline.py +++ b/tests/registration/test_pipeline.py @@ -399,8 +399,8 @@ def _fake_pairwise(**kwargs): ) root = zarr.open_group(str(store_path), mode="r") - latest = root["results/registration/latest"] - data = latest["data"] + latest = root["clearex/results/registration/latest"] + data = root["clearex/runtime_cache/results/registration/latest/data"] affines = latest["affines_tpx44"] assert summary.source_component == "data" From d52d899a880727096908820db00a9bcd9e099813 Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Mon, 23 Mar 2026 19:10:21 -0500 Subject: [PATCH 09/10] Preserve physical voxel scale lineage across OME-Zarr workflows This patch closes the recent scale-regression loop by centralizing voxel-size\nresolution and propagating it consistently through shear, registration,\nvisualization, flatfield, mip export, and OME publication.\n\nKey changes\n- Added shared voxel-size resolver helpers in io/ome_store.py with ordered\n fallback across:\n 1) source component attrs\n 2) source_component ancestry\n 3) namespaced store metadata\n 4) root attrs\n 5) Navigate metadata\n 6) default (1,1,1)\n- Added source provenance labeling (voxel_size_resolution_source) so 
downstream products can report exactly where scale was resolved. - Made load_store_metadata read-only safe (no metadata-group creation for mode='r' roots) to prevent resolver-side write attempts during read flows. - Updated flatfield/shear/registration/mip-export and OME publish paths to use the shared resolver and persist resolved scale lineage metadata. - Updated visualization scale resolution so non-base downsampled layers still apply shape-ratio scaling when only root/store-level voxel metadata is available (preserves physical spacing in napari). - Added subsystem guardrails and validation guidance in src/clearex/shear/README.md to prevent recurrence. Tests - Added tests/io/test_ome_store_scale.py (resolver chain, metadata fallback, read-only metadata safety, publish scale transform lineage). - Added/updated scale lineage tests in flatfield, shear, registration, and visualization test suites. - Updated shear tests to assert runtime-cache output component access via summary.data_component. Validation executed - uv run ruff format src/clearex/io/ome_store.py src/clearex/visualization/pipeline.py tests/io/test_ome_store_scale.py tests/shear/test_pipeline.py - uv run ruff check src/clearex/io/ome_store.py src/clearex/visualization/pipeline.py tests/io/test_ome_store_scale.py tests/shear/test_pipeline.py - uv run --with pytest --with requests python -m pytest -q \ tests/shear/test_pipeline.py \ tests/io/test_ome_store_scale.py \ tests/flatfield/test_pipeline.py::test_copy_source_array_attrs_preserves_voxel_size \ tests/registration/test_pipeline.py::test_extract_voxel_size_uses_source_component_chain \ tests/visualization/test_pipeline.py::test_run_visualization_analysis_prefers_voxel_size_um_attrs \ tests/visualization/test_pipeline.py::test_run_visualization_analysis_resolves_scale_from_source_chain \
tests/visualization/test_pipeline.py::test_launch_napari_viewer_resolves_per_layer_scale_for_downsampled_components\n- Result: all targeted tests passed. --- src/clearex/flatfield/pipeline.py | 28 ++-- src/clearex/io/ome_store.py | 208 +++++++++++++++++++++++++- src/clearex/mip_export/pipeline.py | 41 ++--- src/clearex/registration/pipeline.py | 157 ++++++++++--------- src/clearex/shear/README.md | 89 +++++++++++ src/clearex/shear/pipeline.py | 50 ++----- src/clearex/visualization/pipeline.py | 81 +++++++++- tests/flatfield/test_pipeline.py | 31 ++++ tests/io/test_ome_store_scale.py | 140 +++++++++++++++++ tests/registration/test_pipeline.py | 64 +++++++- tests/shear/test_pipeline.py | 79 +++++++--- tests/visualization/test_pipeline.py | 96 +++++++++++- 12 files changed, 874 insertions(+), 190 deletions(-) create mode 100644 src/clearex/shear/README.md create mode 100644 tests/io/test_ome_store_scale.py diff --git a/src/clearex/flatfield/pipeline.py b/src/clearex/flatfield/pipeline.py index 2595385..a8281df 100644 --- a/src/clearex/flatfield/pipeline.py +++ b/src/clearex/flatfield/pipeline.py @@ -60,6 +60,7 @@ analysis_cache_data_component, analysis_cache_root, public_analysis_root, + resolve_voxel_size_um_zyx, ) from clearex.io.provenance import register_latest_output_reference @@ -978,10 +979,19 @@ def _copy_source_array_attrs( """ source_attrs = dict(root[source_component].attrs) root_attrs = dict(root.attrs) + resolved_voxel_size_um_zyx = resolve_voxel_size_um_zyx( + root, + source_component=source_component, + ) copied: dict[str, Any] = { "source_component": str(source_component), "chunk_shape_tpczyx": [int(v) for v in output_chunks], "pyramid_levels": [analysis_cache_data_component("flatfield")], + "voxel_size_um_zyx": [ + float(resolved_voxel_size_um_zyx[0]), + float(resolved_voxel_size_um_zyx[1]), + float(resolved_voxel_size_um_zyx[2]), + ], } for key in ( "scale_tpczyx", @@ -2391,15 +2401,11 @@ def _fit_profile_tiled( flatfield_sum[ 
y_read_start:y_read_stop, x_read_start:x_read_stop, - ] += ( - flatfield_tile.astype(np.float64) * blend_weights_64 - ) + ] += flatfield_tile.astype(np.float64) * blend_weights_64 darkfield_sum[ y_read_start:y_read_stop, x_read_start:x_read_stop, - ] += ( - darkfield_tile.astype(np.float64) * blend_weights_64 - ) + ] += darkfield_tile.astype(np.float64) * blend_weights_64 weight_sum[ y_read_start:y_read_stop, x_read_start:x_read_stop, @@ -3516,11 +3522,13 @@ def _consume_tile_result( profile_key = (position_index, channel_index) if profile_key in fallback_profile_keys: return - y_start, y_stop = int(tile_result.y_bounds[0]), int( - tile_result.y_bounds[1] + y_start, y_stop = ( + int(tile_result.y_bounds[0]), + int(tile_result.y_bounds[1]), ) - x_start, x_stop = int(tile_result.x_bounds[0]), int( - tile_result.x_bounds[1] + x_start, x_stop = ( + int(tile_result.x_bounds[0]), + int(tile_result.x_bounds[1]), ) profile_selection = ( slice(position_index, position_index + 1), diff --git a/src/clearex/io/ome_store.py b/src/clearex/io/ome_store.py index 66b8c37..6bf0536 100644 --- a/src/clearex/io/ome_store.py +++ b/src/clearex/io/ome_store.py @@ -35,6 +35,7 @@ import shutil import dask.array as da +import numpy as np import zarr from ome_zarr_models.common.plate import Column, Row, WellInPlate @@ -174,15 +175,206 @@ def load_store_metadata(path_or_root: str | Path | zarr.Group) -> dict[str, Any] root = ( path_or_root if isinstance(path_or_root, zarr.Group) - else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="a") + else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="r") ) - group = ensure_group(root, CLEAREX_METADATA_GROUP) - payload = dict(group.attrs) + try: + group = get_node(root, CLEAREX_METADATA_GROUP) + payload = dict(getattr(group, "attrs", {})) + except Exception: + payload = {} if payload: return payload return {"schema": STORE_METADATA_SCHEMA} +def _coerce_positive_numeric_sequence(payload: Any) -> 
Optional[tuple[float, ...]]: + """Return a finite positive numeric tuple when possible.""" + if not isinstance(payload, (list, tuple)): + return None + try: + parsed = tuple(float(value) for value in payload) + except Exception: + return None + if len(parsed) <= 0: + return None + if not all(np.isfinite(value) and value > 0 for value in parsed): + return None + return parsed + + +def _coerce_voxel_size_um_zyx_from_attrs( + attrs: Mapping[str, Any], +) -> Optional[tuple[float, float, float]]: + """Parse voxel size in ``(z, y, x)`` order from one attribute mapping.""" + direct_keys = ( + "voxel_size_um_zyx", + "voxel_size_zyx", + "pixel_size_zyx", + "physical_pixel_size_zyx", + ) + tczyx_keys = ("scale_tczyx", "scale") + tpczyx_keys = ( + "scale_tpczyx", + "physical_scale_tpczyx", + "voxel_size_tpczyx", + "pixel_size_tpczyx", + "physical_pixel_size_tpczyx", + ) + + for key in direct_keys: + if key not in attrs: + continue + parsed = _coerce_positive_numeric_sequence(attrs.get(key)) + if parsed is None or len(parsed) < 3: + continue + return (float(parsed[0]), float(parsed[1]), float(parsed[2])) + for key in tczyx_keys: + if key not in attrs: + continue + parsed = _coerce_positive_numeric_sequence(attrs.get(key)) + if parsed is None or len(parsed) < 5: + continue + return (float(parsed[2]), float(parsed[3]), float(parsed[4])) + for key in tpczyx_keys: + if key not in attrs: + continue + parsed = _coerce_positive_numeric_sequence(attrs.get(key)) + if parsed is None or len(parsed) < 6: + continue + return (float(parsed[3]), float(parsed[4]), float(parsed[5])) + return None + + +def _coerce_voxel_size_um_zyx_from_navigate_payload( + payload: Any, +) -> Optional[tuple[float, float, float]]: + """Parse voxel size in ``(z, y, x)`` order from Navigate payload.""" + if not isinstance(payload, Mapping): + return None + xy_value = payload.get("xy_pixel_size_um") + z_value = payload.get("z_step_um") + if xy_value is None or z_value is None: + return None + try: + xy_um = 
float(xy_value) + z_um = float(z_value) + except Exception: + return None + if not np.isfinite(xy_um) or not np.isfinite(z_um): + return None + if xy_um <= 0 or z_um <= 0: + return None + return (float(z_um), float(xy_um), float(xy_um)) + + +def _iter_component_attr_chain( + root: zarr.Group, + source_component: Optional[str], + *, + max_depth: int = 32, +) -> list[tuple[str, dict[str, Any]]]: + """Return source-component attrs followed by ancestor component attrs.""" + component = str(source_component or "").strip() + if not component: + return [] + + chain: list[tuple[str, dict[str, Any]]] = [] + visited: set[str] = set() + for _ in range(max(1, int(max_depth))): + if not component or component in visited: + break + visited.add(component) + try: + node = get_node(root, component) + attrs = dict(getattr(node, "attrs", {})) + except Exception: + break + chain.append((component, attrs)) + next_component = attrs.get("source_component") + if isinstance(next_component, str) and next_component.strip(): + component = next_component.strip() + continue + break + return chain + + +def resolve_voxel_size_um_zyx_with_source( + path_or_root: str | Path | zarr.Group, + *, + source_component: Optional[str] = None, +) -> tuple[tuple[float, float, float], str]: + """Resolve voxel size metadata with provenance of the selected source. + + Parameters + ---------- + path_or_root : str or pathlib.Path or zarr.Group + Store path or opened root group. + source_component : str, optional + Preferred component for direct + source-chain metadata lookup. + + Returns + ------- + tuple[tuple[float, float, float], str] + Resolved voxel size in ``(z, y, x)`` order and a machine-readable + source label. 
+ """ + root = ( + path_or_root + if isinstance(path_or_root, zarr.Group) + else zarr.open_group(str(Path(path_or_root).expanduser().resolve()), mode="r") + ) + store_metadata = load_store_metadata(root) + root_attrs = dict(root.attrs) + component_chain = _iter_component_attr_chain(root, source_component) + + for component, attrs in component_chain: + voxel = _coerce_voxel_size_um_zyx_from_attrs(attrs) + if voxel is not None: + return voxel, f"component:{component}" + + metadata_voxel = _coerce_voxel_size_um_zyx_from_attrs(store_metadata) + if metadata_voxel is not None: + return metadata_voxel, "store_metadata" + + root_voxel = _coerce_voxel_size_um_zyx_from_attrs(root_attrs) + if root_voxel is not None: + return root_voxel, "root_attrs" + + for component, attrs in component_chain: + navigate_voxel = _coerce_voxel_size_um_zyx_from_navigate_payload( + attrs.get("navigate_experiment") + ) + if navigate_voxel is not None: + return navigate_voxel, f"component_navigate:{component}" + + metadata_navigate_voxel = _coerce_voxel_size_um_zyx_from_navigate_payload( + store_metadata.get("navigate_experiment") + ) + if metadata_navigate_voxel is not None: + return metadata_navigate_voxel, "store_metadata_navigate" + + root_navigate_voxel = _coerce_voxel_size_um_zyx_from_navigate_payload( + root_attrs.get("navigate_experiment") + ) + if root_navigate_voxel is not None: + return root_navigate_voxel, "root_navigate" + + return (1.0, 1.0, 1.0), "default" + + +def resolve_voxel_size_um_zyx( + path_or_root: str | Path | zarr.Group, + *, + source_component: Optional[str] = None, +) -> tuple[float, float, float]: + """Resolve voxel size in ``(z, y, x)`` order from store metadata.""" + voxel, _ = resolve_voxel_size_um_zyx_with_source( + path_or_root, + source_component=source_component, + ) + return voxel + + def update_store_metadata( path_or_root: str | Path | zarr.Group, **payload: Any ) -> dict[str, Any]: @@ -599,7 +791,12 @@ def publish_image_collection_from_cache( shape_tpczyx = 
tuple(int(value) for value in base_array.shape) position_count = max(1, int(shape_tpczyx[1])) - voxel_size_um_zyx = base_array.attrs.get("voxel_size_um_zyx") + voxel_size_um_zyx, voxel_size_resolution_source = ( + resolve_voxel_size_um_zyx_with_source( + root, + source_component=cache_components[0], + ) + ) metadata = load_store_metadata(root) translations = metadata.get("position_translations_zyx_um") if not isinstance(translations, list): @@ -636,6 +833,9 @@ def publish_image_collection_from_cache( ), level_factors_tpczyx=level_factors, ) + image_group.attrs["voxel_size_resolution_source"] = str( + voxel_size_resolution_source + ) for level_index, level_array in enumerate(level_arrays): level_shape_tczyx = ( int(level_array.shape[0]), diff --git a/src/clearex/mip_export/pipeline.py b/src/clearex/mip_export/pipeline.py index 0553762..b49cd10 100644 --- a/src/clearex/mip_export/pipeline.py +++ b/src/clearex/mip_export/pipeline.py @@ -43,7 +43,10 @@ import zarr # Local Imports -from clearex.io.ome_store import analysis_auxiliary_root +from clearex.io.ome_store import ( + analysis_auxiliary_root, + resolve_voxel_size_um_zyx_with_source, +) from clearex.io.provenance import register_latest_output_reference if TYPE_CHECKING: @@ -284,37 +287,11 @@ def _extract_voxel_size_um_zyx( Voxel sizes in microns for ``(z, y, x)``. Falls back to ``(1, 1, 1)`` when metadata is unavailable. 
""" - root_attrs = dict(root.attrs) - source_attrs: dict[str, Any] = {} - try: - source_attrs = dict(root[source_component].attrs) - except Exception: - source_attrs = {} - - for attrs in (source_attrs, root_attrs): - voxel = attrs.get("voxel_size_um_zyx") - if not isinstance(voxel, (tuple, list)) or len(voxel) < 3: - continue - z_um = float(voxel[0]) - y_um = float(voxel[1]) - x_um = float(voxel[2]) - if z_um > 0 and y_um > 0 and x_um > 0: - return z_um, y_um, x_um - - for attrs in (source_attrs, root_attrs): - navigate = attrs.get("navigate_experiment") - if not isinstance(navigate, dict): - continue - xy_value = navigate.get("xy_pixel_size_um") - z_value = navigate.get("z_step_um") - if xy_value is None or z_value is None: - continue - xy_um = float(xy_value) - z_um = float(z_value) - if xy_um > 0 and z_um > 0: - return z_um, xy_um, xy_um - - return 1.0, 1.0, 1.0 + voxel_size_um_zyx, _ = resolve_voxel_size_um_zyx_with_source( + root, + source_component=source_component, + ) + return voxel_size_um_zyx def _projection_pixel_size_um( diff --git a/src/clearex/registration/pipeline.py b/src/clearex/registration/pipeline.py index 55fa7e9..1e6a017 100644 --- a/src/clearex/registration/pipeline.py +++ b/src/clearex/registration/pipeline.py @@ -37,6 +37,7 @@ analysis_cache_root, load_store_metadata, public_analysis_root, + resolve_voxel_size_um_zyx_with_source, ) from clearex.io.provenance import register_latest_output_reference from clearex.workflow import SpatialCalibrationConfig, spatial_calibration_from_dict @@ -361,37 +362,11 @@ def _extract_voxel_size_um_zyx( root: zarr.hierarchy.Group, source_component: str ) -> tuple[float, float, float]: """Extract voxel size in ``(z, y, x)`` order.""" - try: - source = root[source_component] - except Exception: - source = None - - if source is not None: - try: - payload = source.attrs.get("voxel_size_um_zyx") - if isinstance(payload, (list, tuple)) and len(payload) >= 3: - parsed = tuple(float(value) for value in payload[:3]) - 
if all(value > 0 for value in parsed): - return parsed # type: ignore[return-value] - except Exception: - pass - - try: - payload = root.attrs.get("voxel_size_um_zyx") - if isinstance(payload, (list, tuple)) and len(payload) >= 3: - parsed = tuple(float(value) for value in payload[:3]) - if all(value > 0 for value in parsed): - return parsed # type: ignore[return-value] - except Exception: - pass - - navigate = root.attrs.get("navigate_experiment") - if isinstance(navigate, Mapping): - xy_um = _safe_float(navigate.get("xy_pixel_size_um"), default=1.0) - z_um = _safe_float(navigate.get("z_step_um"), default=1.0) - if xy_um > 0 and z_um > 0: - return (float(z_um), float(xy_um), float(xy_um)) - return (1.0, 1.0, 1.0) + voxel_size_um_zyx, _ = resolve_voxel_size_um_zyx_with_source( + root, + source_component=source_component, + ) + return voxel_size_um_zyx def _component_level_suffix(component: str) -> Optional[int]: @@ -744,8 +719,11 @@ def _source_subvolume_for_overlap( voxel_size_um_zyx=voxel_size_um_zyx, ) # Build the eight corners of the output crop in output-index space. 
- rz, ry, rx = (int(reference_shape_zyx[0]), int(reference_shape_zyx[1]), - int(reference_shape_zyx[2])) + rz, ry, rx = ( + int(reference_shape_zyx[0]), + int(reference_shape_zyx[1]), + int(reference_shape_zyx[2]), + ) output_corners = np.asarray( [ [0.0, 0.0, 0.0], @@ -764,12 +742,21 @@ def _source_subvolume_for_overlap( src_min = np.floor(np.min(source_indices, axis=0)).astype(int) - int(padding) src_max = np.ceil(np.max(source_indices, axis=0)).astype(int) + int(padding) - sz, sy, sx = (int(source_shape_zyx[0]), int(source_shape_zyx[1]), - int(source_shape_zyx[2])) - z0, y0, x0 = (max(0, int(src_min[0])), max(0, int(src_min[1])), - max(0, int(src_min[2]))) - z1, y1, x1 = (min(sz, int(src_max[0])), min(sy, int(src_max[1])), - min(sx, int(src_max[2]))) + sz, sy, sx = ( + int(source_shape_zyx[0]), + int(source_shape_zyx[1]), + int(source_shape_zyx[2]), + ) + z0, y0, x0 = ( + max(0, int(src_min[0])), + max(0, int(src_min[1])), + max(0, int(src_min[2])), + ) + z1, y1, x1 = ( + min(sz, int(src_max[0])), + min(sy, int(src_max[1])), + min(sx, int(src_max[2])), + ) if z1 <= z0 or y1 <= y0 or x1 <= x0: return ( @@ -780,12 +767,16 @@ def _source_subvolume_for_overlap( slices_zyx = (slice(z0, z1), slice(y0, y1), slice(x0, x1)) # Shift the transform origin to account for the sub-volume offset. 
voxel_xyz = np.asarray( - [float(voxel_size_um_zyx[2]), float(voxel_size_um_zyx[1]), - float(voxel_size_um_zyx[0])], + [ + float(voxel_size_um_zyx[2]), + float(voxel_size_um_zyx[1]), + float(voxel_size_um_zyx[0]), + ], dtype=np.float64, ) - offset_xyz = np.asarray([float(x0), float(y0), float(z0)], - dtype=np.float64) * voxel_xyz + offset_xyz = ( + np.asarray([float(x0), float(y0), float(z0)], dtype=np.float64) * voxel_xyz + ) adjusted = local_to_world_xyz.copy() adjusted[:3, 3] = adjusted[:3, 3] + (adjusted[:3, :3] @ offset_xyz) return slices_zyx, adjusted @@ -1062,15 +1053,23 @@ def _register_pairwise_overlap( fixed_source = np.asarray( source[ - int(t_index), int(edge.fixed_position), int(registration_channel), - fixed_slices[0], fixed_slices[1], fixed_slices[2], + int(t_index), + int(edge.fixed_position), + int(registration_channel), + fixed_slices[0], + fixed_slices[1], + fixed_slices[2], ], dtype=np.float32, ) moving_source = np.asarray( source[ - int(t_index), int(edge.moving_position), int(registration_channel), - moving_slices[0], moving_slices[1], moving_slices[2], + int(t_index), + int(edge.moving_position), + int(registration_channel), + moving_slices[0], + moving_slices[1], + moving_slices[2], ], dtype=np.float32, ) @@ -1169,7 +1168,11 @@ def _register_pairwise_overlap( shift_zyx = np.asarray(shift_zyx, dtype=np.float64) # Pre-align the moving crop using the detected shift. reg_moving = ndimage.shift( - reg_moving, shift_zyx, order=1, mode="constant", cval=0.0, + reg_moving, + shift_zyx, + order=1, + mode="constant", + cval=0.0, ).astype(np.float32, copy=False) reg_moving_mask = np.asarray(reg_moving > 0, dtype=np.float32) # Build the FFT correction in physical XYZ coordinates. 
@@ -1185,9 +1188,7 @@ def _register_pairwise_overlap( fft_correction_xyz = np.eye(4, dtype=np.float64) try: - fixed_image = _ants_image_from_zyx( - reg_fixed, voxel_size_um_zyx=reg_voxel_size - ) + fixed_image = _ants_image_from_zyx(reg_fixed, voxel_size_um_zyx=reg_voxel_size) moving_image = _ants_image_from_zyx( reg_moving, voxel_size_um_zyx=reg_voxel_size ) @@ -1667,9 +1668,7 @@ def _blend_weight_profiles( profile = np.ones(int(axis_size), dtype=np.float32) width = max(0, min(int(ramp_width), max(0, int(axis_size // 2)))) if width > 0: - ramp = 0.5 - 0.5 * np.cos( - np.linspace(0.0, np.pi, width, dtype=np.float32) - ) + ramp = 0.5 - 0.5 * np.cos(np.linspace(0.0, np.pi, width, dtype=np.float32)) profile[:width] = ramp profile[-width:] = np.minimum(profile[-width:], ramp[::-1]) profiles.append(profile) @@ -1879,8 +1878,12 @@ def _process_and_write_registration_chunk( ) source_volume = np.asarray( source[ - int(t_index), int(position_index), int(c_index), - src_slices[0], src_slices[1], src_slices[2], + int(t_index), + int(position_index), + int(c_index), + src_slices[0], + src_slices[1], + src_slices[2], ], dtype=np.float32, ) @@ -1897,7 +1900,10 @@ def _process_and_write_registration_chunk( # Reconstruct blend weights for only the needed source sub-volume # from the 1D profiles — avoids materializing the full 3D weight volume. 
weight_sub = _blend_weight_subvolume_from_profiles( - profile_z, profile_y, profile_x, src_slices, + profile_z, + profile_y, + profile_x, + src_slices, ) warped_weight = _resample_source_to_world_grid( weight_sub, @@ -1933,6 +1939,7 @@ def _prepare_output_group( output_shape_tpczyx: tuple[int, int, int, int, int, int], output_chunks_tpczyx: tuple[int, int, int, int, int, int], voxel_size_um_zyx: Sequence[float], + voxel_size_resolution_source: str, output_origin_xyz: Sequence[float], source_tile_shape_zyx: tuple[int, int, int], blend_mode: str, @@ -1966,6 +1973,7 @@ def _prepare_output_group( "axes": ["t", "p", "c", "z", "y", "x"], "source_component": str(source_component), "voxel_size_um_zyx": [float(value) for value in voxel_size_um_zyx], + "voxel_size_resolution_source": str(voxel_size_resolution_source), "output_origin_xyz_um": [float(value) for value in output_origin_xyz], "storage_policy": "latest_only", } @@ -1979,6 +1987,7 @@ def _prepare_output_group( "output_shape_tpczyx": [int(value) for value in output_shape_tpczyx], "output_chunks_tpczyx": [int(value) for value in output_chunks_tpczyx], "voxel_size_um_zyx": [float(value) for value in voxel_size_um_zyx], + "voxel_size_resolution_source": str(voxel_size_resolution_source), "output_origin_xyz_um": [float(value) for value in output_origin_xyz], "data_component": analysis_cache_data_component("registration"), } @@ -2138,7 +2147,12 @@ def run_registration_analysis( if anchor_position < 0 or anchor_position >= len(positions): raise ValueError("registration anchor_position is out of bounds.") - full_voxel_size_um_zyx = _extract_voxel_size_um_zyx(root, source_component) + full_voxel_size_um_zyx, voxel_size_resolution_source = ( + resolve_voxel_size_um_zyx_with_source( + root, + source_component=source_component, + ) + ) level_factor_zyx = _pyramid_factor_zyx_for_level( root, level=effective_level, @@ -2193,8 +2207,7 @@ def run_registration_analysis( # Resolve configurable pairwise estimation parameters. 
effective_ants_iterations = tuple( - int(v) - for v in parameters.get("ants_iterations", _ANTS_AFF_ITERATIONS) + int(v) for v in parameters.get("ants_iterations", _ANTS_AFF_ITERATIONS) ) effective_ants_sampling_rate = float( parameters.get("ants_sampling_rate", _ANTS_RANDOM_SAMPLING_RATE) @@ -2230,7 +2243,9 @@ def run_registration_analysis( overlap_zyx=[ int(value) for value in parameters.get("overlap_zyx", [8, 32, 32]) ], - registration_type=str(parameters.get("registration_type", "translation")) + registration_type=str( + parameters.get("registration_type", "translation") + ) .strip() .lower(), max_pairwise_voxels=effective_max_pairwise_voxels, @@ -2247,9 +2262,7 @@ def run_registration_analysis( batch_size = max(1, min(total, 4)) for batch_start in range(0, total, batch_size): batch = delayed_edges[batch_start : batch_start + batch_size] - batch_results = list( - dask.compute(*batch, scheduler="processes") - ) + batch_results = list(dask.compute(*batch, scheduler="processes")) pairwise_results.extend(batch_results) completed = len(pairwise_results) _emit( @@ -2375,9 +2388,7 @@ def run_registration_analysis( max(1, min(int(source_chunks[5]), int(output_shape_tpczyx[5]))), ) blend_mode = str(parameters.get("blend_mode", "feather")).strip().lower() - overlap_zyx = [ - int(value) for value in parameters.get("overlap_zyx", [8, 32, 32]) - ] + overlap_zyx = [int(value) for value in parameters.get("overlap_zyx", [8, 32, 32])] # --- Memory guard --------------------------------------------------------- source_tile_shape_zyx = tuple(int(v) for v in source_shape_tpczyx[3:]) @@ -2387,7 +2398,7 @@ def run_registration_analysis( source_tile_shape_zyx, n_positions=int(source_shape_tpczyx[1]), ) - est_gib = est_bytes / (1024 ** 3) + est_gib = est_bytes / (1024**3) logger.info( "Fusion memory estimate: %.1f GiB per worker (chunk %s, tile %s)", est_gib, @@ -2411,6 +2422,7 @@ def run_registration_analysis( output_shape_tpczyx=output_shape_tpczyx, 
output_chunks_tpczyx=output_chunks_tpczyx, voxel_size_um_zyx=full_voxel_size_um_zyx, + voxel_size_resolution_source=str(voxel_size_resolution_source), output_origin_xyz=output_min_xyz, source_tile_shape_zyx=source_tile_shape_zyx, # type: ignore[arg-type] blend_mode=blend_mode, @@ -2560,7 +2572,9 @@ def run_registration_analysis( "requested_input_resolution_level": int(requested_resolution_level), "input_resolution_level": int(effective_level), "registration_channel": int(registration_channel), - "registration_type": str(parameters.get("registration_type", "translation")), + "registration_type": str( + parameters.get("registration_type", "translation") + ), "anchor_positions": [int(value) for value in anchor_positions], "edge_count": int(edge_count), "active_edge_count": int(active_edge_count), @@ -2587,7 +2601,9 @@ def run_registration_analysis( "requested_input_resolution_level": int(requested_resolution_level), "input_resolution_level": int(effective_level), "registration_channel": int(registration_channel), - "registration_type": str(parameters.get("registration_type", "translation")), + "registration_type": str( + parameters.get("registration_type", "translation") + ), "anchor_positions": [int(value) for value in anchor_positions], "edge_count": int(edge_count), "active_edge_count": int(active_edge_count), @@ -2601,6 +2617,7 @@ def run_registration_analysis( "output_shape_tpczyx": [int(value) for value in output_shape_tpczyx], "output_chunks_tpczyx": [int(value) for value in output_chunks_tpczyx], "voxel_size_um_zyx": [float(value) for value in full_voxel_size_um_zyx], + "voxel_size_resolution_source": str(voxel_size_resolution_source), "output_origin_xyz_um": [float(value) for value in output_min_xyz], }, ) diff --git a/src/clearex/shear/README.md b/src/clearex/shear/README.md new file mode 100644 index 0000000..6a449f6 --- /dev/null +++ b/src/clearex/shear/README.md @@ -0,0 +1,89 @@ +# Shear Transform Strategy + +This `README.md` is the canonical subsystem 
reference for +`src/clearex/shear`. + +## Scope + +- Main runtime file: `src/clearex/shear/pipeline.py` +- Primary entrypoint: `run_shear_transform_analysis` +- Runtime output: `clearex/runtime_cache/results/shear_transform/latest/data` +- Public OME publication target: `results/shear_transform/latest` + +## Coordinate And Geometry Contract + +- Shear/rotation geometry is solved in physical-space XYZ coordinates. +- Source data axes remain canonical `(t, p, c, z, y, x)`. +- Voxel spacing is interpreted as `voxel_size_um_zyx` and used to map index + space to physical space. +- `auto_rotate_from_shear` applies axis-coupled Euler adjustments: + - `rotation_deg_x += -atan(shear_yz)` + - `rotation_deg_y += +atan(shear_xz)` + - `rotation_deg_z += -atan(shear_xy)` + +## Scale Preservation Invariants + +The shear pipeline must preserve physical pixel size metadata end-to-end. + +Required invariants: + +- Upstream operations must preserve or restate physical voxel size metadata on + their output arrays. +- Shear voxel-size resolution must not rely on a single location only. +- Shear outputs must persist: + - `voxel_size_um_zyx` + - `voxel_size_resolution_source` +- Downstream registration, visualization, OME publication, and MIP export must + consume the same resolved voxel spacing. + +## Voxel-Size Resolution Policy + +Shear now relies on shared resolver logic in `clearex.io.ome_store`: + +- `resolve_voxel_size_um_zyx_with_source(...)` +- `resolve_voxel_size_um_zyx(...)` + +Resolution order: + +1. source component attrs +2. source-component ancestry via `source_component` +3. namespaced store metadata (`clearex/metadata`) +4. root attrs +5. Navigate metadata fallbacks (`navigate_experiment`) +6. default `(1.0, 1.0, 1.0)` only when all metadata is missing + +This policy exists to prevent regressions where intermediate runtime arrays +accidentally drop voxel-size attrs. 
+ +Read-only safety requirement: + +- Metadata reads must not create `clearex/metadata` when the store is opened + in read-only mode. +- Resolver helpers must remain side-effect free for `mode="r"` roots. + +Downsampled-layer requirement: + +- Global/root/store voxel metadata describes base-resolution physical spacing. +- For non-base components that do not define component-local scale metadata, + visualization must apply integer shape-ratio scaling from base data shape to + preserve physical spacing in napari. + +## Regression Guardrails + +When editing this subsystem: + +- Do not remove `voxel_size_um_zyx` persistence from shear outputs. +- Do not replace shared voxel-size resolution with local single-source lookups. +- Keep `voxel_size_resolution_source` on shear outputs for diagnostics. +- If output metadata keys change, update downstream readers and tests in the + same change set. + +## Validation + +Run at minimum after scale-related changes: + +- `uv run ruff check src/clearex/shear/pipeline.py src/clearex/io/ome_store.py src/clearex/visualization/pipeline.py src/clearex/registration/pipeline.py src/clearex/flatfield/pipeline.py src/clearex/mip_export/pipeline.py` +- `uv run --with pytest --with requests python -m pytest -q tests/shear/test_pipeline.py tests/io/test_ome_store_scale.py tests/flatfield/test_pipeline.py::test_copy_source_array_attrs_preserves_voxel_size tests/registration/test_pipeline.py::test_extract_voxel_size_uses_source_component_chain tests/visualization/test_pipeline.py::test_run_visualization_analysis_prefers_voxel_size_um_attrs tests/visualization/test_pipeline.py::test_run_visualization_analysis_resolves_scale_from_source_chain tests/visualization/test_pipeline.py::test_launch_napari_viewer_resolves_per_layer_scale_for_downsampled_components` + +For store-level verification on real data, confirm scale lineage and OME +multiscale scale transforms remain non-isotropic where expected. 
diff --git a/src/clearex/shear/pipeline.py b/src/clearex/shear/pipeline.py index 5f54bce..a96a097 100644 --- a/src/clearex/shear/pipeline.py +++ b/src/clearex/shear/pipeline.py @@ -56,6 +56,7 @@ analysis_cache_data_component, analysis_cache_root, public_analysis_root, + resolve_voxel_size_um_zyx_with_source, ) from clearex.io.provenance import register_latest_output_reference @@ -679,37 +680,11 @@ def _extract_voxel_size_um_zyx( ----- Missing metadata falls back to isotropic ``(1.0, 1.0, 1.0)`` microns. """ - root_attrs = dict(root.attrs) - source_attrs: dict[str, Any] = {} - try: - source_attrs = dict(root[source_component].attrs) - except Exception: - source_attrs = {} - - for attrs in (source_attrs, root_attrs): - voxel = attrs.get("voxel_size_um_zyx") - if not isinstance(voxel, (tuple, list)) or len(voxel) < 3: - continue - z_um = float(voxel[0]) - y_um = float(voxel[1]) - x_um = float(voxel[2]) - if z_um > 0 and y_um > 0 and x_um > 0: - return z_um, y_um, x_um - - for attrs in (source_attrs, root_attrs): - navigate = attrs.get("navigate_experiment") - if not isinstance(navigate, dict): - continue - xy_value = navigate.get("xy_pixel_size_um") - z_value = navigate.get("z_step_um") - if xy_value is None or z_value is None: - continue - xy_um = float(xy_value) - z_um = float(z_value) - if xy_um > 0 and z_um > 0: - return z_um, xy_um, xy_um - - return 1.0, 1.0, 1.0 + voxel_size_um_zyx, _ = resolve_voxel_size_um_zyx_with_source( + root, + source_component=source_component, + ) + return voxel_size_um_zyx def _rotation_matrix_xyz(*, deg_x: float, deg_y: float, deg_z: float) -> np.ndarray: @@ -1207,9 +1182,11 @@ def _emit(percent: int, message: str) -> None: f"Input component '{source_component}' is incompatible." 
) - voxel_size_um_zyx = _extract_voxel_size_um_zyx( - root=root, - source_component=source_component, + voxel_size_um_zyx, voxel_size_resolution_source = ( + resolve_voxel_size_um_zyx_with_source( + root, + source_component=source_component, + ) ) if bool(normalized.get("auto_estimate_shear_yz", False)): _emit(3, "Estimating shear_yz_deg from x-extreme source slabs") @@ -1223,7 +1200,7 @@ def _emit(percent: int, message: str) -> None: normalized["shear_yz"] = float(np.tan(np.deg2rad(estimated_shear_yz_deg))) _emit( 4, - "Auto-estimated shear_yz_deg=" f"{float(estimated_shear_yz_deg):.3f}", + f"Auto-estimated shear_yz_deg={float(estimated_shear_yz_deg):.3f}", ) else: _emit(4, "Auto-estimation failed; using configured shear parameters") @@ -1282,6 +1259,7 @@ def _emit(percent: int, message: str) -> None: { "axes": ["t", "p", "c", "z", "y", "x"], "voxel_size_um_zyx": [float(v) for v in voxel_size_um_zyx], + "voxel_size_resolution_source": str(voxel_size_resolution_source), "source_component": source_component, "output_origin_xyz_um": [float(v) for v in geometry.output_origin_xyz], "affine_matrix_xyz": geometry.matrix_xyz.tolist(), @@ -1303,6 +1281,7 @@ def _emit(percent: int, message: str) -> None: "output_chunks_tpczyx": [int(v) for v in output_chunks_tpczyx], "output_origin_xyz_um": [float(v) for v in geometry.output_origin_xyz], "voxel_size_um_zyx": [float(v) for v in voxel_size_um_zyx], + "voxel_size_resolution_source": str(voxel_size_resolution_source), } ) root_w.require_group(auxiliary_root).attrs.update(dict(latest_group.attrs)) @@ -1433,6 +1412,7 @@ def _emit(percent: int, message: str) -> None: "output_shape_tpczyx": [int(v) for v in output_shape_tpczyx], "output_chunks_tpczyx": [int(v) for v in output_chunks_tpczyx], "voxel_size_um_zyx": [float(v) for v in voxel_size_um_zyx], + "voxel_size_resolution_source": str(voxel_size_resolution_source), "output_origin_xyz_um": [float(v) for v in geometry.output_origin_xyz], "applied_shear": { "xy": 
float(normalized["shear_xy"]), diff --git a/src/clearex/visualization/pipeline.py b/src/clearex/visualization/pipeline.py index 71266f2..935377e 100644 --- a/src/clearex/visualization/pipeline.py +++ b/src/clearex/visualization/pipeline.py @@ -51,7 +51,11 @@ # Local Imports from clearex.io.experiment import load_navigate_experiment -from clearex.io.ome_store import analysis_auxiliary_root, load_store_metadata +from clearex.io.ome_store import ( + analysis_auxiliary_root, + load_store_metadata, + resolve_voxel_size_um_zyx_with_source, +) from clearex.io.provenance import register_latest_output_reference from clearex.workflow import ( SpatialCalibrationConfig, @@ -488,6 +492,8 @@ def _extract_scale_tczyx_from_attrs( ---------- root_attrs : mapping[str, Any] Root Zarr attributes. + store_metadata : mapping[str, Any], optional + ClearEx namespaced metadata attrs. source_attrs : mapping[str, Any] Source-array attributes. @@ -630,6 +636,8 @@ def _parse_binning_xy(value: Any) -> tuple[float, float]: def _load_source_experiment_raw( root_attrs: Mapping[str, Any], + *, + store_metadata: Optional[Mapping[str, Any]] = None, ) -> Optional[Dict[str, Any]]: """Load source Navigate experiment raw metadata when available. @@ -648,7 +656,11 @@ def _load_source_experiment_raw( None Failures are handled internally and return ``None``. 
""" - source_experiment = root_attrs.get("source_experiment") + source_experiment = ( + store_metadata.get("source_experiment") + if isinstance(store_metadata, Mapping) + else root_attrs.get("source_experiment") + ) if not isinstance(source_experiment, str): return None text = source_experiment.strip() @@ -868,15 +880,32 @@ def _build_napari_layer_payload( source_component = str(primary_layer.component) source_components = tuple(str(item) for item in primary_layer.source_components) root_attrs = dict(root.attrs) + store_metadata = load_store_metadata(root) source_array = root[str(source_component)] source_attrs = dict(source_array.attrs) - source_experiment_raw = _load_source_experiment_raw(root_attrs) + source_experiment_raw = _load_source_experiment_raw( + root_attrs, + store_metadata=store_metadata, + ) + resolved_voxel_size_um_zyx, voxel_size_resolution_source = ( + resolve_voxel_size_um_zyx_with_source( + root, + source_component=source_component, + ) + ) scale_tczyx = ( _extract_scale_tczyx_from_attrs( root_attrs=root_attrs, source_attrs=source_attrs, ) or _extract_scale_tczyx_from_navigate_raw(source_experiment_raw) + or ( + 1.0, + 1.0, + float(resolved_voxel_size_um_zyx[0]), + float(resolved_voxel_size_um_zyx[1]), + float(resolved_voxel_size_um_zyx[2]), + ) or (1.0, 1.0, 1.0, 1.0, 1.0) ) multiscale_levels = _collect_multiscale_level_metadata( @@ -890,6 +919,8 @@ def _build_napari_layer_payload( "store_path": str(Path(zarr_path).expanduser().resolve()), "axis_labels_tczyx": list(_AXIS_LABELS_TCZYX), "scale_tczyx": [float(value) for value in scale_tczyx], + "voxel_size_um_zyx": [float(value) for value in resolved_voxel_size_um_zyx], + "voxel_size_resolution_source": str(voxel_size_resolution_source), "source_component": str(source_component), "source_components": [str(item) for item in source_components], "volume_layers": volume_layers_payload, @@ -3511,11 +3542,16 @@ def _default_channel_opacity(channel_count: int) -> float: scale = tuple(float(value) for 
value in scale_tczyx) scale_root: Optional[zarr.hierarchy.Group] = None scale_root_attrs: Dict[str, Any] = {} - scale_source_experiment_raw: Dict[str, Any] = {} + scale_store_metadata: Dict[str, Any] = {} + scale_source_experiment_raw: Optional[Dict[str, Any]] = None try: scale_root = zarr.open_group(str(zarr_path), mode="r") scale_root_attrs = dict(scale_root.attrs) - scale_source_experiment_raw = _load_source_experiment_raw(scale_root_attrs) + scale_store_metadata = load_store_metadata(scale_root) + scale_source_experiment_raw = _load_source_experiment_raw( + scale_root_attrs, + store_metadata=scale_store_metadata, + ) except Exception: scale_root = None layer_scale_cache: dict[str, tuple[float, float, float, float, float]] = {} @@ -3547,6 +3583,34 @@ def _resolve_layer_scale_tczyx( root_attrs={}, source_attrs=source_attrs, ) + resolver_base_scale_hint: Optional[tuple[float, float, float, float, float]] = ( + None + ) + if resolved_scale is None: + resolved_voxel_size_um_zyx, voxel_resolution_source = ( + resolve_voxel_size_um_zyx_with_source( + scale_root, + source_component=key, + ) + ) + if str(voxel_resolution_source) != "default": + resolver_scale = ( + float(scale[0]), + float(scale[1]), + float(resolved_voxel_size_um_zyx[0]), + float(resolved_voxel_size_um_zyx[1]), + float(resolved_voxel_size_um_zyx[2]), + ) + resolution_source = str(voxel_resolution_source).strip().lower() + component_specific = resolution_source.startswith( + "component:" + ) or resolution_source.startswith("component_navigate:") + if key == "data" or component_specific: + resolved_scale = resolver_scale + else: + # Root/store-level voxel metadata describes base spacing. + # Non-base components still need shape-ratio upscaling. 
+ resolver_base_scale_hint = resolver_scale if resolved_scale is None and key == "data": resolved_scale = _extract_scale_tczyx_from_attrs( root_attrs=scale_root_attrs, @@ -3558,7 +3622,11 @@ def _resolve_layer_scale_tczyx( ) if resolved_scale is None: - base_scale = scale + base_scale = ( + resolver_base_scale_hint + if resolver_base_scale_hint is not None + else scale + ) base_shape: Optional[tuple[int, int, int, int, int, int]] = None try: base_array = scale_root["data"] @@ -3568,6 +3636,7 @@ def _resolve_layer_scale_tczyx( root_attrs=scale_root_attrs, source_attrs=dict(base_array.attrs), ) + or resolver_base_scale_hint or _extract_scale_tczyx_from_navigate_raw( scale_source_experiment_raw ) diff --git a/tests/flatfield/test_pipeline.py b/tests/flatfield/test_pipeline.py index 1c7ed42..1313936 100644 --- a/tests/flatfield/test_pipeline.py +++ b/tests/flatfield/test_pipeline.py @@ -15,6 +15,37 @@ import clearex.flatfield.pipeline as flatfield_pipeline +def test_copy_source_array_attrs_preserves_voxel_size(tmp_path: Path) -> None: + """Flatfield output attrs should carry upstream physical voxel size.""" + store_path = tmp_path / "flatfield_attr_copy_scale.zarr" + root = zarr.open_group(str(store_path), mode="w") + source = root.create_dataset( + name="clearex/runtime_cache/source/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype=np.uint16, + overwrite=True, + ) + source.attrs.update({"voxel_size_um_zyx": [5.0, 1.25, 1.25]}) + target = root.create_dataset( + name="clearex/runtime_cache/results/flatfield/latest/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype=np.float32, + overwrite=True, + ) + + flatfield_pipeline._copy_source_array_attrs( + root=root, + source_component="clearex/runtime_cache/source/data", + target_array=target, + output_chunks=(1, 1, 1, 2, 2, 2), + ) + + attrs = dict(target.attrs) + assert attrs["voxel_size_um_zyx"] == [5.0, 1.25, 1.25] + + def test_run_flatfield_analysis_writes_latest_results( 
tmp_path: Path, monkeypatch ) -> None: diff --git a/tests/io/test_ome_store_scale.py b/tests/io/test_ome_store_scale.py new file mode 100644 index 0000000..078e551 --- /dev/null +++ b/tests/io/test_ome_store_scale.py @@ -0,0 +1,140 @@ +# Copyright (c) 2021-2026 The University of Texas Southwestern Medical Center. +# All rights reserved. + +from __future__ import annotations + +from pathlib import Path + +import numpy as np +import zarr + +from clearex.io.ome_store import ( + load_store_metadata, + publish_analysis_collection_from_cache, + resolve_voxel_size_um_zyx_with_source, + update_store_metadata, +) + + +def test_resolve_voxel_size_uses_source_component_chain(tmp_path: Path) -> None: + """Resolver should traverse ``source_component`` ancestry before metadata.""" + store_path = tmp_path / "scale_chain.zarr" + root = zarr.open_group(str(store_path), mode="w") + source = root.create_array( + "clearex/runtime_cache/source/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype=np.uint16, + overwrite=True, + ) + source.attrs["voxel_size_um_zyx"] = [5.0, 1.5, 1.25] + flatfield = root.create_array( + "clearex/runtime_cache/results/flatfield/latest/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype=np.float32, + overwrite=True, + ) + flatfield.attrs["source_component"] = "clearex/runtime_cache/source/data" + shear = root.create_array( + "clearex/runtime_cache/results/shear_transform/latest/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype=np.float32, + overwrite=True, + ) + shear.attrs["source_component"] = ( + "clearex/runtime_cache/results/flatfield/latest/data" + ) + + update_store_metadata(root, voxel_size_um_zyx=[9.0, 9.0, 9.0]) + + voxel_size_um_zyx, source = resolve_voxel_size_um_zyx_with_source( + root, + source_component="clearex/runtime_cache/results/shear_transform/latest/data", + ) + + assert voxel_size_um_zyx == (5.0, 1.5, 1.25) + assert source == "component:clearex/runtime_cache/source/data" + + 
+def test_resolve_voxel_size_uses_store_metadata_navigate_fallback( + tmp_path: Path, +) -> None: + """Resolver should use namespaced Navigate metadata before default fallback.""" + store_path = tmp_path / "scale_metadata_fallback.zarr" + root = zarr.open_group(str(store_path), mode="w") + update_store_metadata( + root, + navigate_experiment={ + "xy_pixel_size_um": 0.8, + "z_step_um": 3.5, + }, + ) + + voxel_size_um_zyx, source = resolve_voxel_size_um_zyx_with_source( + root, + source_component="missing/component", + ) + + assert voxel_size_um_zyx == (3.5, 0.8, 0.8) + assert source == "store_metadata_navigate" + + +def test_load_store_metadata_read_only_missing_group_returns_schema_default( + tmp_path: Path, +) -> None: + """Metadata loading must remain read-only safe when group is absent.""" + store_path = tmp_path / "metadata_read_only_default.zarr" + root = zarr.open_group(str(store_path), mode="w") + root.create_array( + "data", + shape=(1, 1, 1, 1, 1, 1), + chunks=(1, 1, 1, 1, 1, 1), + dtype=np.uint16, + overwrite=True, + ) + + read_only_root = zarr.open_group(str(store_path), mode="r") + payload = load_store_metadata(read_only_root) + assert payload == {"schema": "clearex.ome_store.v1"} + assert "clearex" not in read_only_root + + +def test_publish_analysis_collection_uses_resolved_voxel_size(tmp_path: Path) -> None: + """Public OME scale should be derived from resolved runtime-cache voxel size.""" + store_path = tmp_path / "publish_scale.zarr" + root = zarr.open_group(str(store_path), mode="w") + source = root.create_array( + "clearex/runtime_cache/source/data", + shape=(1, 1, 1, 2, 3, 4), + chunks=(1, 1, 1, 2, 3, 4), + dtype=np.uint16, + overwrite=True, + ) + source.attrs["voxel_size_um_zyx"] = [4.0, 1.2, 1.2] + shear = root.create_array( + "clearex/runtime_cache/results/shear_transform/latest/data", + data=np.arange(1 * 1 * 1 * 2 * 3 * 4, dtype=np.float32).reshape( + (1, 1, 1, 2, 3, 4) + ), + chunks=(1, 1, 1, 2, 3, 4), + overwrite=True, + ) + 
shear.attrs["source_component"] = "clearex/runtime_cache/source/data" + + publish_analysis_collection_from_cache( + store_path, + analysis_name="shear_transform", + ) + + image_group = zarr.open_group(str(store_path), mode="r")[ + "results/shear_transform/latest/A/1/0" + ] + ome = image_group.attrs["ome"] + transforms = ome["multiscales"][0]["datasets"][0]["coordinateTransformations"] + assert transforms[0]["type"] == "scale" + assert transforms[0]["scale"] == [1.0, 1.0, 4.0, 1.2, 1.2] + assert image_group.attrs["voxel_size_resolution_source"] == ( + "component:clearex/runtime_cache/source/data" + ) diff --git a/tests/registration/test_pipeline.py b/tests/registration/test_pipeline.py index a81f66b..73dbb11 100644 --- a/tests/registration/test_pipeline.py +++ b/tests/registration/test_pipeline.py @@ -115,6 +115,45 @@ def _create_registration_store( return store_path +def test_extract_voxel_size_uses_source_component_chain(tmp_path: Path) -> None: + """Registration voxel-size lookup should follow ``source_component`` ancestry.""" + store_path = tmp_path / "registration_voxel_chain.zarr" + root = zarr.open_group(str(store_path), mode="w") + source = root.create_dataset( + name="clearex/runtime_cache/source/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + source.attrs["voxel_size_um_zyx"] = [6.0, 1.1, 1.1] + flatfield = root.create_dataset( + name="clearex/runtime_cache/results/flatfield/latest/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="float32", + overwrite=True, + ) + flatfield.attrs["source_component"] = "clearex/runtime_cache/source/data" + shear = root.create_dataset( + name="clearex/runtime_cache/results/shear_transform/latest/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="float32", + overwrite=True, + ) + shear.attrs["source_component"] = ( + "clearex/runtime_cache/results/flatfield/latest/data" + ) + + voxel = 
registration_pipeline._extract_voxel_size_um_zyx( + root, + "clearex/runtime_cache/results/shear_transform/latest/data", + ) + + assert voxel == (6.0, 1.1, 1.1) + + def test_build_edge_specs_only_keeps_overlapping_neighbors() -> None: nominal = { 0: _translation_matrix(0.0), @@ -621,10 +660,14 @@ def test_profiles_reconstruct_full_volume(self): shape = (8, 12, 10) overlap = (4, 6, 5) full_vol = registration_pipeline._blend_weight_volume( - shape, blend_mode="feather", overlap_zyx=overlap, + shape, + blend_mode="feather", + overlap_zyx=overlap, ) pz, py, px = registration_pipeline._blend_weight_profiles( - shape, blend_mode="feather", overlap_zyx=overlap, + shape, + blend_mode="feather", + overlap_zyx=overlap, ) reconstructed = ( pz[:, np.newaxis, np.newaxis] @@ -637,21 +680,30 @@ def test_subvolume_matches_full_slice(self): shape = (8, 12, 10) overlap = (4, 6, 5) full_vol = registration_pipeline._blend_weight_volume( - shape, blend_mode="feather", overlap_zyx=overlap, + shape, + blend_mode="feather", + overlap_zyx=overlap, ) pz, py, px = registration_pipeline._blend_weight_profiles( - shape, blend_mode="feather", overlap_zyx=overlap, + shape, + blend_mode="feather", + overlap_zyx=overlap, ) slices = (slice(2, 6), slice(3, 9), slice(1, 7)) sub = registration_pipeline._blend_weight_subvolume_from_profiles( - pz, py, px, slices, + pz, + py, + px, + slices, ) np.testing.assert_allclose(sub, full_vol[slices], atol=1e-7) def test_average_mode_profiles(self): shape = (4, 6, 8) pz, py, px = registration_pipeline._blend_weight_profiles( - shape, blend_mode="average", overlap_zyx=(2, 3, 4), + shape, + blend_mode="average", + overlap_zyx=(2, 3, 4), ) np.testing.assert_array_equal(pz, np.ones(4, dtype=np.float32)) np.testing.assert_array_equal(py, np.ones(6, dtype=np.float32)) diff --git a/tests/shear/test_pipeline.py b/tests/shear/test_pipeline.py index 11884f8..3669e5a 100644 --- a/tests/shear/test_pipeline.py +++ b/tests/shear/test_pipeline.py @@ -94,11 +94,10 @@ def 
test_run_shear_transform_auto_estimate_updates_applied_shear( slab_thickness_z=5, z_offset=6, ) - root.create_dataset( + root.create_array( name="data", data=source, chunks=(1, 1, 1, 16, 24, 14), - dtype="float32", overwrite=True, ) root["data"].attrs["voxel_size_um_zyx"] = [1.0, 1.0, 1.0] @@ -123,15 +122,58 @@ def test_run_shear_transform_auto_estimate_updates_applied_shear( assert np.isclose(observed_angle_deg, expected_angle_deg, atol=2.0) +def test_run_shear_transform_inherits_voxel_size_from_source_chain( + tmp_path: Path, +) -> None: + """Shear should resolve voxel size through ``source_component`` ancestry.""" + store_path = tmp_path / "shear_voxel_chain.zarr" + root = zarr.open_group(str(store_path), mode="w") + source = root.create_array( + name="clearex/runtime_cache/source/data", + data=np.arange(1 * 1 * 1 * 4 * 4 * 4, dtype=np.uint16).reshape( + (1, 1, 1, 4, 4, 4) + ), + chunks=(1, 1, 1, 2, 2, 2), + overwrite=True, + ) + source.attrs["voxel_size_um_zyx"] = [5.0, 1.25, 1.25] + flatfield = root.create_array( + name="clearex/runtime_cache/results/flatfield/latest/data", + data=np.asarray(source, dtype=np.float32), + chunks=(1, 1, 1, 2, 2, 2), + overwrite=True, + ) + flatfield.attrs["source_component"] = "clearex/runtime_cache/source/data" + + summary = run_shear_transform_analysis( + zarr_path=store_path, + parameters={ + "input_source": "clearex/runtime_cache/results/flatfield/latest/data", + "interpolation": "nearestneighbor", + "output_dtype": "float32", + "roi_padding_zyx": [1, 1, 1], + }, + client=None, + ) + + assert summary.voxel_size_um_zyx == (5.0, 1.25, 1.25) + attrs = dict( + zarr.open_group(str(store_path), mode="r")[summary.data_component].attrs + ) + assert attrs["voxel_size_um_zyx"] == [5.0, 1.25, 1.25] + assert attrs["voxel_size_resolution_source"] == ( + "component:clearex/runtime_cache/source/data" + ) + + def test_run_shear_transform_identity_preserves_data(tmp_path: Path) -> None: store_path = tmp_path / "shear_identity.zarr" root = 
zarr.open_group(str(store_path), mode="w") data = np.arange(1 * 1 * 1 * 4 * 4 * 4, dtype=np.uint16).reshape((1, 1, 1, 4, 4, 4)) - root.create_dataset( + root.create_array( name="data", data=data, chunks=(1, 1, 1, 2, 2, 2), - dtype="uint16", overwrite=True, ) root["data"].attrs["voxel_size_um_zyx"] = [1.0, 1.0, 1.0] @@ -155,11 +197,9 @@ def test_run_shear_transform_identity_preserves_data(tmp_path: Path) -> None: ) output = np.asarray( - zarr.open_group(str(store_path), mode="r")[ - "results/shear_transform/latest/data" - ] + zarr.open_group(str(store_path), mode="r")[summary.data_component] ) - assert summary.data_component == "results/shear_transform/latest/data" + assert summary.component == "results/shear_transform/latest" assert output.shape == data.shape np.testing.assert_array_equal(output, data) @@ -171,11 +211,10 @@ def test_run_shear_transform_emits_larger_bounds_for_nonzero_shear( root = zarr.open_group(str(store_path), mode="w") data = np.zeros((1, 1, 1, 6, 6, 6), dtype=np.float32) data[0, 0, 0, 2:4, 2:4, 2:4] = 1.0 - root.create_dataset( + root.create_array( name="data", data=data, chunks=(1, 1, 1, 3, 3, 3), - dtype="float32", overwrite=True, ) root["data"].attrs["voxel_size_um_zyx"] = [2.0, 1.0, 1.0] @@ -194,9 +233,7 @@ def test_run_shear_transform_emits_larger_bounds_for_nonzero_shear( ) output = np.asarray( - zarr.open_group(str(store_path), mode="r")[ - "results/shear_transform/latest/data" - ] + zarr.open_group(str(store_path), mode="r")[summary.data_component] ) assert output.shape == summary.output_shape_tpczyx assert np.max(output) > 0.0 @@ -208,16 +245,15 @@ def test_run_shear_transform_linear_normalizes_edge_support(tmp_path: Path) -> N store_path = tmp_path / "shear_linear_support_normalization.zarr" root = zarr.open_group(str(store_path), mode="w") data = np.full((1, 1, 1, 16, 16, 16), 100.0, dtype=np.float32) - root.create_dataset( + root.create_array( name="data", data=data, chunks=(1, 1, 1, 8, 8, 8), - dtype="float32", overwrite=True, ) 
root["data"].attrs["voxel_size_um_zyx"] = [1.0, 1.0, 1.0] - run_shear_transform_analysis( + summary = run_shear_transform_analysis( zarr_path=store_path, parameters={ "input_source": "data", @@ -231,9 +267,7 @@ def test_run_shear_transform_linear_normalizes_edge_support(tmp_path: Path) -> N ) output = np.asarray( - zarr.open_group(str(store_path), mode="r")[ - "results/shear_transform/latest/data" - ] + zarr.open_group(str(store_path), mode="r")[summary.data_component] ) positive = output[output > 0.0] assert positive.size > 0 @@ -249,11 +283,10 @@ def test_run_shear_transform_identity_with_distributed_client( store_path = tmp_path / "shear_identity_distributed.zarr" root = zarr.open_group(str(store_path), mode="w") data = np.arange(1 * 1 * 1 * 4 * 4 * 4, dtype=np.uint16).reshape((1, 1, 1, 4, 4, 4)) - root.create_dataset( + root.create_array( name="data", data=data, chunks=(1, 1, 1, 2, 2, 2), - dtype="uint16", overwrite=True, ) root["data"].attrs["voxel_size_um_zyx"] = [1.0, 1.0, 1.0] @@ -284,9 +317,7 @@ def test_run_shear_transform_identity_with_distributed_client( ) output = np.asarray( - zarr.open_group(str(store_path), mode="r")[ - "results/shear_transform/latest/data" - ] + zarr.open_group(str(store_path), mode="r")[summary.data_component] ) assert output.shape == data.shape assert summary.output_shape_tpczyx == data.shape diff --git a/tests/visualization/test_pipeline.py b/tests/visualization/test_pipeline.py index c4ab342..ba3a6fe 100644 --- a/tests/visualization/test_pipeline.py +++ b/tests/visualization/test_pipeline.py @@ -552,9 +552,9 @@ def test_run_display_pyramid_analysis_rebuilds_legacy_component_layout( ) output_root = zarr.open_group(str(store_path), mode="r") - assert ( - "results/shear_transform/latest/data_pyramid/level_1" in output_root - ), "Expected source-adjacent level_1 pyramid after migration." + assert "results/shear_transform/latest/data_pyramid/level_1" in output_root, ( + "Expected source-adjacent level_1 pyramid after migration." 
+ ) source_attrs = dict(output_root["results/shear_transform/latest/data"].attrs) assert source_attrs["display_pyramid_levels"][1].startswith( "results/shear_transform/latest/data_pyramid/level_" @@ -724,6 +724,96 @@ def _fake_launch_napari_viewer( assert captured["scale_tczyx"] == (1.0, 1.0, 2.5, 3.5, 4.5) +def test_run_visualization_analysis_resolves_scale_from_source_chain( + tmp_path: Path, monkeypatch +) -> None: + """Visualization should recover scale from source-component ancestry.""" + store_path = tmp_path / "analysis_store.zarr" + root = zarr.open_group(str(store_path), mode="w") + source = root.create_dataset( + name="clearex/runtime_cache/source/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + source.attrs["voxel_size_um_zyx"] = [4.0, 1.5, 1.5] + flatfield = root.create_dataset( + name="clearex/runtime_cache/results/flatfield/latest/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="float32", + overwrite=True, + ) + flatfield.attrs["source_component"] = "clearex/runtime_cache/source/data" + shear = root.create_dataset( + name="clearex/runtime_cache/results/shear_transform/latest/data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="float32", + overwrite=True, + ) + shear.attrs["source_component"] = ( + "clearex/runtime_cache/results/flatfield/latest/data" + ) + + captured: dict[str, object] = {} + + def _fake_launch_napari_viewer( + *, + zarr_path, + volume_layers, + selected_positions, + points_by_position, + point_properties_by_position, + position_affines_tczyx, + axis_labels, + scale_tczyx, + image_metadata, + points_metadata, + require_gpu_rendering, + capture_keyframes, + keyframe_manifest_path, + keyframe_layer_overrides, + ) -> None: + del zarr_path + del volume_layers + del selected_positions + del points_by_position + del point_properties_by_position + del position_affines_tczyx + del axis_labels + del points_metadata + del require_gpu_rendering + 
del capture_keyframes + del keyframe_manifest_path + del keyframe_layer_overrides + captured["scale_tczyx"] = tuple(float(value) for value in scale_tczyx) + captured["image_metadata"] = dict(image_metadata) + + monkeypatch.setattr( + visualization_pipeline, + "_launch_napari_viewer", + _fake_launch_napari_viewer, + ) + + run_visualization_analysis( + zarr_path=store_path, + parameters={ + "input_source": "clearex/runtime_cache/results/shear_transform/latest/data", + "launch_mode": "in_process", + "overlay_particle_detections": False, + }, + ) + + assert captured["scale_tczyx"] == (1.0, 1.0, 4.0, 1.5, 1.5) + image_metadata = dict(captured["image_metadata"]) + assert image_metadata["voxel_size_um_zyx"] == [4.0, 1.5, 1.5] + assert image_metadata["voxel_size_resolution_source"] == ( + "component:clearex/runtime_cache/source/data" + ) + + def test_run_visualization_analysis_show_all_positions_uses_stage_affines( tmp_path: Path, monkeypatch ) -> None: From f051991bbda5dea5da05cd40c24509a6e0bb8e6a Mon Sep 17 00:00:00 2001 From: Kevin Dean Date: Mon, 23 Mar 2026 21:21:32 -0500 Subject: [PATCH 10/10] Visualization: make auto launch mode non-blocking under Qt Problem\n- Visualization progress could appear stuck at ~65% after napari opened when\n launch_mode=auto resolved to in_process on the main thread.\n- In GUI/Qt workflows this blocked run_visualization_analysis until the viewer\n closed, so workflow progress and metadata finalization did not advance while\n napari remained open.\n\nSolution\n- Updated launch-mode resolution in src/clearex/visualization/pipeline.py:\n - launch_mode=auto now prefers subprocess when a Qt application instance is\n active (PyQt6 QApplication.instance() is not None).\n - Existing behavior is preserved for non-Qt contexts:\n - main thread -> in_process\n - non-main thread -> subprocess\n- Kept explicit modes unchanged:\n - launch_mode=in_process remains blocking by design.\n - launch_mode=subprocess remains non-blocking by design.\n\nWhy this 
is safe\n- Subprocess launch path already supports keyframe manifest handoff by passing\n keyframe_manifest_path to the subprocess runner and forcing in_process only\n within the spawned viewer process.\n- This change affects only auto-resolution policy and does not alter payload\n construction, layer resolution, or metadata schema.\n\nTests\n- Added launch-mode policy tests in tests/visualization/test_pipeline.py:\n - test_resolve_effective_launch_mode_auto_prefers_subprocess_with_qt_app\n - test_resolve_effective_launch_mode_auto_prefers_in_process_without_qt_app\n - test_run_visualization_analysis_auto_uses_subprocess_with_active_qt_app\n\nDocs\n- Updated src/clearex/visualization/README.md GUI/workflow notes to document\n the new auto policy under Qt and retained non-Qt behavior.\n\nValidation executed\n- uv run ruff format src/clearex/visualization/pipeline.py tests/visualization/test_pipeline.py\n- uv run ruff check src/clearex/visualization/pipeline.py tests/visualization/test_pipeline.py\n- uv run --with pytest --with requests python -m pytest -q \\n tests/visualization/test_pipeline.py::test_resolve_effective_launch_mode_auto_prefers_subprocess_with_qt_app \\n tests/visualization/test_pipeline.py::test_resolve_effective_launch_mode_auto_prefers_in_process_without_qt_app \\n tests/visualization/test_pipeline.py::test_run_visualization_analysis_auto_uses_subprocess_with_active_qt_app\n- Result: 3 passed --- src/clearex/visualization/README.md | 5 ++ src/clearex/visualization/pipeline.py | 26 +++++++++ tests/visualization/test_pipeline.py | 84 +++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) diff --git a/src/clearex/visualization/README.md b/src/clearex/visualization/README.md index 9fec742..e70a140 100644 --- a/src/clearex/visualization/README.md +++ b/src/clearex/visualization/README.md @@ -217,6 +217,11 @@ napari opened in 2D or 3D. display pyramids, not auto-building them. 
- The visualization GUI should describe 3D as a request that may fall back to 2D for oversized image layers. +- `launch_mode=auto` should resolve to `subprocess` whenever a Qt application + instance is active, so GUI workflows can continue and complete while napari + remains open. +- `launch_mode=auto` in non-Qt contexts retains thread-based behavior: + main-thread runs stay `in_process`; non-main-thread runs use `subprocess`. ## Agent Expectations diff --git a/src/clearex/visualization/pipeline.py b/src/clearex/visualization/pipeline.py index 935377e..6b80667 100644 --- a/src/clearex/visualization/pipeline.py +++ b/src/clearex/visualization/pipeline.py @@ -1085,8 +1085,34 @@ def _resolve_effective_launch_mode(requested_mode: str) -> str: str Effective launch mode (``in_process`` or ``subprocess``). """ + + def _qt_application_is_active() -> bool: + """Return whether a Qt application instance is currently active.""" + qt_widgets_module = sys.modules.get("PyQt6.QtWidgets") + application_cls = ( + getattr(qt_widgets_module, "QApplication", None) + if qt_widgets_module is not None + else None + ) + if application_cls is None: + try: + from PyQt6.QtWidgets import QApplication as _QApplication + except Exception: + return False + application_cls = _QApplication + instance_getter = getattr(application_cls, "instance", None) + if not callable(instance_getter): + return False + try: + return instance_getter() is not None + except Exception: + return False + mode = str(requested_mode).strip().lower() or "auto" if mode == "auto": + if _qt_application_is_active(): + # Keep GUI workflows non-blocking while napari remains open. 
+ return "subprocess" if threading.current_thread() is threading.main_thread(): return "in_process" return "subprocess" diff --git a/tests/visualization/test_pipeline.py b/tests/visualization/test_pipeline.py index ba3a6fe..531f7ba 100644 --- a/tests/visualization/test_pipeline.py +++ b/tests/visualization/test_pipeline.py @@ -290,6 +290,90 @@ def _fake_launch_napari_subprocess( assert latest_attrs["keyframe_layer_overrides"] == [] +def test_resolve_effective_launch_mode_auto_prefers_subprocess_with_qt_app( + monkeypatch, +) -> None: + class _FakeQApplication: + @staticmethod + def instance() -> object: + return object() + + class _FakeQtWidgets: + QApplication = _FakeQApplication + + monkeypatch.setitem(sys.modules, "PyQt6.QtWidgets", _FakeQtWidgets) + assert visualization_pipeline._resolve_effective_launch_mode("auto") == "subprocess" + + +def test_resolve_effective_launch_mode_auto_prefers_in_process_without_qt_app( + monkeypatch, +) -> None: + class _FakeQApplication: + @staticmethod + def instance() -> None: + return None + + class _FakeQtWidgets: + QApplication = _FakeQApplication + + monkeypatch.setitem(sys.modules, "PyQt6.QtWidgets", _FakeQtWidgets) + assert visualization_pipeline._resolve_effective_launch_mode("auto") == "in_process" + + +def test_run_visualization_analysis_auto_uses_subprocess_with_active_qt_app( + tmp_path: Path, monkeypatch +) -> None: + store_path = tmp_path / "analysis_store.zarr" + root = zarr.open_group(str(store_path), mode="w") + root.create_array( + name="data", + shape=(1, 1, 1, 2, 2, 2), + chunks=(1, 1, 1, 2, 2, 2), + dtype="uint16", + overwrite=True, + ) + + class _FakeQApplication: + @staticmethod + def instance() -> object: + return object() + + class _FakeQtWidgets: + QApplication = _FakeQApplication + + monkeypatch.setitem(sys.modules, "PyQt6.QtWidgets", _FakeQtWidgets) + + captured: dict[str, object] = {} + + def _fake_launch_napari_subprocess( + *, + zarr_path, + normalized_parameters, + ) -> int: + captured["zarr_path"] 
= str(zarr_path) + captured["parameters"] = dict(normalized_parameters) + return 98765 + + monkeypatch.setattr( + visualization_pipeline, + "_launch_napari_subprocess", + _fake_launch_napari_subprocess, + ) + + summary = run_visualization_analysis( + zarr_path=store_path, + parameters={ + "launch_mode": "auto", + "overlay_particle_detections": False, + }, + ) + + assert summary.launch_mode == "subprocess" + assert summary.viewer_pid == 98765 + assert captured["zarr_path"] == str(store_path) + assert dict(captured["parameters"])["launch_mode"] == "in_process" + + def test_run_visualization_analysis_rejects_invalid_position(tmp_path: Path) -> None: store_path = tmp_path / "analysis_store.zarr" root = zarr.open_group(str(store_path), mode="w")