diff --git a/.gitignore b/.gitignore index 51e2524..a08e1d8 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,8 @@ benchmarks # pixi environments .pixi *.egg-info + +config/samples_AS11F3_only.tsv +config/samples_ki3_batch2.tsv +lecb2.log +run_lecb2.sh diff --git a/workflow/scripts/imaris_to_metadata.py b/workflow/scripts/imaris_to_metadata.py index 66b854a..10d15e1 100644 --- a/workflow/scripts/imaris_to_metadata.py +++ b/workflow/scripts/imaris_to_metadata.py @@ -52,6 +52,25 @@ def clean_keys(obj): return obj +def h5_attr_to_str(attr): + """Convert an HDF5 byte-array attribute to a Python string. + + Imaris stores attributes as arrays of single-byte values, e.g. + [b'4', b'.', b'0'] → '4.0' + """ + if attr is None: + return None + return "".join(b.decode("utf-8") for b in attr) + + +def _try_float(value, default=None): + """Try to convert value to float, return default if not possible.""" + try: + return float(value) + except (ValueError, TypeError): + return default + + def build_bids_metadata(custom_attrs): """Convert custom attribute dict into BIDS microscopy-compliant metadata.""" # --- PixelSize and Units --- @@ -73,8 +92,9 @@ def build_bids_metadata(custom_attrs): "PixelSize": pixel_size, "PixelSizeUnits": unit_label, "Immersion": custom_attrs.get("ObjectiveMedium", {}).get("@ObjectiveMedium"), - "NumericalAperture": float( - custom_attrs.get("ObjectiveNA", {}).get("@ObjectiveNA", 0.0) + "NumericalAperture": _try_float( + custom_attrs.get("ObjectiveNA", {}).get("@ObjectiveNA", None), + default=0.1, ), "Magnification": magnification, "OtherAcquisitionParameters": custom_attrs.get("MeasurementMode", {}).get( @@ -109,6 +129,94 @@ def build_bids_metadata(custom_attrs): return bids_json +def build_bids_metadata_from_native_imaris(hdf5_file): + """Build BIDS metadata directly from native Imaris HDF5 attributes. + + Fallback for .ims files that lack embedded OME XML tags. + Computes voxel sizes from image extents: pixel_size = (ExtMax - ExtMin) / N_voxels + """ + img = hdf5_file["DataSetInfo/Image"] + + # --- Image dimensions --- + nx = int(h5_attr_to_str(img.attrs["X"])) + ny = int(h5_attr_to_str(img.attrs["Y"])) + nz = int(h5_attr_to_str(img.attrs["Z"])) + + # --- Physical extents (in file units, typically µm) --- + ext_min_x = float(h5_attr_to_str(img.attrs["ExtMin0"])) + ext_max_x = float(h5_attr_to_str(img.attrs["ExtMax0"])) + ext_min_y = float(h5_attr_to_str(img.attrs["ExtMin1"])) + ext_max_y = float(h5_attr_to_str(img.attrs["ExtMax1"])) + ext_min_z = float(h5_attr_to_str(img.attrs["ExtMin2"])) + ext_max_z = float(h5_attr_to_str(img.attrs["ExtMax2"])) + + # --- Compute voxel sizes --- + px_x = abs(ext_max_x - ext_min_x) / nx + px_y = abs(ext_max_y - ext_min_y) / ny + px_z = abs(ext_max_z - ext_min_z) / nz + pixel_size = [px_x, px_y, px_z] + + # --- Units --- + unit_raw = h5_attr_to_str(img.attrs.get("Unit", None)) + if unit_raw is None: + unit_label = "um" + else: + unit_label = unit_raw.replace("µ", "u") if "µ" in unit_raw else unit_raw + + # --- Channel metadata --- + extra_channels = {} + ch_idx = 0 + while f"DataSetInfo/Channel {ch_idx}" in hdf5_file: + ch_grp = hdf5_file[f"DataSetInfo/Channel {ch_idx}"] + ch_info = {} + for attr_name in [ + "Name", + "LSMExcitationWavelength", + "LSMEmissionWavelength", + "Color", + "Description", + ]: + val = ch_grp.attrs.get(attr_name, None) + if val is not None: + ch_info[attr_name] = h5_attr_to_str(val) + extra_channels[f"Channel_{ch_idx}"] = ch_info + ch_idx += 1 + + # --- Software version --- + sw_version = None + if "DataSetInfo/ImarisDataSet" in hdf5_file: + ds_grp = hdf5_file["DataSetInfo/ImarisDataSet"] + ver = ds_grp.attrs.get("Version", None) + if ver is not None: + sw_version = h5_attr_to_str(ver) + + print(f"Native Imaris metadata: PixelSize={pixel_size}, Unit={unit_label}") + print(f" Image dims: X={nx}, Y={ny}, Z={nz}") + print(f" Extents X: [{ext_min_x}, {ext_max_x}]") + print(f" Extents Y: [{ext_min_y}, {ext_max_y}]") + print(f" Extents Z: [{ext_min_z}, {ext_max_z}]") + + bids_json = { + "PixelSize": pixel_size, + "PixelSizeUnits": unit_label, + "Immersion": None, + "NumericalAperture": 0.1, + "Magnification": None, + "OtherAcquisitionParameters": None, + "InstrumentModel": None, + "SoftwareVersions": sw_version, + "ExtraMetadata": { + "Channels": extra_channels, + "ImageExtents": { + "ExtMin": [ext_min_x, ext_min_y, ext_min_z], + "ExtMax": [ext_max_x, ext_max_y, ext_max_z], + }, + }, + } + + return bids_json + + # ----------------------------- # Main extraction # ----------------------------- @@ -134,53 +242,64 @@ def print_h5_keys_recursive(obj, indent=""): print(f"{indent}- (Unknown HDF5 object type: {type(obj)})") -try: +bids_metadata = None + +with h5py.File(snakemake.input.ims, "r") as hdf5_file: - with h5py.File(snakemake.input.ims, "r") as hdf5_file: - # print(snakemake.input.ims) - # print_h5_keys_recursive(hdf5_file) + # ── Strategy 1: OME XML tags embedded in the .ims file ────────────── + try: xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:] xml_dict = parse_xml_bytes(xml_data) - custom_attrs = xml_dict["root"]["ca:CustomAttributes"] -except: - print( - "Warning: cannot find OME metadata from imaris file, searching for tif file using standard folder structure" - ) - - from glob import glob - from pathlib import Path - - import tifffile - - p = Path(snakemake.input.ims) - - # 1) Name of the parent folder - subj = p.parent.name # -> 'B_AS161F3' - - # 2) Root path of the 'raw' folder (3 levels up from the file) - raw_root = p.parents[2] # -> '/nfs/trident3/.../raw' + if xml_dict: + custom_attrs = xml_dict["root"]["ca:CustomAttributes"] + print(custom_attrs) + bids_metadata = build_bids_metadata(custom_attrs) + except (KeyError, TypeError, ValueError) as e: + print( + f"Warning: cannot find OME metadata from imaris file ({e}), " + "trying tif fallback..." + ) - # 3) Construct the new path - new_path = str(raw_root / "tif_4x_*" / subj / "*.ome.tif") - try: - tif_file = sorted(glob(new_path))[0] - with tifffile.TiffFile(tif_file) as tif: - xml_data = tif.ome_metadata - xml_dict = parse_xml_str(xml_data) - custom_attrs = xml_dict["OME"]["Image"]["ca:CustomAttributes"] - except: - raise ( - ValueError( - "Cannot find OME metadata in imaris file or in associated tif files from standard folder structure" + # ── Strategy 2: associated .ome.tif in standard folder structure ──── + if bids_metadata is None: + try: + from glob import glob + from pathlib import Path + + import tifffile + + p = Path(snakemake.input.ims) + subj = p.parent.name + raw_root = p.parents[2] + new_path = str(raw_root / "tif_4x_*" / subj / "*.ome.tif") + + tif_file = sorted(glob(new_path))[0] + with tifffile.TiffFile(tif_file) as tif: + xml_data = tif.ome_metadata + xml_dict = parse_xml_str(xml_data) + custom_attrs = xml_dict["OME"]["Image"]["ca:CustomAttributes"] + print(custom_attrs) + bids_metadata = build_bids_metadata(custom_attrs) + except (IndexError, KeyError, TypeError, ValueError) as e: + print( + f"Warning: cannot find associated tif files ({e}), " + "trying native Imaris HDF5 attributes..." ) - ) + + # ── Strategy 3: native Imaris HDF5 attributes (ExtMin/ExtMax) ─────── + if bids_metadata is None: + try: + bids_metadata = build_bids_metadata_from_native_imaris(hdf5_file) + except (KeyError, TypeError, ValueError) as e: + raise ValueError( + f"Cannot extract metadata from .ims file using any strategy. " + f"Native Imaris fallback failed with: {e}" + ) from e -if not xml_dict: - raise ValueError("Failed to parse XML from .ims file") +if bids_metadata is None: + raise ValueError("Failed to extract metadata from .ims file") -print(custom_attrs) -bids_metadata = build_bids_metadata(custom_attrs) # ----------------------------- # Write to JSON