Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@ benchmarks
# pixi environments
.pixi
*.egg-info

config/samples_AS11F3_only.tsv
config/samples_ki3_batch2.tsv
lecb2.log
run_lecb2.sh
201 changes: 160 additions & 41 deletions workflow/scripts/imaris_to_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,25 @@ def clean_keys(obj):
return obj


def h5_attr_to_str(attr):
"""Convert an HDF5 byte-array attribute to a Python string.

Imaris stores attributes as arrays of single-byte values, e.g.
[b'4', b'.', b'0'] → '4.0'
"""
if attr is None:
return None
return "".join(b.decode("utf-8") for b in attr)


def _try_float(value, default=None):
"""Try to convert value to float, return default if not possible."""
try:
return float(value)
except (ValueError, TypeError):
return default


def build_bids_metadata(custom_attrs):
"""Convert custom attribute dict into BIDS microscopy-compliant metadata."""
# --- PixelSize and Units ---
Expand All @@ -73,8 +92,9 @@ def build_bids_metadata(custom_attrs):
"PixelSize": pixel_size,
"PixelSizeUnits": unit_label,
"Immersion": custom_attrs.get("ObjectiveMedium", {}).get("@ObjectiveMedium"),
"NumericalAperture": float(
custom_attrs.get("ObjectiveNA", {}).get("@ObjectiveNA", 0.0)
"NumericalAperture": _try_float(
custom_attrs.get("ObjectiveNA", {}).get("@ObjectiveNA", None),
default=0.1,
),
"Magnification": magnification,
"OtherAcquisitionParameters": custom_attrs.get("MeasurementMode", {}).get(
Expand Down Expand Up @@ -109,6 +129,94 @@ def build_bids_metadata(custom_attrs):
return bids_json


def build_bids_metadata_from_native_imaris(hdf5_file):
"""Build BIDS metadata directly from native Imaris HDF5 attributes.

Fallback for .ims files that lack embedded OME XML tags.
Computes voxel sizes from image extents: pixel_size = (ExtMax - ExtMin) / N_voxels
"""
img = hdf5_file["DataSetInfo/Image"]

# --- Image dimensions ---
nx = int(h5_attr_to_str(img.attrs["X"]))
ny = int(h5_attr_to_str(img.attrs["Y"]))
nz = int(h5_attr_to_str(img.attrs["Z"]))

# --- Physical extents (in file units, typically µm) ---
ext_min_x = float(h5_attr_to_str(img.attrs["ExtMin0"]))
ext_max_x = float(h5_attr_to_str(img.attrs["ExtMax0"]))
ext_min_y = float(h5_attr_to_str(img.attrs["ExtMin1"]))
ext_max_y = float(h5_attr_to_str(img.attrs["ExtMax1"]))
ext_min_z = float(h5_attr_to_str(img.attrs["ExtMin2"]))
ext_max_z = float(h5_attr_to_str(img.attrs["ExtMax2"]))

# --- Compute voxel sizes ---
px_x = abs(ext_max_x - ext_min_x) / nx
px_y = abs(ext_max_y - ext_min_y) / ny
px_z = abs(ext_max_z - ext_min_z) / nz
pixel_size = [px_x, px_y, px_z]

# --- Units ---
unit_raw = h5_attr_to_str(img.attrs.get("Unit", None))
if unit_raw is None:
unit_label = "um"
else:
unit_label = unit_raw.replace("µ", "u") if "µ" in unit_raw else unit_raw

# --- Channel metadata ---
extra_channels = {}
ch_idx = 0
while f"DataSetInfo/Channel {ch_idx}" in hdf5_file:
ch_grp = hdf5_file[f"DataSetInfo/Channel {ch_idx}"]
ch_info = {}
for attr_name in [
"Name",
"LSMExcitationWavelength",
"LSMEmissionWavelength",
"Color",
"Description",
]:
val = ch_grp.attrs.get(attr_name, None)
if val is not None:
ch_info[attr_name] = h5_attr_to_str(val)
extra_channels[f"Channel_{ch_idx}"] = ch_info
ch_idx += 1

# --- Software version ---
sw_version = None
if "DataSetInfo/ImarisDataSet" in hdf5_file:
ds_grp = hdf5_file["DataSetInfo/ImarisDataSet"]
ver = ds_grp.attrs.get("Version", None)
if ver is not None:
sw_version = h5_attr_to_str(ver)

print(f"Native Imaris metadata: PixelSize={pixel_size}, Unit={unit_label}")
print(f" Image dims: X={nx}, Y={ny}, Z={nz}")
print(f" Extents X: [{ext_min_x}, {ext_max_x}]")
print(f" Extents Y: [{ext_min_y}, {ext_max_y}]")
print(f" Extents Z: [{ext_min_z}, {ext_max_z}]")

bids_json = {
"PixelSize": pixel_size,
"PixelSizeUnits": unit_label,
"Immersion": None,
"NumericalAperture": 0.1,
"Magnification": None,
"OtherAcquisitionParameters": None,
"InstrumentModel": None,
"SoftwareVersions": sw_version,
"ExtraMetadata": {
"Channels": extra_channels,
"ImageExtents": {
"ExtMin": [ext_min_x, ext_min_y, ext_min_z],
"ExtMax": [ext_max_x, ext_max_y, ext_max_z],
},
},
}

return bids_json


# -----------------------------
# Main extraction
# -----------------------------
Expand All @@ -134,53 +242,64 @@ def print_h5_keys_recursive(obj, indent=""):
print(f"{indent}- (Unknown HDF5 object type: {type(obj)})")


try:
bids_metadata = None

with h5py.File(snakemake.input.ims, "r") as hdf5_file:

with h5py.File(snakemake.input.ims, "r") as hdf5_file:
# print(snakemake.input.ims)
# print_h5_keys_recursive(hdf5_file)
# ── Strategy 1: OME XML tags embedded in the .ims file ──────────────
try:
xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:]
xml_dict = parse_xml_bytes(xml_data)
custom_attrs = xml_dict["root"]["ca:CustomAttributes"]
except:
print(
"Warning: cannot find OME metadata from imaris file, searching for tif file using standard folder structure"
)

from glob import glob
from pathlib import Path

import tifffile

p = Path(snakemake.input.ims)

# 1) Name of the parent folder
subj = p.parent.name # -> 'B_AS161F3'

# 2) Root path of the 'raw' folder (3 levels up from the file)
raw_root = p.parents[2] # -> '/nfs/trident3/.../raw'
if xml_dict:
custom_attrs = xml_dict["root"]["ca:CustomAttributes"]
print(custom_attrs)
bids_metadata = build_bids_metadata(custom_attrs)
except (KeyError, TypeError, ValueError) as e:
print(
f"Warning: cannot find OME metadata from imaris file ({e}), "
"trying tif fallback..."
)

# 3) Construct the new path
new_path = str(raw_root / "tif_4x_*" / subj / "*.ome.tif")
try:
tif_file = sorted(glob(new_path))[0]
with tifffile.TiffFile(tif_file) as tif:
xml_data = tif.ome_metadata
xml_dict = parse_xml_str(xml_data)
custom_attrs = xml_dict["OME"]["Image"]["ca:CustomAttributes"]
except:
raise (
ValueError(
"Cannot find OME metadata in imaris file or in associated tif files from standard folder structure"
# ── Strategy 2: associated .ome.tif in standard folder structure ────
if bids_metadata is None:
try:
from glob import glob
from pathlib import Path

import tifffile

p = Path(snakemake.input.ims)
subj = p.parent.name
raw_root = p.parents[2]
new_path = str(raw_root / "tif_4x_*" / subj / "*.ome.tif")

tif_file = sorted(glob(new_path))[0]
with tifffile.TiffFile(tif_file) as tif:
xml_data = tif.ome_metadata
xml_dict = parse_xml_str(xml_data)
custom_attrs = xml_dict["OME"]["Image"]["ca:CustomAttributes"]
print(custom_attrs)
bids_metadata = build_bids_metadata(custom_attrs)
except (IndexError, KeyError, TypeError, ValueError) as e:
print(
f"Warning: cannot find associated tif files ({e}), "
"trying native Imaris HDF5 attributes..."
)
)

# ── Strategy 3: native Imaris HDF5 attributes (ExtMin/ExtMax) ───────
if bids_metadata is None:
try:
bids_metadata = build_bids_metadata_from_native_imaris(hdf5_file)
except (KeyError, TypeError, ValueError) as e:
raise ValueError(
f"Cannot extract metadata from .ims file using any strategy. "
f"Native Imaris fallback failed with: {e}"
) from e


if not xml_dict:
raise ValueError("Failed to parse XML from .ims file")
if bids_metadata is None:
raise ValueError("Failed to extract metadata from .ims file")

print(custom_attrs)
bids_metadata = build_bids_metadata(custom_attrs)

# -----------------------------
# Write to JSON
Expand Down