Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 144 additions & 39 deletions workflow/scripts/imaris_to_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ def clean_keys(obj):
return obj


def h5_attr_to_str(attr):
    """Convert an HDF5 attribute value to a Python string.

    Imaris typically stores attributes as arrays of single-byte values,
    e.g. [b'4', b'.', b'0'] → '4.0'.  However, h5py may also return a
    plain ``str``, a scalar ``bytes`` / ``numpy.bytes_``, or a NumPy
    array of ``uint8`` depending on how the file was written, so all of
    those forms are handled here.  Trailing NUL padding is stripped.

    Parameters
    ----------
    attr : None, str, bytes, numpy scalar/ndarray, or iterable of bytes
        Raw attribute value as returned by ``h5py``.

    Returns
    -------
    str or None
        Decoded string, or ``None`` when *attr* is ``None``.
    """
    if attr is None:
        return None
    if isinstance(attr, str):
        return attr.rstrip("\x00")
    # numpy.bytes_ is a subclass of bytes, so this covers both.
    if isinstance(attr, bytes):
        return attr.decode("utf-8").rstrip("\x00")
    # NumPy unsigned-integer arrays (e.g. uint8): reinterpret the raw
    # buffer as UTF-8 text instead of iterating Python ints.
    dtype = getattr(attr, "dtype", None)
    if dtype is not None and dtype.kind == "u":
        return attr.tobytes().decode("utf-8").rstrip("\x00")
    # Fallback: the classic Imaris layout — an iterable of 1-byte values.
    return "".join(
        b.decode("utf-8") if isinstance(b, bytes) else str(b) for b in attr
    ).rstrip("\x00")
Comment on lines +55 to +63
Copy link

Copilot AI Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

h5_attr_to_str assumes the attribute is an iterable of 1-byte values (each with .decode()), but h5py attributes are often returned as a scalar bytes/str, a NumPy scalar (e.g. np.bytes_), or a NumPy array of uint8. In those cases this will raise (e.g. iterating a bytes yields ints). Consider making this helper handle: str (return as-is), bytes/np.bytes_ (single decode), NumPy ndarray (decode bytes dtype or convert uint8 via .tobytes()), and strip any trailing \x00 padding if present.

Copilot uses AI. Check for mistakes.


def build_bids_metadata(custom_attrs):
"""Convert custom attribute dict into BIDS microscopy-compliant metadata."""
# --- PixelSize and Units ---
Expand Down Expand Up @@ -109,6 +120,89 @@ def build_bids_metadata(custom_attrs):
return bids_json


def build_bids_metadata_from_native_imaris(hdf5_file):
    """Build BIDS metadata directly from native Imaris HDF5 attributes.

    Fallback for .ims files that lack embedded OME XML tags.  Voxel sizes
    are derived from the recorded image extents:
    ``pixel_size = (ExtMax - ExtMin) / N_voxels``.

    Parameters
    ----------
    hdf5_file : h5py.File
        Open Imaris (.ims) file handle.

    Returns
    -------
    dict
        BIDS microscopy-style metadata dictionary (PixelSize, units,
        channel info, image extents, software version).

    Raises
    ------
    KeyError
        If the required ``DataSetInfo/Image`` attributes are missing.
    """
    img = hdf5_file["DataSetInfo/Image"]

    # --- Image dimensions (voxel counts along each axis) ---
    nx = int(h5_attr_to_str(img.attrs["X"]))
    ny = int(h5_attr_to_str(img.attrs["Y"]))
    nz = int(h5_attr_to_str(img.attrs["Z"]))

    # --- Physical extents (in file units, typically µm) ---
    ext_min_x = float(h5_attr_to_str(img.attrs["ExtMin0"]))
    ext_max_x = float(h5_attr_to_str(img.attrs["ExtMax0"]))
    ext_min_y = float(h5_attr_to_str(img.attrs["ExtMin1"]))
    ext_max_y = float(h5_attr_to_str(img.attrs["ExtMax1"]))
    ext_min_z = float(h5_attr_to_str(img.attrs["ExtMin2"]))
    ext_max_z = float(h5_attr_to_str(img.attrs["ExtMax2"]))

    # --- Compute voxel sizes ---
    px_x = abs(ext_max_x - ext_min_x) / nx
    px_y = abs(ext_max_y - ext_min_y) / ny
    px_z = abs(ext_max_z - ext_min_z) / nz
    pixel_size = [px_x, px_y, px_z]

    # --- Units ---
    unit_raw = h5_attr_to_str(img.attrs.get("Unit", None))
    if unit_raw is None:
        # Imaris files conventionally use microns when no unit is recorded.
        unit_label = "um"
    else:
        # Normalise the micron sign to ASCII; replace() is a no-op when
        # the character is absent, so no membership check is needed.
        unit_label = unit_raw.replace("µ", "u")

    # --- Channel metadata: collect every "Channel N" group present ---
    extra_channels = {}
    ch_idx = 0
    while f"DataSetInfo/Channel {ch_idx}" in hdf5_file:
        ch_grp = hdf5_file[f"DataSetInfo/Channel {ch_idx}"]
        ch_info = {}
        for attr_name in [
            "Name",
            "LSMExcitationWavelength",
            "LSMEmissionWavelength",
            "Color",
            "Description",
        ]:
            val = ch_grp.attrs.get(attr_name, None)
            if val is not None:
                ch_info[attr_name] = h5_attr_to_str(val)
        extra_channels[f"Channel_{ch_idx}"] = ch_info
        ch_idx += 1

    # --- Software version (optional group) ---
    sw_version = None
    if "DataSetInfo/ImarisDataSet" in hdf5_file:
        ds_grp = hdf5_file["DataSetInfo/ImarisDataSet"]
        ver = ds_grp.attrs.get("Version", None)
        if ver is not None:
            sw_version = h5_attr_to_str(ver)

    print(f"Native Imaris metadata: PixelSize={pixel_size}, Unit={unit_label}")
    print(f"  Image dims: X={nx}, Y={ny}, Z={nz}")
    print(f"  Extents X: [{ext_min_x}, {ext_max_x}]")
    print(f"  Extents Y: [{ext_min_y}, {ext_max_y}]")
    print(f"  Extents Z: [{ext_min_z}, {ext_max_z}]")

    bids_json = {
        "PixelSize": pixel_size,
        "PixelSizeUnits": unit_label,
        "Immersion": None,
        "NumericalAperture": 0.0,
        "Magnification": None,
        "OtherAcquisitionParameters": None,
        "InstrumentModel": None,
        "SoftwareVersions": sw_version,
        "ExtraMetadata": {
            "Channels": extra_channels,
            "ImageExtents": {
                "ExtMin": [ext_min_x, ext_min_y, ext_min_z],
                "ExtMax": [ext_max_x, ext_max_y, ext_max_z],
            },
        },
    }

    return bids_json


# -----------------------------
# Main extraction
# -----------------------------
Expand All @@ -134,53 +228,64 @@ def print_h5_keys_recursive(obj, indent=""):
print(f"{indent}- (Unknown HDF5 object type: {type(obj)})")


# Extract metadata from the .ims file, trying three strategies in order
# of fidelity; `bids_metadata` stays None until one of them succeeds.
bids_metadata = None

with h5py.File(snakemake.input.ims, "r") as hdf5_file:

    # ── Strategy 1: OME XML tags embedded in the .ims file ──────────────
    try:
        xml_data = hdf5_file["DataSetInfo/OME Image Tags/Image 0"][:]
        xml_dict = parse_xml_bytes(xml_data)
        if xml_dict:
            custom_attrs = xml_dict["root"]["ca:CustomAttributes"]
            print(custom_attrs)
            bids_metadata = build_bids_metadata(custom_attrs)
    # OSError included: the dataset may exist but be unreadable, and we
    # still want to fall through to the remaining strategies.
    except (KeyError, TypeError, ValueError, OSError) as e:
        print(
            f"Warning: cannot find OME metadata from imaris file ({e}), "
            "trying tif fallback..."
        )

    # ── Strategy 2: associated .ome.tif in standard folder structure ────
    if bids_metadata is None:
        try:
            from glob import glob
            from pathlib import Path

            import tifffile

            p = Path(snakemake.input.ims)
            # Parent folder name identifies the subject (e.g. 'B_AS161F3');
            # the 'raw' root is three levels up from the .ims file.
            subj = p.parent.name
            raw_root = p.parents[2]
            new_path = str(raw_root / "tif_4x_*" / subj / "*.ome.tif")

            tif_file = sorted(glob(new_path))[0]
            with tifffile.TiffFile(tif_file) as tif:
                xml_data = tif.ome_metadata
                xml_dict = parse_xml_str(xml_data)
                custom_attrs = xml_dict["OME"]["Image"]["ca:CustomAttributes"]
            print(custom_attrs)
            bids_metadata = build_bids_metadata(custom_attrs)
        # IndexError: no matching tif found; OSError: tif exists but is
        # corrupt/unreadable — either way, continue to Strategy 3.
        except (IndexError, KeyError, TypeError, ValueError, OSError) as e:
            print(
                f"Warning: cannot find associated tif files ({e}), "
                "trying native Imaris HDF5 attributes..."
            )

    # ── Strategy 3: native Imaris HDF5 attributes (ExtMin/ExtMax) ───────
    if bids_metadata is None:
        try:
            bids_metadata = build_bids_metadata_from_native_imaris(hdf5_file)
        except (KeyError, TypeError, ValueError) as e:
            raise ValueError(
                "Cannot extract metadata from .ims file using any strategy. "
                f"Native Imaris fallback failed with: {e}"
            ) from e


# Defensive final check: all strategies either set bids_metadata or raised.
if bids_metadata is None:
    raise ValueError("Failed to extract metadata from .ims file")

# -----------------------------
# Write to JSON
Expand Down
Loading