Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve handling of ome-tiff files generated by bioformats #1750

Merged
merged 1 commit into from
Jan 2, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@
- Harden the geojson annotation parser ([#1743](../../pull/1743))
- Add more color palettes ([#1746](../../pull/1746))
- Improve the list of extensions the bioformats source reports ([#1748](../../pull/1748))
- Improve handling of ome-tiff files generated by bioformats ([#1750](../../pull/1750))

### Changes

70 changes: 69 additions & 1 deletion sources/ometiff/large_image_source_ometiff/__init__.py
Original file line number Diff line number Diff line change
@@ -105,6 +105,7 @@
msg = 'Not a recognized OME Tiff'
raise TileSourceError(msg)
info = getattr(base, '_description_record', None)
self._associatedImages = {}
if not info or not info.get('OME'):
msg = 'Not an OME Tiff'
raise TileSourceError(msg)
@@ -115,6 +116,7 @@
except KeyError:
msg = 'Not a recognized OME Tiff'
raise TileSourceError(msg)
usesSubIfds = self._checkForSubIfds(base)
omeimages = [
entry['Pixels'] for entry in self._omeinfo['Image'] if
len(entry['Pixels']['TiffData']) == len(self._omebase['TiffData'])]
@@ -125,10 +127,16 @@
omebylevel = dict(zip(levels, omeimages))
self._omeLevels = [omebylevel.get(key) for key in range(max(omebylevel.keys()) + 1)]
if base._tiffInfo.get('istiled'):
if usesSubIfds:
self._omeLevels = [None] * max(usesSubIfds) + [self._omeLevels[-1]]
self._tiffDirectories = [
self.getTiffDir(int(entry['TiffData'][0].get('IFD', 0)))
if entry else None
for entry in self._omeLevels]
if usesSubIfds:
for lvl in usesSubIfds:
if self._tiffDirectories[lvl] is None:
self._tiffDirectories[lvl] = False
else:
self._tiffDirectories = [
self.getTiffDir(0, mustBeTiled=None)
@@ -149,7 +157,6 @@
# We can get the embedded images, but we don't currently use non-tiled
# images as associated images. This would require enumerating tiff
# directories not mentioned by the ome list.
self._associatedImages = {}
self._checkForInefficientDirectories()

def _checkForOMEZLoop(self):
@@ -199,6 +206,40 @@
info['Image']['Pixels']['PlanesFromZloop'] = 'true'
info['Image']['Pixels']['SizeZ'] = str(zloop)

def _checkForSubIfds(self, base):
    """
    Check if the first ifd has sub-ifds.  If so, expect lower resolutions
    to be in subifds, not in primary ifds.

    The number of levels is computed from how many times the base image
    must be halved for its larger tile-relative dimension to fit in one
    tile.  Level ``levels - 1`` is the base image; level ``z`` is expected
    in sub-ifd number ``levels - 1 - z`` at a scale of
    ``2 ** (levels - 1 - z)``.

    :param base: base tiff directory.  Its ``imageWidth``, ``imageHeight``,
        ``tileWidth``, and ``tileHeight`` attributes are read.
    :returns: either False if no subifds are lower resolution, or a
        dictionary of levels (keys) and values that are subifd numbers.
        When a dictionary is returned, the top level is always present and
        maps to 0.
    """
    try:
        levels = int(max(0, math.ceil(max(
            math.log(float(base.imageWidth) / base.tileWidth),
            math.log(float(base.imageHeight) / base.tileHeight)) / math.log(2))) + 1)
        filled = {}
        # Walk from the second-highest resolution down to the lowest
        for z in range(levels - 2, -1, -1):
            subdir = levels - 1 - z
            scale = int(2 ** subdir)
            try:
                subifd = self.getTiffDir(0, mustBeTiled=True, subDirectoryNum=subdir)
            except Exception:
                # Any failure to get this sub-ifd is treated as the sub-ifd
                # being unavailable; other levels are still checked.
                continue
            if subifd is None:
                continue
            # The sub-ifd must either share the base tile size or consist
            # of a single tile that covers the whole sub-image.
            if (subifd.tileWidth != base.tileWidth and
                    subifd.tileWidth != subifd.imageWidth):
                continue
            if (subifd.tileHeight != base.tileHeight and
                    subifd.tileHeight != subifd.imageHeight):
                continue
            # Allow for rounding when the base size is not an exact
            # multiple of the scale.
            if abs(subifd.imageWidth * scale - base.imageWidth) > scale:
                continue
            if abs(subifd.imageHeight * scale - base.imageHeight) > scale:
                continue
            filled[z] = subdir
        if not filled:
            return False
        # The full resolution image is the primary ifd itself
        filled[levels - 1] = 0
        return filled
    except TiffError:
        return False

Check warning on line 241 in sources/ometiff/large_image_source_ometiff/__init__.py

Codecov / codecov/patch

sources/ometiff/large_image_source_ometiff/__init__.py#L240-L241

Added lines #L240 - L241 were not covered by tests

def _parseOMEInfo(self): # noqa
if isinstance(self._omeinfo['Image'], dict):
self._omeinfo['Image'] = [self._omeinfo['Image']]
@@ -241,6 +282,33 @@
for entry in self._omebase['TiffData']}) > 1:
msg = 'OME Tiff references multiple files'
raise TileSourceError(msg)
if (len(self._omebase['TiffData']) ==
int(self._omebase['SizeT']) * int(self._omebase['SizeZ'])):
self._omebase['SizeC'] = 1
# DWM:: others are probably associated images
for img in self._omeinfo['Image'][1:]:
try:
if img['Name'] and img['Pixels']['TiffData'][0]['IFD']:
self._addAssociatedImage(
int(img['Pixels']['TiffData'][0]['IFD']),
None, None, img['Name'].split()[0])
except Exception:
pass

Check warning on line 296 in sources/ometiff/large_image_source_ometiff/__init__.py

Codecov / codecov/patch

sources/ometiff/large_image_source_ometiff/__init__.py#L295-L296

Added lines #L295 - L296 were not covered by tests
elif len(self._omeinfo['Image']) > 1:
multiple = False
for img in self._omeinfo['Image'][1:]:
try:
bpix = self._omeinfo['Image'][0]['Pixels']
imgpix = img['Pixels']
if imgpix['SizeX'] == bpix['SizeX'] and imgpix['SizeY'] == bpix['SizeY']:
multiple = True
break
except Exception:
multiple = True

Check warning on line 307 in sources/ometiff/large_image_source_ometiff/__init__.py

Codecov / codecov/patch

sources/ometiff/large_image_source_ometiff/__init__.py#L304-L307

Added lines #L304 - L307 were not covered by tests
if multiple:
# We should handle this as SizeXY
msg = 'OME Tiff references multiple images'
raise TileSourceError(msg)

Check warning on line 311 in sources/ometiff/large_image_source_ometiff/__init__.py

Codecov / codecov/patch

sources/ometiff/large_image_source_ometiff/__init__.py#L310-L311

Added lines #L310 - L311 were not covered by tests
if (len(self._omebase['TiffData']) != int(self._omebase['SizeC']) *
int(self._omebase['SizeT']) * int(self._omebase['SizeZ']) or
len(self._omebase['TiffData']) != len(
5 changes: 4 additions & 1 deletion sources/pil/large_image_source_pil/__init__.py
Original file line number Diff line number Diff line change
@@ -19,6 +19,7 @@
import math
import os
import threading
import warnings

import numpy as np
import PIL.Image
@@ -56,6 +57,8 @@
# package is not installed
pass

warnings.filterwarnings('ignore', category=UserWarning, module='.*PIL.*')

# Default to ignoring files with some specific extensions.
config.ConfigValues['source_pil_ignored_names'] = \
r'(\.mrxs|\.vsi)$'
@@ -138,7 +141,7 @@ def __init__(self, path, maxSize=None, **kwargs): # noqa
if self._pilImage is None:
try:
self._pilImage = PIL.Image.open(largeImagePath)
except OSError:
except (OSError, ValueError):
if not os.path.isfile(largeImagePath):
raise TileSourceFileNotFoundError(largeImagePath) from None
msg = 'File cannot be opened via PIL.'
30 changes: 22 additions & 8 deletions sources/tiff/large_image_source_tiff/__init__.py
Original file line number Diff line number Diff line change
@@ -327,12 +327,19 @@ def _initWithTiffTools(self): # noqa
self._info = info
frames = []
associated = [] # for now, a list of directories
used_subifd = False
for idx, ifd in enumerate(info['ifds']):
# if not tiles, add to associated images
if tifftools.Tag.tileWidth.value not in ifd['tags']:
associated.append(idx)
associated.append((idx, False))
continue
level = self._levelFromIfd(ifd, info['ifds'][0])
try:
level = self._levelFromIfd(ifd, info['ifds'][0])
except TileSourceError:
if idx and used_subifd:
associated.append((idx, True))
continue
raise
# if the same resolution as the main image, add a frame
if level == self.levels - 1:
frames.append({'dirs': [None] * self.levels})
@@ -371,9 +378,13 @@ def _initWithTiffTools(self): # noqa
tifftools.Tag.TileOffsets.value not in subifds[0]['tags']):
msg = 'Subifd has no strip or tile offsets.'
raise TileSourceMalformedError(msg)
level = self._levelFromIfd(subifds[0], info['ifds'][0])
try:
level = self._levelFromIfd(subifds[0], info['ifds'][0])
except Exception:
break
if level < self.levels - 1 and frames[-1]['dirs'][level] is None:
frames[-1]['dirs'][level] = (idx, subidx + 1)
used_subifd = True
else:
msg = 'Tile layers are in a surprising order'
raise TileSourceError(msg)
@@ -407,8 +418,8 @@ def _initWithTiffTools(self): # noqa
self._iccprofiles.append(ifd['tags'][
tifftools.Tag.ICCProfile.value]['data'])
self._associatedImages = {}
for dirNum in associated:
self._addAssociatedImage(dirNum)
for dirNum, isTiled in associated:
self._addAssociatedImage(dirNum, isTiled)
self._frames = frames
self._tiffDirectories = [
self.getTiffDir(
@@ -490,7 +501,7 @@ def _checkForVendorSpecificTags(self):
frame.setdefault('frame', {})
frame['frame']['IndexC'] = idx

def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None):
def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None, imageId=None):
"""
Check if the specified TIFF directory contains an image with a sensible
image description that can be used as an ID. If so, and if the image
@@ -501,6 +512,7 @@ def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None):
untiled images.
:param topImage: if specified, add image-embedded metadata to this
image.
:param imageId: if specified, use this as the image name.
"""
try:
associated = self.getTiffDir(directoryNum, mustBeTiled)
@@ -514,6 +526,8 @@ def _addAssociatedImage(self, directoryNum, mustBeTiled=False, topImage=None):
id = 'dir%d' % directoryNum
if not len(self._associatedImages):
id = 'macro'
if imageId:
id = imageId
if not id and not mustBeTiled:
id = {1: 'label', 9: 'macro'}.get(associated._tiffInfo.get('subfiletype'))
if not isinstance(id, str):
@@ -765,7 +779,7 @@ def getAssociatedImagesList(self):
"""
imageList = set(self._associatedImages)
for td in self._tiffDirectories:
if td is not None:
if td is not None and td is not False:
imageList |= set(td._embeddedImages)
return sorted(imageList)

@@ -784,7 +798,7 @@ def _getAssociatedImage(self, imageKey):
# with seemingly bad associated images, we may need to read them with a
# more complex process than read_image.
for td in self._tiffDirectories:
if td is not None and imageKey in td._embeddedImages:
if td is not None and td is not False and imageKey in td._embeddedImages:
return PIL.Image.open(io.BytesIO(base64.b64decode(td._embeddedImages[imageKey])))
if imageKey in self._associatedImages:
return PIL.Image.fromarray(self._associatedImages[imageKey])
8 changes: 5 additions & 3 deletions sources/tiff/large_image_source_tiff/tiff_reader.py
Original file line number Diff line number Diff line change
@@ -788,11 +788,13 @@ def getTile(self, x, y, asarray=False):

if (not self._tiffInfo.get('istiled') or
self._tiffInfo.get('compression') not in {
libtiff_ctypes.COMPRESSION_JPEG, 33003, 33005, 34712} or
libtiff_ctypes.COMPRESSION_JPEG, 33003, 33004, 33005, 34712} or
self._tiffInfo.get('bitspersample') != 8 or
self._tiffInfo.get('sampleformat') not in {
None, libtiff_ctypes.SAMPLEFORMAT_UINT} or
(asarray and self._tiffInfo.get('compression') not in {33003, 33005, 34712} and (
(asarray and self._tiffInfo.get('compression') not in {
33003, 33004, 33005, 34712,
} and (
self._tiffInfo.get('compression') != libtiff_ctypes.COMPRESSION_JPEG or
self._tiffInfo.get('photometric') != libtiff_ctypes.PHOTOMETRIC_YCBCR))):
return self._getUncompressedTile(tileNum)
@@ -811,7 +813,7 @@ def getTile(self, x, y, asarray=False):
# Get the whole frame, which is in a JPEG or JPEG 2000 format
frame = self._getJpegFrame(tileNum, True)
# For JP2K, see if we can convert it faster than PIL
if self._tiffInfo.get('compression') in {33003, 33005}:
if self._tiffInfo.get('compression') in {33003, 33004, 33005, 34712}:
try:
import openjpeg

4 changes: 4 additions & 0 deletions test/datastore.py
Original file line number Diff line number Diff line change
@@ -123,6 +123,10 @@
# Synthetic Indica Labs tiff; subifds missing tile/strip data and unmarked
# float32 pixels rather than uint32
'synthetic_indica.tiff': 'sha512:fba7eb2fb5fd12ac242d8b0760440f170f48f9e2434a672cbf230bd8a9ff02fad8f9bdf7225edf2de244f412edfc5205e695031a1d43dd99fe31c3aca11909a1', # noqa
# Converted from the TCGA svs file using bioformats java program and
# --rgb --quality=0.015 --compression='JPEG-2000 Lossy' parameters to make
# the file small
'TCGA-55-8207-01Z-00-DX1.ome.tiff': 'sha512:50cf63f0e8bfa3054d3532b7dd0237b66aeb4c7609da874639a28bc068dbd157f786e84d3eb76a3b0e6636a042c56c3b96d3be2ad66f7589d0542a5d20cecdb4', # noqa
}


4 changes: 2 additions & 2 deletions test/test_converter.py
Original file line number Diff line number Diff line change
@@ -146,8 +146,8 @@ def testConvertJp2kCompression(tmpdir):
image, _ = source.getRegion(
output={'maxWidth': 200, 'maxHeight': 200}, format=constants.TILE_FORMAT_NUMPY)
# Without or with icc adjustment
assert ((image[12][167] == [215, 135, 172]).all() or
(image[12][167] == [216, 134, 172]).all())
assert ((image[12][167][:3] == [215, 135, 172]).all() or
(image[12][167][:3] == [216, 134, 172]).all())

outputPath2 = os.path.join(tmpdir, 'out2.tiff')
large_image_converter.convert(imagePath, outputPath2, compression='jp2k', psnr=50)
4 changes: 2 additions & 2 deletions test/test_source_base.py
Original file line number Diff line number Diff line change
@@ -64,12 +64,12 @@
'openslide': {
'read': r'\.(ptif|svs|ndpi|tif.*|qptiff|dcm)$',
'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes|synthetic_untiled|indica|tcia.*dcm)', # noqa
'skip': r'nokeyframe\.ome\.tiff$',
'skip': r'nokeyframe\.ome\.tiff|TCGA-55.*\.ome\.tiff$',
'skipTiles': r'one_layer_missing',
},
'pil': {
'read': r'(\.(jpg|jpeg|png|tif.*)|18[-0-9a-f]{34}\.dcm)$',
'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview|indica)', # noqa
'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview|indica|TCGA-55.*\.ome\.tiff)', # noqa
},
'rasterio': {
'read': r'(\.(jpg|jpeg|jp2|ptif|scn|svs|ndpi|tif.*|qptiff)|18[-0-9a-f]{34}\.dcm)$',