From 6b237f8c13c2ccea59509bc24766a20291ad178f Mon Sep 17 00:00:00 2001 From: David Manthey Date: Wed, 3 Jun 2020 15:14:48 -0400 Subject: [PATCH] Read more OME Tiff files. Some OME Tiff files have one main record and everything else as a Plane. Also, for TIFFs with mostly missing directories (such as OME Tiffs with only a tiled maximal resolution layer), synthesize intermediate empty directories to avoid excessive memory use. --- .../large_image_source_ometiff/__init__.py | 63 +++++++++++-------- .../tiff/large_image_source_tiff/__init__.py | 25 +++++++- test/test_source_ometiff.py | 30 +++++++++ 3 files changed, 89 insertions(+), 29 deletions(-) diff --git a/sources/ometiff/large_image_source_ometiff/__init__.py b/sources/ometiff/large_image_source_ometiff/__init__.py index 02a7d9609..adb60ebc0 100644 --- a/sources/ometiff/large_image_source_ometiff/__init__.py +++ b/sources/ometiff/large_image_source_ometiff/__init__.py @@ -16,6 +16,7 @@ # limitations under the License. ############################################################################## +import copy import math import numpy import PIL.Image @@ -101,30 +102,7 @@ def __init__(self, path, **kwargs): raise TileSourceException('Not an OME Tiff') self._omeinfo = info['OME'] self._checkForOMEZLoop(largeImagePath) - if isinstance(self._omeinfo['Image'], dict): - self._omeinfo['Image'] = [self._omeinfo['Image']] - for img in self._omeinfo['Image']: - if isinstance(img['Pixels'].get('TiffData'), dict): - img['Pixels']['TiffData'] = [img['Pixels']['TiffData']] - if isinstance(img['Pixels'].get('Plane'), dict): - img['Pixels']['Plane'] = [img['Pixels']['Plane']] - try: - self._omebase = self._omeinfo['Image'][0]['Pixels'] - if ((not len(self._omebase['TiffData']) or ( - len(self._omebase['TiffData']) == 1 and - self._omebase['TiffData'][0] == {})) and - len(self._omebase['Plane'])): - self._omebase['TiffData'] = self._omebase['Plane'] - if len({entry.get('UUID', {}).get('FileName', '') - for entry in 
self._omebase['TiffData']}) > 1: - raise TileSourceException('OME Tiff references multiple files') - if (len(self._omebase['TiffData']) != int(self._omebase['SizeC']) * - int(self._omebase['SizeT']) * int(self._omebase['SizeZ']) or - len(self._omebase['TiffData']) != len( - self._omebase.get('Plane', self._omebase['TiffData']))): - raise TileSourceException('OME Tiff contains frames that contain multiple planes') - except (KeyError, ValueError, IndexError): - raise TileSourceException('OME Tiff does not contain an expected record') + self._parseOMEInfo() omeimages = [ entry['Pixels'] for entry in self._omeinfo['Image'] if len(entry['Pixels']['TiffData']) == len(self._omebase['TiffData'])] @@ -206,6 +184,39 @@ def _checkForOMEZLoop(self, largeImagePath): info['Image']['Pixels']['PlanesFromZloop'] = 'true' info['Image']['Pixels']['SizeZ'] = str(zloop) + def _parseOMEInfo(self): + if isinstance(self._omeinfo['Image'], dict): + self._omeinfo['Image'] = [self._omeinfo['Image']] + for img in self._omeinfo['Image']: + if isinstance(img['Pixels'].get('TiffData'), dict): + img['Pixels']['TiffData'] = [img['Pixels']['TiffData']] + if isinstance(img['Pixels'].get('Plane'), dict): + img['Pixels']['Plane'] = [img['Pixels']['Plane']] + try: + self._omebase = self._omeinfo['Image'][0]['Pixels'] + if ((not len(self._omebase['TiffData']) or + len(self._omebase['TiffData']) == 1) and + len(self._omebase['Plane'])): + if not len(self._omebase['TiffData']) or self._omebase['TiffData'][0] == {}: + self._omebase['TiffData'] = self._omebase['Plane'] + elif (int(self._omebase['TiffData'][0].get('PlaneCount', 0)) == + len(self._omebase['Plane'])): + planes = copy.deepcopy(self._omebase['Plane']) + for idx, plane in enumerate(planes): + plane['IFD'] = plane.get( + 'IFD', int(self._omebase['TiffData'][0].get('IFD', 0)) + idx) + self._omebase['TiffData'] = planes + if len({entry.get('UUID', {}).get('FileName', '') + for entry in self._omebase['TiffData']}) > 1: + raise 
TileSourceException('OME Tiff references multiple files') + if (len(self._omebase['TiffData']) != int(self._omebase['SizeC']) * + int(self._omebase['SizeT']) * int(self._omebase['SizeZ']) or + len(self._omebase['TiffData']) != len( + self._omebase.get('Plane', self._omebase['TiffData']))): + raise TileSourceException('OME Tiff contains frames that contain multiple planes') + except (KeyError, ValueError, IndexError): + raise TileSourceException('OME Tiff does not contain an expected record') + def getMetadata(self): """ Return a dictionary of metadata containing levels, sizeX, sizeY, @@ -238,8 +249,8 @@ def getMetadata(self): for key in reftbl: if key in frame and not reftbl[key] in frame: frame[reftbl[key]] = int(frame[key]) - if frame[reftbl[key]] + 1 > maxref.get(reftbl[key], 0): - maxref[reftbl[key]] = frame[reftbl[key]] + 1 + if reftbl[key] in frame and frame[reftbl[key]] + 1 > maxref.get(reftbl[key], 0): + maxref[reftbl[key]] = frame[reftbl[key]] + 1 frame['Frame'] = idx if (idx and ( frame.get('IndexV') != result['frames'][idx - 1].get('IndexV') or diff --git a/sources/tiff/large_image_source_tiff/__init__.py b/sources/tiff/large_image_source_tiff/__init__.py index a4c7a855d..c47ef6cb1 100644 --- a/sources/tiff/large_image_source_tiff/__init__.py +++ b/sources/tiff/large_image_source_tiff/__init__.py @@ -65,6 +65,14 @@ class TiffFileTileSource(FileTileSource): 'image/x-ptif': SourcePriority.PREFERRED, } + # When getting tiles for otherwise empty directories (missing powers of + # two), we composite the tile from higher resolution levels. This can use + # excessive memory if there are too many missing levels. For instance, if + # there are six missing levels and the tile size is 1024 square RGBA, then + # 16 Gb are needed for the composited tile at a minimum. By setting + # _maxSkippedLevels, such large gaps are composited in stages. + _maxSkippedLevels = 3 + def __init__(self, path, **kwargs): """ Initialize the tile class. 
See the base class for other available @@ -272,9 +280,13 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, try: allowStyle = True if self._tiffDirectories[z] is None: - if sparseFallback: - raise IOTiffException('Missing z level %d' % z) - tile = self.getTileFromEmptyDirectory(x, y, z, **kwargs) + try: + tile = self.getTileFromEmptyDirectory(x, y, z, **kwargs) + except Exception: + if sparseFallback: + raise IOTiffException('Missing z level %d' % z) + else: + raise allowStyle = False format = TILE_FORMAT_PIL else: @@ -327,10 +339,14 @@ def getTileFromEmptyDirectory(self, x, y, z, **kwargs): :param z: original level. :returns: tile in PIL format. """ + basez = z scale = 1 while self._tiffDirectories[z] is None: scale *= 2 z += 1 + while z - basez > self._maxSkippedLevels: + z -= self._maxSkippedLevels + scale = int(scale / 2 ** self._maxSkippedLevels) tile = PIL.Image.new( 'RGBA', (self.tileWidth * scale, self.tileHeight * scale)) maxX = 2.0 ** (z + 1 - self.levels) * self.sizeX / self.tileWidth @@ -361,8 +377,11 @@ def getPreferredLevel(self, level): :returns level: a level with actual data that is no lower resolution. """ level = max(0, min(level, self.levels - 1)) + baselevel = level while self._tiffDirectories[level] is None and level < self.levels - 1: level += 1 + while level - baselevel > self._maxSkippedLevels: + level -= self._maxSkippedLevels return level def getAssociatedImagesList(self): diff --git a/test/test_source_ometiff.py b/test/test_source_ometiff.py index 3d44073ba..323606324 100644 --- a/test/test_source_ometiff.py +++ b/test/test_source_ometiff.py @@ -2,8 +2,10 @@ import json import numpy +from xml.etree import cElementTree from large_image.constants import TILE_FORMAT_NUMPY +from large_image.tilesource import etreeToDict import large_image_source_ometiff from . 
import utilities @@ -81,3 +83,31 @@ def testInternalMetadata(): source = large_image_source_ometiff.OMETiffFileTileSource(imagePath) metadata = source.getInternalMetadata() assert 'omeinfo' in metadata + + +def testXMLParsing(): + samples = [{ + 'xml': """ +""", # noqa + 'checks': { + 'frames': 3, + 'IndexRange': {'IndexC': 3}, + 'IndexStride': {'IndexC': 1}, + 'channelmap': {'Blue': 2, 'Green': 1, 'Red': 0}, + 'channels': ['Red', 'Green', 'Blue'], + } + }] + # Create a source so we can use internal functions for testing + imagePath = utilities.externaldata('data/sample.ome.tif.sha512') + source = large_image_source_ometiff.OMETiffFileTileSource(imagePath) + for sample in samples: + xml = cElementTree.fromstring(sample['xml']) + info = etreeToDict(xml) + source._omeinfo = info['OME'] + source._parseOMEInfo() + metadata = source.getMetadata() + for key, value in sample['checks'].items(): + if key in {'frames'}: + assert len(metadata[key]) == value + else: + assert metadata[key] == value