Skip to content

Commit

Permalink
Handle indica labs variant tiff.
Browse files Browse the repository at this point in the history
The tiff files have some subifds without any strips or tiles and
erroneously do not declare that they have float pixel data.
  • Loading branch information
manthey committed Nov 5, 2024
1 parent 7d4d213 commit 5172d3c
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 65 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
### Improvements

- Format dates in item lists ([#1707](../../pull/1707))
- Guard dtype types ([#1711](../../pull/1711), [#1714](../../pull/1714))
- Guard dtype types ([#1711](../../pull/1711), [#1714](../../pull/1714), [#1716](../../pull/1716))
- Better handle IndicaLabs tiff files ([#1717](../../pull/1717))

### Changes

Expand Down
4 changes: 4 additions & 0 deletions large_image/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ class TileSourceInefficientError(TileSourceError):
pass


class TileSourceMalformedError(TileSourceError):
    """
    Raised when a file is structurally malformed.

    For instance, the tiff source raises this for a sub-IFD that has neither
    strip offsets nor tile offsets.
    """
    pass


class TileSourceFileNotFoundError(TileSourceError, FileNotFoundError):
def __init__(self, *args, **kwargs) -> None:
super().__init__(errno.ENOENT, *args, **kwargs)
Expand Down
3 changes: 2 additions & 1 deletion large_image/tilesource/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ def _imageToPIL(
# image = image / ((2 ** maxl2) - 1)
# image = (image * 255).astype(numpy.uint8)
elif image.dtype != np.uint8:
image = image.astype(np.uint8)
image = np.clip(np.nan_to_num(np.where(
image is None, np.nan, image), nan=0), 0, 255).astype(np.uint8)
image = PIL.Image.fromarray(image, mode)
elif not isinstance(image, PIL.Image.Image):
image = PIL.Image.open(io.BytesIO(image))
Expand Down
10 changes: 9 additions & 1 deletion sources/tiff/large_image_source_tiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@

from large_image.cache_util import LruCacheMetaclass, methodcache
from large_image.constants import TILE_FORMAT_NUMPY, TILE_FORMAT_PIL, SourcePriority
from large_image.exceptions import TileSourceError, TileSourceFileNotFoundError
from large_image.exceptions import (TileSourceError,
TileSourceFileNotFoundError,
TileSourceMalformedError)
from large_image.tilesource import FileTileSource, nearPowerOfTwo

from . import tiff_reader
Expand Down Expand Up @@ -90,6 +92,8 @@ def __init__(self, path, **kwargs): # noqa
try:
self._initWithTiffTools()
return
except TileSourceMalformedError:
raise
except Exception as exc:
self.logger.debug('Cannot read with tifftools route; %r', exc)
lastException = exc
Expand Down Expand Up @@ -363,6 +367,10 @@ def _initWithTiffTools(self): # noqa
if len(subifds) != 1:
msg = 'When stored in subifds, each subifd should be a single ifd.'
raise TileSourceError(msg)
if (tifftools.Tag.StripOffsets.value not in subifds[0]['tags'] and
tifftools.Tag.TileOffsets.value not in subifds[0]['tags']):
msg = 'Subifd has no strip or tile offsets.'
raise TileSourceMalformedError(msg)
level = self._levelFromIfd(subifds[0], info['ifds'][0])
if level < self.levels - 1 and frames[-1]['dirs'][level] is None:
frames[-1]['dirs'][level] = (idx, subidx + 1)
Expand Down
132 changes: 75 additions & 57 deletions sources/tifffile/large_image_source_tifffile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,68 +292,19 @@ def _handle_imagej(self):
except Exception:
pass

def _handle_scn(self): # noqa
"""
For SCN files, parse the xml and possibly adjust how associated images
are labelled.
"""
def _handle_indica(self):
import xml.etree.ElementTree

import large_image.tilesource.utilities

root = xml.etree.ElementTree.fromstring(self._tf.pages[0].description)
self._xml = large_image.tilesource.utilities.etreeToDict(root)
for collection in et_findall(root, 'collection'):
sizeX = collection.attrib.get('sizeX')
sizeY = collection.attrib.get('sizeY')
for supplementalImage in et_findall(collection, 'supplementalImage'):
name = supplementalImage.attrib.get('type', '').lower()
ifd = supplementalImage.attrib.get('ifd', '')
oldname = 'image_%s' % ifd
if (name and ifd and oldname in self._associatedImages and
name not in self._associatedImages):
self._associatedImages[name] = self._associatedImages[oldname]
self._associatedImages.pop(oldname, None)
for image in et_findall(collection, 'image'):
name = image.attrib.get('name', 'Unknown')
for view in et_findall(image, 'view'):
if (sizeX and view.attrib.get('sizeX') == sizeX and
sizeY and view.attrib.get('sizeY') == sizeY and
not int(view.attrib.get('offsetX')) and
not int(view.attrib.get('offsetY')) and
name.lower() in self._associatedImages and
'macro' not in self._associatedImages):
self._associatedImages['macro'] = self._associatedImages[name.lower()]
self._associatedImages.pop(name.lower(), None)
if name != self._baseSeries.name:
continue
for scanSettings in et_findall(image, 'scanSettings'):
for objectiveSettings in et_findall(scanSettings, 'objectiveSettings'):
for objective in et_findall(objectiveSettings, 'objective'):
if not hasattr(self, '_magnification') and float(objective.text) > 0:
self._magnification = float(objective.text)
for channelSettings in et_findall(scanSettings, 'channelSettings'):
channels = {}
for channel in et_findall(channelSettings, 'channel'):
channels[int(channel.attrib.get('index', 0))] = (
large_image.tilesource.utilities.etreeToDict(channel)['channel'])
self._channelInfo = channels
try:
self._channels = [
channels.get(idx)['name'].split('|')[0]
for idx in range(len(channels))]
except Exception:
pass

def _handle_svs(self):
"""
For SVS files, parse the magnification and pixel size.
"""
try:
meta = self._tf.pages[0].description
self._magnification = float(meta.split('AppMag = ')[1].split('|')[0].strip())
self._mm_x = self._mm_y = float(
meta.split('|MPP = ', 1)[1].split('|')[0].strip()) * 0.001
root = xml.etree.ElementTree.fromstring(self._tf.pages[0].description)
self._xml = large_image.tilesource.utilities.etreeToDict(root)
self._channels = [c['name'] for c in
self._xml['indica']['image']['channels']['channel']]
if len(self._basis) == 1 and 'I' in self._basis:
self._basis['C'] = self._basis.pop('I')
self._associatedImages.clear()
except Exception:
pass

Expand Down Expand Up @@ -414,6 +365,71 @@ def _handle_ome(self):
except Exception:
pass

def _handle_scn(self):  # noqa
    """
    For SCN files, parse the xml and possibly adjust how associated images
    are labelled.

    The xml is read from the description of the first page of ``self._tf``
    and stored in dictionary form as ``self._xml``.  Depending on what the
    xml contains, this may also rename entries in ``self._associatedImages``
    and set ``self._magnification``, ``self._channelInfo``, and
    ``self._channels``.

    Only the construction of ``self._channels`` is guarded; a failure to
    parse the xml or to convert an attribute to a number is raised to the
    caller.
    """
    import xml.etree.ElementTree

    import large_image.tilesource.utilities

    root = xml.etree.ElementTree.fromstring(self._tf.pages[0].description)
    self._xml = large_image.tilesource.utilities.etreeToDict(root)
    # et_findall is defined elsewhere in this file; presumably it is a
    # namespace-tolerant version of Element.findall -- verify.
    for collection in et_findall(root, 'collection'):
        # These are kept as the raw attribute strings (or None) and are only
        # ever compared against the raw strings of a view.
        sizeX = collection.attrib.get('sizeX')
        sizeY = collection.attrib.get('sizeY')
        for supplementalImage in et_findall(collection, 'supplementalImage'):
            name = supplementalImage.attrib.get('type', '').lower()
            ifd = supplementalImage.attrib.get('ifd', '')
            oldname = 'image_%s' % ifd
            # Rename an associated image stored under the generic
            # 'image_<ifd>' key to the type given in the xml, unless an
            # image with that name already exists.
            if (name and ifd and oldname in self._associatedImages and
                    name not in self._associatedImages):
                self._associatedImages[name] = self._associatedImages[oldname]
                self._associatedImages.pop(oldname, None)
        for image in et_findall(collection, 'image'):
            name = image.attrib.get('name', 'Unknown')
            for view in et_findall(image, 'view'):
                # A view that has the same size as the collection and zero
                # offsets marks this image as the macro image, provided no
                # 'macro' entry exists yet.
                # NOTE(review): int() is applied to the raw attribute, so a
                # view without offsetX / offsetY raises TypeError once the
                # size checks pass -- confirm these are always present.
                if (sizeX and view.attrib.get('sizeX') == sizeX and
                        sizeY and view.attrib.get('sizeY') == sizeY and
                        not int(view.attrib.get('offsetX')) and
                        not int(view.attrib.get('offsetY')) and
                        name.lower() in self._associatedImages and
                        'macro' not in self._associatedImages):
                    self._associatedImages['macro'] = self._associatedImages[name.lower()]
                    self._associatedImages.pop(name.lower(), None)
            # Magnification and channel details are only taken from the
            # image whose name matches the base series.
            if name != self._baseSeries.name:
                continue
            for scanSettings in et_findall(image, 'scanSettings'):
                for objectiveSettings in et_findall(scanSettings, 'objectiveSettings'):
                    for objective in et_findall(objectiveSettings, 'objective'):
                        # Use a positive objective value, but never replace a
                        # magnification that has already been set.
                        if not hasattr(self, '_magnification') and float(objective.text) > 0:
                            self._magnification = float(objective.text)
                for channelSettings in et_findall(scanSettings, 'channelSettings'):
                    # Map each channel's index attribute (0 if absent) to the
                    # dictionary form of that channel's xml.
                    channels = {}
                    for channel in et_findall(channelSettings, 'channel'):
                        channels[int(channel.attrib.get('index', 0))] = (
                            large_image.tilesource.utilities.etreeToDict(channel)['channel'])
                    self._channelInfo = channels
                    # A channel's label is the part of its name before the
                    # first '|'.  If any index from 0 to len(channels) - 1 is
                    # missing or has no usable name, self._channels is left
                    # unchanged.
                    try:
                        self._channels = [
                            channels.get(idx)['name'].split('|')[0]
                            for idx in range(len(channels))]
                    except Exception:
                        pass

def _handle_svs(self):
    """
    For SVS files, parse the magnification and pixel size.

    The description of the first page of ``self._tf`` is expected to hold
    ``|``-separated ``key = value`` entries.  ``AppMag`` is stored as
    ``self._magnification`` and ``MPP`` (microns per pixel) is converted to
    millimeters and stored as both ``self._mm_x`` and ``self._mm_y``.

    This is best effort: each value is parsed independently, so a missing
    or unparsable entry leaves only its own attribute(s) unset and never
    raises.
    """
    try:
        meta = self._tf.pages[0].description
    except Exception:
        # Without a readable description there is nothing to parse.
        return
    try:
        self._magnification = float(meta.split('AppMag = ')[1].split('|')[0].strip())
    except Exception:
        # No usable AppMag entry; still try to read the pixel size below.
        pass
    try:
        # MPP is in microns per pixel; the 0.001 factor converts to mm.
        self._mm_x = self._mm_y = float(
            meta.split('|MPP = ', 1)[1].split('|')[0].strip()) * 0.001
    except Exception:
        pass

def getNativeMagnification(self):
"""
Get the magnification at a particular level.
Expand Down Expand Up @@ -623,6 +639,8 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
sel.append(slice(series.shape[aidx]))
baxis += 'S'
else:
if axis not in self._basis and axis == 'I':
axis = 'C'
sel.append((frame // self._basis[axis][0]) % self._basis[axis][2])
tile = bza[tuple(sel)]
# rotate
Expand Down
3 changes: 3 additions & 0 deletions test/datastore.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@
# Source: TCIA/CMB-LCA_v07_20240828/CMB-LCA/MSB-01459/
# 12-22-1959-XR Chest-59125/1002.000000-43033/1-1.dcm
'tcia_msb_01459_19591222.dcm': 'sha512:9dea871c3816f149227ece40d35aa5cf655f23412cb7aee72f175f0a74435d8b21aaa2030e7e75b0affbc07c03c205028025a4d5022bfa797bff523fa98315e0', # noqa
# Synthetic Indica Labs tiff; the subifds are missing tile/strip data, and the
# pixels are float32 but are not declared as such, so they appear to be uint32
'synthetic_indica.tiff': 'sha512:fba7eb2fb5fd12ac242d8b0760440f170f48f9e2434a672cbf230bd8a9ff02fad8f9bdf7225edf2de244f412edfc5205e695031a1d43dd99fe31c3aca11909a1', # noqa
}


Expand Down
11 changes: 6 additions & 5 deletions test/test_source_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,34 +63,35 @@
'openjpeg': {'read': r'\.(jp2)$'},
'openslide': {
'read': r'\.(ptif|svs|ndpi|tif.*|qptiff|dcm)$',
'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes|synthetic_untiled|tcia.*dcm)', # noqa
'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes|synthetic_untiled|indica|tcia.*dcm)', # noqa
'skip': r'nokeyframe\.ome\.tiff$',
'skipTiles': r'one_layer_missing',
},
'pil': {
'read': r'(\.(jpg|jpeg|png|tif.*)|18[-0-9a-f]{34}\.dcm)$',
'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview)', # noqa
'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview|indica)', # noqa
},
'rasterio': {
'read': r'(\.(jpg|jpeg|jp2|ptif|scn|svs|ndpi|tif.*|qptiff)|18[-0-9a-f]{34}\.dcm)$',
'noread': r'(huron\.image2_jpeg2k|sample_jp2k_33003|TCGA-DU-6399|\.(ome.tiff|nc)$)',
'skip': r'nokeyframe\.ome\.tiff$',
'skip': r'(indica|nokeyframe\.ome\.tiff$)',
},
'test': {'any': True, 'skipTiles': r''},
'tiff': {
'read': r'(\.(ptif|scn|svs|tif.*|qptiff)|[-0-9a-f]{36}\.dcm)$',
'noread': r'(DDX58_AXL|G10-3_pelvis_crop|landcover_sample|US_Geo\.|imagej)',
'noread': r'(DDX58_AXL|G10-3_pelvis_crop|landcover_sample|US_Geo\.|imagej|indica)',
'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles)'},
'tifffile': {
'read': r'',
'noread': r'((\.(nc|nd2|yml|yaml|json|czi|png|jpg|jpeg|jp2|ndpi|zarr\.db|zarr\.zip)|(nokeyframe\.ome\.tiff|XY01\.ome\.tif|level.*\.dcm|tcia.*dcm)$)' + # noqa
(r'|bad_axes' if sys.version_info < (3, 9) else '') +
r')',
'skip': r'indica' if sys.version_info < (3, 9) else '^$',
},
'vips': {
'read': r'',
'noread': r'(\.(nc|nd2|yml|yaml|json|czi|png|svs|scn|zarr\.db|zarr\.zip)|tcia.*dcm)$',
'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles|JK-kidney_B-gal_H3_4C_1-500sec\.jp2|extraoverview|synthetic_untiled)' # noqa
'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles|JK-kidney_B-gal_H3_4C_1-500sec\.jp2|extraoverview|synthetic_untiled)', # noqa
},
'zarr': {'read': r'\.(zarr|zgroup|zattrs|db|zarr\.zip)$'},
}
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ commands =
description = Autoformat import order and autopep8
skipsdist = true
skip_install = true
base_python=python3.9
deps =
autopep8
isort
Expand Down

0 comments on commit 5172d3c

Please sign in to comment.