Handle the Indica Labs variant of TIFF files.

These TIFF files contain some SubIFDs that have no strips or tiles, and they erroneously fail to declare that their pixel data is floating point.
girder · Nov 5, 2024 · 5172d3c · 5172d3c
1 parent 7d4d213
commit 5172d3c
Show file tree

Hide file tree

Showing 8 changed files with 102 additions and 65 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,7 +5,8 @@
 ### Improvements
 
 - Format dates in item lists ([#1707](../../pull/1707))
-- Guard dtype types ([#1711](../../pull/1711), [#1714](../../pull/1714))
+- Guard dtype types ([#1711](../../pull/1711), [#1714](../../pull/1714), [#1716](../../pull/1716))
+- Better handle IndicaLabs tiff files ([#1717](../../pull/1717))
 
 ### Changes
 

diff --git a/large_image/exceptions.py b/large_image/exceptions.py
@@ -21,6 +21,10 @@ class TileSourceInefficientError(TileSourceError):
     pass
 
 
+class TileSourceMalformedError(TileSourceError):
+    pass
+
+
 class TileSourceFileNotFoundError(TileSourceError, FileNotFoundError):
     def __init__(self, *args, **kwargs) -> None:
         super().__init__(errno.ENOENT, *args, **kwargs)

diff --git a/large_image/tilesource/utilities.py b/large_image/tilesource/utilities.py
@@ -232,7 +232,8 @@ def _imageToPIL(
         #         image = image / ((2 ** maxl2) - 1)
         #     image = (image * 255).astype(numpy.uint8)
         elif image.dtype != np.uint8:
-            image = image.astype(np.uint8)
+            image = np.clip(np.nan_to_num(np.where(
+                image is None, np.nan, image), nan=0), 0, 255).astype(np.uint8)
         image = PIL.Image.fromarray(image, mode)
     elif not isinstance(image, PIL.Image.Image):
         image = PIL.Image.open(io.BytesIO(image))

diff --git a/sources/tiff/large_image_source_tiff/__init__.py b/sources/tiff/large_image_source_tiff/__init__.py
@@ -30,7 +30,9 @@
 
 from large_image.cache_util import LruCacheMetaclass, methodcache
 from large_image.constants import TILE_FORMAT_NUMPY, TILE_FORMAT_PIL, SourcePriority
-from large_image.exceptions import TileSourceError, TileSourceFileNotFoundError
+from large_image.exceptions import (TileSourceError,
+                                    TileSourceFileNotFoundError,
+                                    TileSourceMalformedError)
 from large_image.tilesource import FileTileSource, nearPowerOfTwo
 
 from . import tiff_reader
@@ -90,6 +92,8 @@ def __init__(self, path, **kwargs):  # noqa
         try:
             self._initWithTiffTools()
             return
+        except TileSourceMalformedError:
+            raise
         except Exception as exc:
             self.logger.debug('Cannot read with tifftools route; %r', exc)
             lastException = exc
@@ -363,6 +367,10 @@ def _initWithTiffTools(self):  # noqa
                     if len(subifds) != 1:
                         msg = 'When stored in subifds, each subifd should be a single ifd.'
                         raise TileSourceError(msg)
+                    if (tifftools.Tag.StripOffsets.value not in subifds[0]['tags'] and
+                            tifftools.Tag.TileOffsets.value not in subifds[0]['tags']):
+                        msg = 'Subifd has no strip or tile offsets.'
+                        raise TileSourceMalformedError(msg)
                     level = self._levelFromIfd(subifds[0], info['ifds'][0])
                     if level < self.levels - 1 and frames[-1]['dirs'][level] is None:
                         frames[-1]['dirs'][level] = (idx, subidx + 1)

diff --git a/sources/tifffile/large_image_source_tifffile/__init__.py b/sources/tifffile/large_image_source_tifffile/__init__.py
@@ -292,68 +292,19 @@ def _handle_imagej(self):
         except Exception:
             pass
 
-    def _handle_scn(self):  # noqa
-        """
-        For SCN files, parse the xml and possibly adjust how associated images
-        are labelled.
-        """
+    def _handle_indica(self):
         import xml.etree.ElementTree
 
         import large_image.tilesource.utilities
 
-        root = xml.etree.ElementTree.fromstring(self._tf.pages[0].description)
-        self._xml = large_image.tilesource.utilities.etreeToDict(root)
-        for collection in et_findall(root, 'collection'):
-            sizeX = collection.attrib.get('sizeX')
-            sizeY = collection.attrib.get('sizeY')
-            for supplementalImage in et_findall(collection, 'supplementalImage'):
-                name = supplementalImage.attrib.get('type', '').lower()
-                ifd = supplementalImage.attrib.get('ifd', '')
-                oldname = 'image_%s' % ifd
-                if (name and ifd and oldname in self._associatedImages and
-                        name not in self._associatedImages):
-                    self._associatedImages[name] = self._associatedImages[oldname]
-                    self._associatedImages.pop(oldname, None)
-            for image in et_findall(collection, 'image'):
-                name = image.attrib.get('name', 'Unknown')
-                for view in et_findall(image, 'view'):
-                    if (sizeX and view.attrib.get('sizeX') == sizeX and
-                            sizeY and view.attrib.get('sizeY') == sizeY and
-                            not int(view.attrib.get('offsetX')) and
-                            not int(view.attrib.get('offsetY')) and
-                            name.lower() in self._associatedImages and
-                            'macro' not in self._associatedImages):
-                        self._associatedImages['macro'] = self._associatedImages[name.lower()]
-                        self._associatedImages.pop(name.lower(), None)
-                if name != self._baseSeries.name:
-                    continue
-                for scanSettings in et_findall(image, 'scanSettings'):
-                    for objectiveSettings in et_findall(scanSettings, 'objectiveSettings'):
-                        for objective in et_findall(objectiveSettings, 'objective'):
-                            if not hasattr(self, '_magnification') and float(objective.text) > 0:
-                                self._magnification = float(objective.text)
-                    for channelSettings in et_findall(scanSettings, 'channelSettings'):
-                        channels = {}
-                        for channel in et_findall(channelSettings, 'channel'):
-                            channels[int(channel.attrib.get('index', 0))] = (
-                                large_image.tilesource.utilities.etreeToDict(channel)['channel'])
-                        self._channelInfo = channels
-                        try:
-                            self._channels = [
-                                channels.get(idx)['name'].split('|')[0]
-                                for idx in range(len(channels))]
-                        except Exception:
-                            pass
-
-    def _handle_svs(self):
-        """
-        For SVS files, parse the magnification and pixel size.
-        """
         try:
-            meta = self._tf.pages[0].description
-            self._magnification = float(meta.split('AppMag = ')[1].split('|')[0].strip())
-            self._mm_x = self._mm_y = float(
-                meta.split('|MPP = ', 1)[1].split('|')[0].strip()) * 0.001
+            root = xml.etree.ElementTree.fromstring(self._tf.pages[0].description)
+            self._xml = large_image.tilesource.utilities.etreeToDict(root)
+            self._channels = [c['name'] for c in
+                              self._xml['indica']['image']['channels']['channel']]
+            if len(self._basis) == 1 and 'I' in self._basis:
+                self._basis['C'] = self._basis.pop('I')
+            self._associatedImages.clear()
         except Exception:
             pass
 
@@ -414,6 +365,71 @@ def _handle_ome(self):
         except Exception:
             pass
 
+    def _handle_scn(self):  # noqa
+        """
+        For SCN files, parse the xml and possibly adjust how associated images
+        are labelled.
+        """
+        import xml.etree.ElementTree
+
+        import large_image.tilesource.utilities
+
+        root = xml.etree.ElementTree.fromstring(self._tf.pages[0].description)
+        self._xml = large_image.tilesource.utilities.etreeToDict(root)
+        for collection in et_findall(root, 'collection'):
+            sizeX = collection.attrib.get('sizeX')
+            sizeY = collection.attrib.get('sizeY')
+            for supplementalImage in et_findall(collection, 'supplementalImage'):
+                name = supplementalImage.attrib.get('type', '').lower()
+                ifd = supplementalImage.attrib.get('ifd', '')
+                oldname = 'image_%s' % ifd
+                if (name and ifd and oldname in self._associatedImages and
+                        name not in self._associatedImages):
+                    self._associatedImages[name] = self._associatedImages[oldname]
+                    self._associatedImages.pop(oldname, None)
+            for image in et_findall(collection, 'image'):
+                name = image.attrib.get('name', 'Unknown')
+                for view in et_findall(image, 'view'):
+                    if (sizeX and view.attrib.get('sizeX') == sizeX and
+                            sizeY and view.attrib.get('sizeY') == sizeY and
+                            not int(view.attrib.get('offsetX')) and
+                            not int(view.attrib.get('offsetY')) and
+                            name.lower() in self._associatedImages and
+                            'macro' not in self._associatedImages):
+                        self._associatedImages['macro'] = self._associatedImages[name.lower()]
+                        self._associatedImages.pop(name.lower(), None)
+                if name != self._baseSeries.name:
+                    continue
+                for scanSettings in et_findall(image, 'scanSettings'):
+                    for objectiveSettings in et_findall(scanSettings, 'objectiveSettings'):
+                        for objective in et_findall(objectiveSettings, 'objective'):
+                            if not hasattr(self, '_magnification') and float(objective.text) > 0:
+                                self._magnification = float(objective.text)
+                    for channelSettings in et_findall(scanSettings, 'channelSettings'):
+                        channels = {}
+                        for channel in et_findall(channelSettings, 'channel'):
+                            channels[int(channel.attrib.get('index', 0))] = (
+                                large_image.tilesource.utilities.etreeToDict(channel)['channel'])
+                        self._channelInfo = channels
+                        try:
+                            self._channels = [
+                                channels.get(idx)['name'].split('|')[0]
+                                for idx in range(len(channels))]
+                        except Exception:
+                            pass
+
+    def _handle_svs(self):
+        """
+        For SVS files, parse the magnification and pixel size.
+        """
+        try:
+            meta = self._tf.pages[0].description
+            self._magnification = float(meta.split('AppMag = ')[1].split('|')[0].strip())
+            self._mm_x = self._mm_y = float(
+                meta.split('|MPP = ', 1)[1].split('|')[0].strip()) * 0.001
+        except Exception:
+            pass
+
     def getNativeMagnification(self):
         """
         Get the magnification at a particular level.
@@ -623,6 +639,8 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False, **kwargs):
                     sel.append(slice(series.shape[aidx]))
                     baxis += 'S'
                 else:
+                    if axis not in self._basis and axis == 'I':
+                        axis = 'C'
                     sel.append((frame // self._basis[axis][0]) % self._basis[axis][2])
             tile = bza[tuple(sel)]
             # rotate

diff --git a/test/datastore.py b/test/datastore.py
@@ -120,6 +120,9 @@
     # Source: TCIA/CMB-LCA_v07_20240828/CMB-LCA/MSB-01459/
     #   12-22-1959-XR Chest-59125/1002.000000-43033/1-1.dcm
     'tcia_msb_01459_19591222.dcm': 'sha512:9dea871c3816f149227ece40d35aa5cf655f23412cb7aee72f175f0a74435d8b21aaa2030e7e75b0affbc07c03c205028025a4d5022bfa797bff523fa98315e0',  # noqa
+    # Synthetic Indica Labs tiff; subifds missing tile/strip data and unmarked
+    # float32 pixels rather than uint32
+    'synthetic_indica.tiff': 'sha512:fba7eb2fb5fd12ac242d8b0760440f170f48f9e2434a672cbf230bd8a9ff02fad8f9bdf7225edf2de244f412edfc5205e695031a1d43dd99fe31c3aca11909a1',  # noqa
 }
 
 

diff --git a/test/test_source_base.py b/test/test_source_base.py
@@ -63,34 +63,35 @@
     'openjpeg': {'read': r'\.(jp2)$'},
     'openslide': {
         'read': r'\.(ptif|svs|ndpi|tif.*|qptiff|dcm)$',
-        'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes|synthetic_untiled|tcia.*dcm)',  # noqa
+        'noread': r'(oahu|DDX58_AXL|huron\.image2_jpeg2k|landcover_sample|d042-353\.crop|US_Geo\.|extraoverview|imagej|bad_axes|synthetic_untiled|indica|tcia.*dcm)',  # noqa
         'skip': r'nokeyframe\.ome\.tiff$',
         'skipTiles': r'one_layer_missing',
     },
     'pil': {
         'read': r'(\.(jpg|jpeg|png|tif.*)|18[-0-9a-f]{34}\.dcm)$',
-        'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview)',  # noqa
+        'noread': r'(G10-3|JK-kidney|d042-353.*tif|huron|one_layer_missing|US_Geo|extraoverview|indica)',  # noqa
     },
     'rasterio': {
         'read': r'(\.(jpg|jpeg|jp2|ptif|scn|svs|ndpi|tif.*|qptiff)|18[-0-9a-f]{34}\.dcm)$',
         'noread': r'(huron\.image2_jpeg2k|sample_jp2k_33003|TCGA-DU-6399|\.(ome.tiff|nc)$)',
-        'skip': r'nokeyframe\.ome\.tiff$',
+        'skip': r'(indica|nokeyframe\.ome\.tiff$)',
     },
     'test': {'any': True, 'skipTiles': r''},
     'tiff': {
         'read': r'(\.(ptif|scn|svs|tif.*|qptiff)|[-0-9a-f]{36}\.dcm)$',
-        'noread': r'(DDX58_AXL|G10-3_pelvis_crop|landcover_sample|US_Geo\.|imagej)',
+        'noread': r'(DDX58_AXL|G10-3_pelvis_crop|landcover_sample|US_Geo\.|imagej|indica)',
         'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles)'},
     'tifffile': {
         'read': r'',
         'noread': r'((\.(nc|nd2|yml|yaml|json|czi|png|jpg|jpeg|jp2|ndpi|zarr\.db|zarr\.zip)|(nokeyframe\.ome\.tiff|XY01\.ome\.tif|level.*\.dcm|tcia.*dcm)$)' +  # noqa
                   (r'|bad_axes' if sys.version_info < (3, 9) else '') +
                   r')',
+        'skip': r'indica' if sys.version_info < (3, 9) else '^$',
     },
     'vips': {
         'read': r'',
         'noread': r'(\.(nc|nd2|yml|yaml|json|czi|png|svs|scn|zarr\.db|zarr\.zip)|tcia.*dcm)$',
-        'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles|JK-kidney_B-gal_H3_4C_1-500sec\.jp2|extraoverview|synthetic_untiled)'  # noqa
+        'skipTiles': r'(sample_image\.ptif|one_layer_missing_tiles|JK-kidney_B-gal_H3_4C_1-500sec\.jp2|extraoverview|synthetic_untiled)',  # noqa
     },
     'zarr': {'read': r'\.(zarr|zgroup|zattrs|db|zarr\.zip)$'},
 }

diff --git a/tox.ini b/tox.ini
@@ -241,6 +241,7 @@ commands =
 description = Autoformat import order and autopep8
 skipsdist = true
 skip_install = true
+base_python=python3.9
 deps =
   autopep8
   isort