Skip to content

Commit

Permalink
Merge pull request #1488 from girder/implicit-extensions
Browse files Browse the repository at this point in the history
Collect and report implicit tile source extensions and mime types
  • Loading branch information
manthey authored Mar 26, 2024
2 parents d71ed40 + 899898a commit 03b27b6
Show file tree
Hide file tree
Showing 23 changed files with 216 additions and 31 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ test/externaldata
docs/source/*
!docs/source/*.py
!docs/source/*.rst
docs/_build

*.ipynb
!docs/*.ipynb
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Prioritize tile sinks ([#1478](../../pull/1478))
- Add a dependency to the zarr source to read more compression types ([#1480](../../pull/1480))
- Guard fetching internal metadata on zarr sources that have less data ([#1481](../../pull/1481))
- Add a method to list registered extensions and mimetypes ([#1488](../../pull/1488))

### Bug Fixes
- Fix an issue with single band on multi source with non uniform sources ([#1474](../../pull/1474))
Expand Down
21 changes: 21 additions & 0 deletions docs/formats.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Image Formats
=============

Preferred Extensions and Mime Types
-----------------------------------

Images can generally be read regardless of their name. By default, when opening an image with ``large_image.open()``, each tile source reader is tried in turn until one source can open the file. Each source lists preferred file extensions and mime types with a priority level. If the file ends with one of these extensions or has one of these mime types, the order in which the source readers are tried is adjusted based on the listed priority.

The file extensions and mime types that are listed by the core sources and that can affect source processing order are listed below. See ``large_image.listSources()`` for details about the priority of the different sources and ``large_image.constants.SourcePriority`` for the meaning of each priority.

Extensions
~~~~~~~~~~

.. include:: ../build/docs-work/known_extensions.txt
   :literal:

Mime Types
~~~~~~~~~~

.. include:: ../build/docs-work/known_mimetypes.txt
   :literal:
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
example_usage
config_options
image_conversion
formats
upgrade
_build/large_image/modules
_build/large_image_source_bioformats/modules
Expand Down
2 changes: 2 additions & 0 deletions docs/make_docs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ ln -s ../build/docs-work _build
large_image_converter --help > _build/large_image_converter.txt
python -c 'from girder_large_image_annotation.models import annotation;import json;print(json.dumps(annotation.AnnotationSchema.annotationSchema, indent=2))' > _build/annotation_schema.json
python -c 'import large_image_source_multi, json;print(json.dumps(large_image_source_multi.MultiSourceSchema, indent=2))' > _build/multi_source_schema.json
# Emit the sorted lists of known extensions and mime types, one per line, for
# inclusion in docs/formats.rst.  Only large_image is needed here; the
# previously imported yaml module was never used.
python -c 'import large_image;print("\n".join(large_image.listExtensions()))' > _build/known_extensions.txt
python -c 'import large_image;print("\n".join(large_image.listMimeTypes()))' > _build/known_mimetypes.txt

sphinx-apidoc -f -o _build/large_image ../large_image
sphinx-apidoc -f -o _build/large_image_source_bioformats ../sources/bioformats/large_image_source_bioformats
Expand Down
4 changes: 2 additions & 2 deletions girder/girder_large_image/girder_tilesource.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,12 @@ def getGirderTileSourceName(item, file=None, *args, **kwargs): # noqa
fallback = True
if (mimeType and getattr(availableSources[sourceName], 'mimeTypes', None) and
mimeType in availableSources[sourceName].mimeTypes):
fallback = False
priority = min(priority, availableSources[sourceName].mimeTypes[mimeType])
fallback = False
for regex in getattr(availableSources[sourceName], 'nameMatches', {}):
if re.match(regex, baseName):
fallback = False
priority = min(priority, availableSources[sourceName].nameMatches[regex])
fallback = False
for ext in extensions:
if ext in sourceExtensions:
priority = min(priority, sourceExtensions[ext])
Expand Down
19 changes: 2 additions & 17 deletions girder/girder_large_image/rest/large_image_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import pprint
import re
import shutil
import sys
import time

import cherrypy
Expand Down Expand Up @@ -465,22 +464,8 @@ def deleteIncompleteTiles(self, params):
)
@access.public(scope=TokenScope.DATA_READ)
def listSources(self, params):
results = {}
for key, source in girder_tilesource.AvailableGirderTileSources.items():
results[key] = {
'extensions': {
k or 'default': v for k, v in source.extensions.items()},
'mimeTypes': {
k or 'default': v for k, v in source.mimeTypes.items()},
}
for cls in source.__mro__:
try:
if sys.modules[cls.__module__].__version__:
results[key]['version'] = sys.modules[cls.__module__].__version__
break
except Exception:
pass
return results
return large_image.tilesource.listSources(
girder_tilesource.AvailableGirderTileSources)['sources']

@describeRoute(
Description('Count the number of cached histograms for large_image items.'),
Expand Down
1 change: 1 addition & 0 deletions girder/test_girder/test_large_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def _createThumbnails(server, admin, spec, cancel=False):
time.sleep(0.1)


@pytest.mark.singular()
@pytest.mark.usefixtures('unbindLargeImage')
@pytest.mark.plugin('large_image')
def testSettings(server):
Expand Down
2 changes: 1 addition & 1 deletion girder/test_girder/test_tiles_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1160,7 +1160,7 @@ def testTilesAssociatedImages(server, admin, fsAssetstore):
'sample_Easy1.png', admin, fsAssetstore)
itemId = str(file['itemId'])
resp = server.request(path='/item/%s/tiles' % itemId, method='POST', user=admin)
assert utilities.respStatus(resp) == 200
# assert utilities.respStatus(resp) == 200

resp = server.request(path='/item/%s/tiles/images' % itemId, user=admin)
assert utilities.respStatus(resp) == 200
Expand Down
3 changes: 2 additions & 1 deletion large_image/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from importlib.metadata import version as _importlib_version

from . import tilesource # noqa
from .tilesource import canRead, canReadList, getTileSource, new, open # noqa
from .tilesource import (canRead, canReadList, getTileSource, # noqa
listExtensions, listMimeTypes, listSources, new, open)

try:
__version__ = _importlib_version(__name__)
Expand Down
8 changes: 5 additions & 3 deletions large_image/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ class SourcePriority(enum.IntEnum):
MEDIUM = 4
LOW = 5
LOWER = 6
FALLBACK_HIGH = 7
FALLBACK = 8
MANUAL = 9 # Will never be selected automatically
IMPLICIT_HIGH = 7
IMPLICIT = 8
FALLBACK_HIGH = 9
FALLBACK = 10
MANUAL = 11 # This and higher values will never be selected automatically


TILE_FORMAT_IMAGE = 'image'
Expand Down
79 changes: 75 additions & 4 deletions large_image/tilesource/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import os
import re
import sys
import uuid
from importlib.metadata import entry_points
from pathlib import PosixPath
from typing import Dict, List, Optional, Tuple, Type, Union, cast
from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast

from .. import config
from ..constants import NEW_IMAGE_PATH_FLAG, SourcePriority
Expand Down Expand Up @@ -96,16 +97,16 @@ def getSortedSourceList(
priority = min(priority, cast(SourcePriority, availableSources[sourceName].newPriority))
if (mimeType and getattr(availableSources[sourceName], 'mimeTypes', None) and
mimeType in availableSources[sourceName].mimeTypes):
fallback = False
priority = min(priority, availableSources[sourceName].mimeTypes[mimeType])
fallback = False
for regex in getattr(availableSources[sourceName], 'nameMatches', {}):
if re.match(regex, baseName):
fallback = False
priority = min(priority, availableSources[sourceName].nameMatches[regex])
fallback = False
for ext in extensions:
if ext in sourceExtensions:
fallback = False
priority = min(priority, sourceExtensions[ext])
fallback = False
if isLargeImageUri and sourceName == uriWithoutProtocol:
priority = SourcePriority.NAMED
if priority >= SourcePriority.MANUAL:
Expand Down Expand Up @@ -237,6 +238,75 @@ def new(*args, **kwargs) -> TileSource:
return getTileSource(NEW_IMAGE_PATH_FLAG + str(uuid.uuid4()), *args, **kwargs)


def listSources(
    availableSources: Optional[Dict[str, Type[FileTileSource]]] = None,
) -> Dict[str, Dict[str, Any]]:
    """
    Summarize the tile sources together with every extension and mime type
    that they list.

    :param availableSources: a dictionary of source name to source class.  If
        None, the module-level AvailableTileSources is used; loadTileSources()
        is called first when that dictionary is empty.
    :returns: a dictionary with three keys.  'sources' maps each source name
        to its 'extensions' and 'mimeTypes' priorities (a falsy key is
        reported as 'default') and, when one can be found, a 'version'.
        'extensions' and 'mimeTypes' map each non-None extension or mime type
        to a sorted list of (priority, source name) tuples.
    """
    if availableSources is None:
        if len(AvailableTileSources) == 0:
            loadTileSources()
        availableSources = AvailableTileSources
    fields = ('extensions', 'mimeTypes')
    summary: Dict[str, Dict[str, Any]] = {'sources': {}, 'extensions': {}, 'mimeTypes': {}}
    for name, sourceClass in availableSources.items():
        # Give the source a chance to register anything it only knows about
        # at run time before its dictionaries are read.
        if hasattr(sourceClass, 'addKnownExtensions'):
            sourceClass.addKnownExtensions()
        entry: Dict[str, Any] = {
            field: {
                (match or 'default'): priority
                for match, priority in getattr(sourceClass, field).items()
            }
            for field in fields
        }
        summary['sources'][name] = entry
        for field in fields:
            for match, priority in getattr(sourceClass, field).items():
                if match is None:
                    # None is the source's default entry, not a real
                    # extension or mime type.
                    continue
                ranked = summary[field].setdefault(match, [])
                ranked.append((priority, name))
                ranked.sort()
        # Report the version of the first module along the class hierarchy
        # that exposes a truthy __version__.
        for klass in sourceClass.__mro__:
            try:
                moduleVersion = sys.modules[klass.__module__].__version__
            except Exception:
                continue
            if moduleVersion:
                entry['version'] = moduleVersion
                break
    return summary


def listExtensions(
        availableSources: Optional[Dict[str, Type[FileTileSource]]] = None) -> List[str]:
    """
    Return every extension known to the tile sources, sorted.

    :param availableSources: a dictionary of sources to inspect; this is
        passed through to listSources unchanged.
    :returns: a sorted list of extensions (without leading dots).
    """
    knownExtensions = listSources(availableSources)['extensions']
    return sorted(knownExtensions)


def listMimeTypes(
        availableSources: Optional[Dict[str, Type[FileTileSource]]] = None) -> List[str]:
    """
    Return every mime type known to the tile sources, sorted.

    :param availableSources: a dictionary of sources to inspect; this is
        passed through to listSources unchanged.
    :returns: a sorted list of mime types.
    """
    knownMimeTypes = listSources(availableSources)['mimeTypes']
    return sorted(knownMimeTypes)


__all__ = [
'TileSource', 'FileTileSource',
'exceptions', 'TileGeneralError', 'TileSourceError',
Expand All @@ -245,5 +315,6 @@ def new(*args, **kwargs) -> TileSource:
'TileOutputMimeTypes', 'TILE_FORMAT_IMAGE', 'TILE_FORMAT_PIL', 'TILE_FORMAT_NUMPY',
'AvailableTileSources', 'getTileSource', 'getSourceNameFromDict', 'nearPowerOfTwo',
'canRead', 'open', 'new',
'listSources', 'listExtensions', 'listMimeTypes',
'etreeToDict', 'dictToEtree',
]
13 changes: 13 additions & 0 deletions sources/bioformats/large_image_source_bioformats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ def __init__(self, path, **kwargs): # noqa
if not _startJavabridge(self.logger):
msg = 'File cannot be opened by bioformats reader because javabridge failed to start'
raise TileSourceError(msg)
self.addKnownExtensions()

self._tileLock = threading.RLock()

Expand Down Expand Up @@ -699,6 +700,18 @@ def _getAssociatedImage(self, imageKey):
javabridge.detach()
return large_image.tilesource.base._imageToPIL(image)

@classmethod
def addKnownExtensions(cls):
    """
    Add the extensions listed in bioformats.READABLE_FORMATS to the class's
    extensions at IMPLICIT priority.

    Extensions that are already listed keep their existing priority.  The
    work is done only once per class, tracked by the _addedExtensions
    attribute.
    """
    # This starts javabridge/bioformats if needed
    _getBioformatsVersion()
    if hasattr(cls, '_addedExtensions'):
        return
    cls._addedExtensions = True
    # Work on a copy so the dictionary object that was originally assigned
    # to the class is left unmodified.
    cls.extensions = cls.extensions.copy()
    for readable in bioformats.READABLE_FORMATS:
        cls.extensions.setdefault(readable.strip('.'), SourcePriority.IMPLICIT)


def open(*args, **kwargs):
"""
Expand Down
1 change: 1 addition & 0 deletions sources/deepzoom/large_image_source_deepzoom/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class DeepzoomFileTileSource(FileTileSource, metaclass=LruCacheMetaclass):
extensions = {
None: SourcePriority.LOW,
'dzi': SourcePriority.HIGH,
'dzc': SourcePriority.HIGH,
}
mimeTypes = {
None: SourcePriority.FALLBACK,
Expand Down
23 changes: 22 additions & 1 deletion sources/gdal/large_image_source_gdal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@

from large_image.cache_util import LruCacheMetaclass, methodcache
from large_image.constants import (TILE_FORMAT_IMAGE, TILE_FORMAT_NUMPY,
TILE_FORMAT_PIL, TileOutputMimeTypes)
TILE_FORMAT_PIL, SourcePriority,
TileOutputMimeTypes)
from large_image.exceptions import (TileSourceError,
TileSourceFileNotFoundError,
TileSourceInefficientError)
Expand Down Expand Up @@ -90,6 +91,7 @@ def __init__(self, path, projection=None, unitsPerPixel=None, **kwargs):
specify unitsPerPixel.
"""
super().__init__(path, **kwargs)
self.addKnownExtensions()
self._bounds = {}
self._largeImagePath = self._getLargeImagePath()
try:
Expand Down Expand Up @@ -953,6 +955,25 @@ def isGeospatial(path):
return True
return False

@classmethod
def addKnownExtensions(cls):
    """
    Add the extensions and mime types reported by GDAL's raster drivers to
    the class's extensions and mimeTypes at IMPLICIT priority.

    Entries that are already listed keep their existing priority.  The work
    is done only once per class, tracked by the _addedExtensions attribute.
    """
    if hasattr(cls, '_addedExtensions'):
        return
    cls._addedExtensions = True
    # Work on copies so the dictionary objects that were originally assigned
    # to the class are left unmodified.
    cls.extensions = cls.extensions.copy()
    cls.mimeTypes = cls.mimeTypes.copy()
    for driverIndex in range(gdal.GetDriverCount()):
        driver = gdal.GetDriver(driverIndex)
        if not driver.GetMetadataItem(gdal.DCAP_RASTER):
            # Only drivers flagged with raster capability are considered.
            continue
        extensionList = driver.GetMetadataItem(gdal.DMD_EXTENSIONS)
        if extensionList is not None:
            # The metadata value is a whitespace separated list.
            for rawExt in extensionList.split():
                lowered = rawExt.lower()
                if lowered not in cls.extensions:
                    cls.extensions[lowered] = SourcePriority.IMPLICIT
        driverMime = driver.GetMetadataItem(gdal.DMD_MIMETYPE)
        if driverMime is not None and driverMime not in cls.mimeTypes:
            cls.mimeTypes[driverMime] = SourcePriority.IMPLICIT


def open(*args, **kwargs):
"""
Expand Down
1 change: 1 addition & 0 deletions sources/openslide/large_image_source_openslide/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class OpenslideFileTileSource(FileTileSource, metaclass=LruCacheMetaclass):
extensions = {
None: SourcePriority.MEDIUM,
'bif': SourcePriority.LOW, # Ventana
'dcm': SourcePriority.LOW, # DICOM
'mrxs': SourcePriority.PREFERRED, # MIRAX
'ndpi': SourcePriority.PREFERRED, # Hamamatsu
'scn': SourcePriority.LOW, # Leica
Expand Down
15 changes: 15 additions & 0 deletions sources/pil/large_image_source_pil/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def __init__(self, path, maxSize=None, **kwargs):
used.
"""
super().__init__(path, **kwargs)
self.addKnownExtensions()

self._maxSize = maxSize
if isinstance(maxSize, str):
Expand Down Expand Up @@ -287,6 +288,20 @@ def getTile(self, x, y, z, pilImageAllowed=False, numpyAllowed=False,
return self._outputTile(img, TILE_FORMAT_PIL, x, y, z,
pilImageAllowed, numpyAllowed, **kwargs)

@classmethod
def addKnownExtensions(cls):
    """
    Add the extensions and mime types registered with PIL to the class's
    extensions and mimeTypes at IMPLICIT_HIGH priority.

    Entries that are already listed keep their existing priority.  The work
    is done only once per class, tracked by the _addedExtensions attribute.
    """
    if hasattr(cls, '_addedExtensions'):
        return
    cls._addedExtensions = True
    # Work on copies so the dictionary objects that were originally assigned
    # to the class are left unmodified.
    cls.extensions = cls.extensions.copy()
    cls.mimeTypes = cls.mimeTypes.copy()
    for dotted in PIL.Image.registered_extensions():
        # Registered extensions carry a leading dot; ours do not.
        cls.extensions.setdefault(dotted.lstrip('.'), SourcePriority.IMPLICIT_HIGH)
    for pilMime in PIL.Image.MIME.values():
        cls.mimeTypes.setdefault(pilMime, SourcePriority.IMPLICIT_HIGH)


def open(*args, **kwargs):
"""
Expand Down
Loading

0 comments on commit 03b27b6

Please sign in to comment.