From d84017dd378e153400c1946d1b4b1f1bee980f89 Mon Sep 17 00:00:00 2001 From: David Manthey Date: Wed, 13 Sep 2023 13:57:32 -0400 Subject: [PATCH] Add an option to not cache sources. When opening a tile source, pass `noCache=True`. In this mode, the tile source can directly have its style modified (e.g., `source.style = `). This is also used when importing images into girder to avoid flushing the cache of tile sources that are in active use. This closes #1294. This closes #1145. There is a config value `cache_sources`, that, if False, makes `noCache` default to False. This closes #1206. --- CHANGELOG.md | 5 ++++ docs/config_options.rst | 2 ++ .../girder_large_image/girder_tilesource.py | 2 +- .../girder_large_image/models/image_item.py | 2 +- large_image/cache_util/cache.py | 16 +++++++++- large_image/config.py | 6 ++++ large_image/tilesource/__init__.py | 6 ++++ large_image/tilesource/base.py | 30 +++++++++++++++++-- .../large_image_source_dicom/__init__.py | 2 +- .../nd2/large_image_source_nd2/__init__.py | 2 +- test/test_cache_source.py | 26 +++++++++++++++- 11 files changed, 91 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a4d778323..de8328dc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 1.24.0 + +### Features +- Added a noCache option to opening tile sources ([#1296](../../pull/1296)) + ## 1.23.7 ### Improvements diff --git a/docs/config_options.rst b/docs/config_options.rst index f3fce7b90..2271087b0 100644 --- a/docs/config_options.rst +++ b/docs/config_options.rst @@ -23,6 +23,8 @@ Configuration parameters: - ``cache_tilesource_maximum``: If this is non-zero, this further limits the number of tilesources than can be cached to this value. +- ``cache_sources``: If set to False, the default will be to not cache tile sources. This has substantial performance penalties if sources are used multiple times, so should only be set in singular dynamic environments such as experimental notebooks. + - ``max_small_image_size``: The PIL tilesource is used for small images if they are no more than this many pixels along their maximum dimension. - ``source_bioformats_ignored_names``, ``source_pil_ignored_names``, ``source_vips_ignored_names``: Some tile sources can read some files that are better read by other tilesources. Since reading these files is suboptimal, these tile sources have a setting that, by default, ignores files without extensions or with particular extensions. This setting is a Python regular expressions. For bioformats this defaults to ``r'(^[!.]*|\.(jpg|jpeg|jpe|png|tif|tiff|ndpi))$'``. diff --git a/girder/girder_large_image/girder_tilesource.py b/girder/girder_large_image/girder_tilesource.py index e0950574c..f24f9d71f 100644 --- a/girder/girder_large_image/girder_tilesource.py +++ b/girder/girder_large_image/girder_tilesource.py @@ -190,7 +190,7 @@ def getGirderTileSourceName(item, file=None, *args, **kwargs): # noqa for k, v in properties.items()) sourceList.append((propertiesClash, fallback, priority, sourceName)) for _clash, _fallback, _priority, sourceName in sorted(sourceList): - if availableSources[sourceName].canRead(item): + if availableSources[sourceName].canRead(item, *args, **kwargs): return sourceName diff --git a/girder/girder_large_image/models/image_item.py b/girder/girder_large_image/models/image_item.py index ef3f93182..9c90332b3 100644 --- a/girder/girder_large_image/models/image_item.py +++ b/girder/girder_large_image/models/image_item.py @@ -80,7 +80,7 @@ def createImageItem(self, item, fileObj, user=None, token=None, logger.debug( 'createImageItem checking if item %s (%s) can be used directly', item['_id'], item['name']) - sourceName = girder_tilesource.getGirderTileSourceName(item, fileObj) + sourceName = girder_tilesource.getGirderTileSourceName(item, fileObj, noCache=True) if sourceName: logger.info( 'createImageItem using source %s for item %s (%s)', diff --git a/large_image/cache_util/cache.py b/large_image/cache_util/cache.py index 433f2cff6..f6eeeb14a 100644 --- a/large_image/cache_util/cache.py +++ b/large_image/cache_util/cache.py @@ -1,5 +1,6 @@ import functools import threading +import uuid try: import resource @@ -168,7 +169,18 @@ def __new__(metacls, name, bases, namespace, **kwargs): # noqa - N804 return cls def __call__(cls, *args, **kwargs): # noqa - N805 - + if kwargs.get('noCache') or ( + kwargs.get('noCache') is None and config.getConfig('cache_sources') is False): + instance = super().__call__(*args, **kwargs) + # for pickling + instance._initValues = (args, kwargs.copy()) + instance._classkey = str(uuid.uuid4()) + instance._noCache = True + if kwargs.get('style') != getattr(cls, '_unstyledStyle', None): + subkwargs = kwargs.copy() + subkwargs['style'] = getattr(cls, '_unstyledStyle', None) + instance._unstyledInstance = subresult = cls(*args, **subkwargs) + return instance cache, cacheLock = LruCacheMetaclass.classCaches[cls] if hasattr(cls, 'getLRUHash'): @@ -212,6 +224,7 @@ def __call__(cls, *args, **kwargs): # noqa - N805 # for pickling result._initValues = (args, kwargs.copy()) result._unstyledInstance = subresult + result._derivedSource = True # Has to be after setting the _unstyledInstance result._setStyle(kwargs['style']) with cacheLock: @@ -233,6 +246,7 @@ def __call__(cls, *args, **kwargs): # noqa - N805 subkwargs = kwargs.copy() subkwargs['style'] = getattr(cls, '_unstyledStyle', None) instance._unstyledInstance = subresult = cls(*args, **subkwargs) + instance._derivedSource = True with cacheLock: cache[key] = instance return instance diff --git a/large_image/config.py b/large_image/config.py index b251156af..dd76ec29b 100644 --- a/large_image/config.py +++ b/large_image/config.py @@ -25,6 +25,12 @@ 'cache_memcached_username': None, 'cache_memcached_password': None, + # If set to False, the default will be to not cache tile sources. This has + # substantial performance penalties if sources are used multiple times, so + # should only be set in singular dynamic environments such as experimental + # notebooks. + 'cache_sources': True, + # Generally, these keys are the form of "cache__" # For tilesources. These are also limited by available file handles. diff --git a/large_image/tilesource/__init__.py b/large_image/tilesource/__init__.py index aea07f555..7ac8c2545 100644 --- a/large_image/tilesource/__init__.py +++ b/large_image/tilesource/__init__.py @@ -178,6 +178,9 @@ def canRead(*args, **kwargs): """ Check if large_image can read a path or uri. + If there is no intention to open the image immediately, conisder adding + `noCache=True` to the kwargs to avoid cycling the cache unnecessarily. + :returns: True if any appropriate source reports it can read the path or uri. """ @@ -192,6 +195,9 @@ def canReadList(pathOrUri, mimeType=None, *args, **kwargs): """ Check if large_image can read a path or uri via each source. + If there is no intention to open the image immediately, conisder adding + `noCache=True` to the kwargs to avoid cycling the cache unnecessarily. + :param pathOrUri: either a file path or a fixed source via large_image://. :param mimeType: the mimetype of the file, if known. diff --git a/large_image/tilesource/base.py b/large_image/tilesource/base.py index bd0ffb5c5..34e6158f2 100644 --- a/large_image/tilesource/base.py +++ b/large_image/tilesource/base.py @@ -8,6 +8,7 @@ import threading import time import types +import uuid import numpy as np import PIL @@ -57,8 +58,8 @@ class TileSource(IPyLeafletMixin): geospatial = False def __init__(self, encoding='JPEG', jpegQuality=95, jpegSubsampling=0, - tiffCompression='raw', edge=False, style=None, *args, - **kwargs): + tiffCompression='raw', edge=False, style=None, noCache=None, + *args, **kwargs): """ Initialize the tile class. @@ -129,6 +130,10 @@ def __init__(self, encoding='JPEG', jpegQuality=95, jpegSubsampling=0, excepting that each must have a band that is not -1. Bands are composited in the order listed. This base object may also contain the 'dtype' and 'axis' values. + :param noCache: if True, the style can be adjusted dynamically and the + source is not elibible for caching. If there is no intention to + reuse the source at a later time, this can have performance + benefits, such as when first cataloging images that can be read. """ super().__init__(**kwargs) self.logger = config.getConfig('logger') @@ -233,6 +238,27 @@ def getCenter(self, *args, **kwargs): def style(self): return self._style + @style.setter + def style(self, value): + if not hasattr(self, '_unstyledStyle') and value == getattr(self, '_unstyledStyle', None): + return + if not getattr(self, '_noCache', False): + msg = 'Cannot set the style of a cached source' + raise exceptions.TileSourceError(msg) + args, kwargs = self._initValues + kwargs['style'] = value + self._initValues = (args, kwargs.copy()) + oldval = getattr(self, '_jsonstyle', None) + self._setStyle(value) + if oldval == getattr(self, '_jsonstyle', None): + return + self._classkey = str(uuid.uuid4()) + if (kwargs.get('style') != getattr(self, '_unstyledStyle', None) and + not hasattr(self, '_unstyledInstance')): + subkwargs = kwargs.copy() + subkwargs['style'] = getattr(self, '_unstyledStyle', None) + self._unstyledInstance = self.__class__(*args, **subkwargs) + @property def dtype(self): with self._sourceLock: diff --git a/sources/dicom/large_image_source_dicom/__init__.py b/sources/dicom/large_image_source_dicom/__init__.py index 1c0e3bc4c..92953cae1 100644 --- a/sources/dicom/large_image_source_dicom/__init__.py +++ b/sources/dicom/large_image_source_dicom/__init__.py @@ -147,7 +147,7 @@ def __del__(self): # If we have an _unstyledInstance attribute, this is not the owner of # the _docim handle, so we can't close it. Otherwise, we need to close # it or the _dicom library may prevent shutting down. - if getattr(self, '_dicom', None) is not None and not hasattr(self, '_unstyledInstance'): + if getattr(self, '_dicom', None) is not None and not hasattr(self, '_derivedSource'): try: self._dicom.close() finally: diff --git a/sources/nd2/large_image_source_nd2/__init__.py b/sources/nd2/large_image_source_nd2/__init__.py index ad4178f00..7e09f50f1 100644 --- a/sources/nd2/large_image_source_nd2/__init__.py +++ b/sources/nd2/large_image_source_nd2/__init__.py @@ -194,7 +194,7 @@ def __del__(self): # If we have an _unstyledInstance attribute, this is not the owner of # the _nd2 handle, so we can't close it. Otherwise, we need to close # it or the nd2 library complains that we didn't explicitly close it. - if hasattr(self, '_nd2') and not hasattr(self, '_unstyledInstance'): + if hasattr(self, '_nd2') and not hasattr(self, '_derivedSource'): self._nd2.close() del self._nd2 diff --git a/test/test_cache_source.py b/test/test_cache_source.py index e13ae93f9..290ea5bab 100644 --- a/test/test_cache_source.py +++ b/test/test_cache_source.py @@ -3,7 +3,7 @@ import pytest import large_image -from large_image.cache_util import cachesClear +from large_image.cache_util import cachesClear, cachesInfo from .datastore import datastore @@ -65,3 +65,27 @@ def testCacheSourceBadStyle(): assert tile1 == tile2 ts1 = ts2 = None cachesClear() + + +@pytest.mark.singular() +def testCacheNoCache(): + cachesClear() + assert cachesInfo()['tilesource']['used'] == 0 + imagePath = datastore.fetch('sample_image.ptif') + ts1 = large_image.open(imagePath, noCache=True) + ts2 = large_image.open(imagePath, style={'max': 128}, noCache=True) + assert cachesInfo()['tilesource']['used'] == 0 + ts1.style = {'max': 190} + lastkey = ts2._classkey + ts2.style = {'max': 190} + assert ts2._classkey != lastkey + lastkey = ts2._classkey + ts2.style = {'max': 190} + assert ts2._classkey == lastkey + assert cachesInfo()['tilesource']['used'] == 0 + ts1 = ts2 = None + ts1 = large_image.open(imagePath) + large_image.open(imagePath, style={'max': 128}) + assert cachesInfo()['tilesource']['used'] > 0 + with pytest.raises(large_image.exceptions.TileSourceError): + ts1.style = {'max': 190}