Skip to content

Commit d8f45cd

Browse files
authored
Merge pull request #553 from djhoese/cache-area-slices
Add optional caching to AreaDefinition.get_area_slices
2 parents 6a8afc0 + b0a2579 commit d8f45cd

File tree

8 files changed

+411
-148
lines changed

8 files changed

+411
-148
lines changed

docs/source/howtos/configuration.rst

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,67 @@ Or for specific blocks of code:
7676
Similarly, if you need to access one of the values you can
7777
use the ``pyresample.config.get`` method.
7878
79+
Cache Directory
80+
^^^^^^^^^^^^^^^
81+
82+
* **Environment variable**: ``PYRESAMPLE_CACHE_DIR``
83+
* **YAML/Config Key**: ``cache_dir``
84+
* **Default**: See below
85+
86+
Directory where any files cached by Pyresample will be stored. This
87+
directory is not necessarily cleared out by Pyresample, but is rarely used
88+
without explicitly being enabled by the user. This
89+
defaults to a different path depending on your operating system following
90+
the `platformdirs <https://github.com/platformdirs/platformdirs#example-output>`_
91+
"user cache dir".
92+
93+
.. note::
94+
95+
Some resampling algorithms provide caching functionality when the user
96+
provides a directory to cache to. These resamplers do not currently use this
97+
configuration option.
98+
99+
.. _config_cache_geometry_slices_setting:
100+
101+
Cache Geometry Slices
102+
^^^^^^^^^^^^^^^^^^^^^
103+
104+
* **Environment variable**: ``PYRESAMPLE_CACHE_GEOMETRY_SLICES``
105+
* **YAML/Config Key**: ``cache_geometry_slices``
106+
* **Default**: ``False``
107+
108+
Whether or not generated slices for geometry objects are cached to disk.
109+
These slices are used in various parts of Pyresample like
110+
cropping or overlap calculations including those performed in some resampling
111+
algorithms. At the time of writing this is only performed on
112+
``AreaDefinition`` objects through their
113+
:meth:`~pyresample.geometry.AreaDefinition.get_area_slices` method.
114+
Slices are stored in ``cache_dir`` (see above).
115+
Unlike other caching performed in Pyresample where potentially large arrays
116+
are cached, this option saves a pair of ``slice`` objects that consist of
117+
only 3 integers each. This makes the amount of space used in the cache very
118+
small for many cached results.
119+
120+
The slicing operations in Pyresample typically involve finding the intersection
121+
between two geometries. This requires generating bounding polygons for the
122+
geometries and doing polygon intersection calculations that can handle
123+
projection anti-meridians. At the time of writing these calculations can take
124+
as long as 15 seconds depending on the number of vertices used in the bounding
125+
polygons. One use case for these slices is reducing input data to only the
126+
overlap of the target area. This can be done before or during resampling as
127+
part of the algorithm or as part of a third-party resampling interface
128+
(ex. Satpy). In the future as optimizations are made to the polygon
129+
intersection logic this caching option should hopefully not be needed.
130+
131+
When setting this as an environment variable, this should be set with the
132+
string equivalent of the Python boolean values ``="True"`` or ``="False"``.
133+
134+
.. warning::
135+
136+
This caching does not limit the number of entries nor does it expire old
137+
entries. It is up to the user to manage the contents of the cache
138+
directory.
139+
79140
Feature Flags
80141
-------------
81142

pyresample/_caching.py

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
"""Various tools for caching.
2+
3+
These tools are rarely needed by users and are used where they make sense
4+
throughout pyresample.
5+
6+
"""
7+
from __future__ import annotations
8+
9+
import hashlib
10+
import json
11+
import os
12+
import warnings
13+
from functools import update_wrapper
14+
from glob import glob
15+
from pathlib import Path
16+
from typing import Any, Callable
17+
18+
import pyresample
19+
20+
21+
class JSONCacheHelper:
    """Decorator class to cache results to a JSON file on-disk.

    The wrapped function is called directly unless the ``pyresample.config``
    value stored under ``cache_config_key`` is truthy. When caching is
    enabled, the result is stored in a JSON file named after a hash of the
    positional arguments, inside a versioned subdirectory of the configured
    cache directory.

    Only positional arguments are forwarded to the wrapped function.

    Args:
        func: The function whose results should be cached.
        cache_config_key: Key looked up in ``pyresample.config`` to decide
            whether caching is enabled for a given call.
        cache_version: Version number used in the cache subdirectory name.

    """

    def __init__(
            self,
            func: Callable,
            cache_config_key: str,
            cache_version: int = 1,
    ):
        self._callable = func
        self._cache_config_key = cache_config_key
        self._cache_version = cache_version
        self._uncacheable_arg_type_names = ("",)

    @staticmethod
    def cache_clear(cache_dir: str | None = None):
        """Remove the on-disk JSON cache files.

        Intended to mimic the :func:`functools.cache` behavior. Every
        ``*.json`` file in every version subdirectory is removed, not only
        the entries created by one particular decorated function.

        Args:
            cache_dir: Base cache directory to clear. When not provided the
                ``cache_dir`` value from ``pyresample.config`` is used.

        """
        # "*" as the version makes the glob pattern match all version subdirectories
        cache_dir = _get_cache_dir_from_config(cache_dir=cache_dir, cache_version="*")
        for json_file in glob(str(cache_dir / "*.json")):
            os.remove(json_file)

    def __call__(self, *args):
        """Call decorated function and cache the result to JSON.

        If caching is disabled in the configuration the function is simply
        called. If the arguments can't be hashed a warning is issued and the
        function is called without caching.
        """
        should_cache = pyresample.config.get(self._cache_config_key, False)
        if not should_cache:
            return self._callable(*args)

        try:
            arg_hash = _hash_args(args)
        except TypeError as err:
            warnings.warn("Cannot cache function due to unhashable argument: " + str(err),
                          stacklevel=2)
            return self._callable(*args)

        return self._run_and_cache(arg_hash, args)

    def _run_and_cache(self, arg_hash: str, args: tuple[Any, ...]) -> Any:
        """Return the cached result for ``arg_hash``, computing and storing it first if needed."""
        base_cache_dir = _get_cache_dir_from_config(cache_version=self._cache_version)
        json_path = base_cache_dir / f"{arg_hash}.json"
        if not json_path.is_file():
            res = self._callable(*args)
            # Serialize before touching the disk so that a result which can't
            # be encoded never leaves a truncated, unreadable cache entry.
            serialized = json.dumps(res, cls=_JSONEncoderWithSlice)
            # The configured base cache directory may not exist yet either.
            json_path.parent.mkdir(parents=True, exist_ok=True)
            with open(json_path, "w") as json_cache:
                json_cache.write(serialized)

        # for consistency, always load the cached result
        with open(json_path, "r") as json_cache:
            res = json.load(json_cache, object_hook=_object_hook)
        return res
73+
74+
75+
def _get_cache_dir_from_config(cache_dir: str | None = None, cache_version: int | str = 1) -> Path:
76+
cache_dir = cache_dir or pyresample.config.get("cache_dir")
77+
if cache_dir is None:
78+
raise RuntimeError("Can't use JSON caching. No 'cache_dir' configured.")
79+
subdir = f"geometry_slices_v{cache_version}"
80+
return Path(cache_dir) / subdir
81+
82+
83+
def _hash_args(args: tuple[Any]) -> str:
    """Compute a SHA1 hex digest identifying the given call arguments.

    Area definitions are replaced by their ``hash()`` value; all other
    arguments are used as-is. The resulting tuple is serialized with JSON
    and that text is hashed.

    Raises:
        TypeError: If any argument is a swath definition, or if an argument
            can't be serialized by :func:`json.dumps`.

    """
    # imported here as the geometry modules are not available at module import time
    from pyresample.future.geometry import AreaDefinition, SwathDefinition
    from pyresample.geometry import AreaDefinition as LegacyAreaDefinition
    from pyresample.geometry import SwathDefinition as LegacySwathDefinition

    swath_types = (SwathDefinition, LegacySwathDefinition)
    area_types = (AreaDefinition, LegacyAreaDefinition)

    def _as_hashable(value):
        if isinstance(value, swath_types):
            raise TypeError(f"Unhashable type ({type(value)})")
        return hash(value) if isinstance(value, area_types) else value

    serialized = json.dumps(tuple(_as_hashable(arg) for arg in args))
    return hashlib.sha1(serialized.encode("utf8")).hexdigest()  # nosec
98+
99+
100+
class _JSONEncoderWithSlice(json.JSONEncoder):
101+
def default(self, obj: Any) -> Any:
102+
if isinstance(obj, slice):
103+
return {"__slice__": True, "start": obj.start, "stop": obj.stop, "step": obj.step}
104+
return super().default(obj)
105+
106+
107+
def _object_hook(obj: object) -> Any:
108+
if isinstance(obj, dict) and obj.get("__slice__", False):
109+
return slice(obj["start"], obj["stop"], obj["step"])
110+
return obj
111+
112+
113+
def cache_to_json_if(cache_config_key: str) -> Callable:
    """Create a decorator that caches a function's results to JSON files on disk.

    Results are only cached when the ``pyresample.config`` value for
    ``cache_config_key`` is ``True`` and some other conditions are met. See
    :class:`JSONCacheHelper` for more information. Note that the number of
    cached items is not limited and old entries are never removed; managing
    the size of the cache is left to the user.

    """
    def _decorator(func: Callable) -> Callable:
        json_cacher = JSONCacheHelper(func, cache_config_key)
        # copy the name, docstring, etc of the function onto the helper object
        return update_wrapper(json_cacher, func)

    return _decorator

pyresample/_config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
from donfig import Config
2222

2323
BASE_PATH = os.path.dirname(os.path.realpath(__file__))
24-
# FIXME: Use package_resources?
2524
PACKAGE_CONFIG_PATH = os.path.join(BASE_PATH, 'etc')
2625

2726
_user_config_dir = platformdirs.user_config_dir("pyresample", "pytroll")
@@ -36,6 +35,8 @@
3635
config = Config(
3736
"pyresample",
3837
defaults=[{
38+
"cache_dir": platformdirs.user_cache_dir("pyresample", "pytroll"),
39+
"cache_geometry_slices": False,
3940
"features": {
4041
"future_geometries": False,
4142
},
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""Functions and tools for subsetting a geometry object."""
2+
from __future__ import annotations
3+
4+
import math
5+
from typing import TYPE_CHECKING, Any
6+
7+
import numpy as np
8+
9+
# this caching module imports the geometries so this subset module
10+
# must be imported inside functions in the geometry modules if needed
11+
# to avoid circular dependencies
12+
from pyresample._caching import cache_to_json_if
13+
from pyresample.boundary import Boundary
14+
from pyresample.geometry import get_geostationary_bounding_box_in_lonlats, logger
15+
from pyresample.utils import check_slice_orientation
16+
17+
if TYPE_CHECKING:
18+
from pyresample import AreaDefinition
19+
20+
21+
@cache_to_json_if("cache_geometry_slices")
def get_area_slices(
        src_area: AreaDefinition,
        area_to_cover: AreaDefinition,
        shape_divisible_by: int | None,
) -> tuple[slice, slice]:
    """Compute the x and y slices of `src_area` needed to cover `area_to_cover`.

    Args:
        src_area: Area to be sliced.
        area_to_cover: Area that the slices should cover.
        shape_divisible_by: When not ``None`` and the two areas use different
            projections, the slice sizes are adjusted to be divisible by
            this factor. It is not applied when the projections are equal.

    Returns:
        Tuple of ``(x_slice, y_slice)``.

    Raises:
        NotImplementedError: If either argument is not area-like or if the
            boundaries of the two areas do not intersect.

    """
    for candidate in (src_area, area_to_cover):
        if not _is_area_like(candidate):
            raise NotImplementedError(f"Only AreaDefinitions are supported, not {type(candidate)}")

    # Intersection only required for two different projections
    cover_crs = area_to_cover.crs
    src_crs = src_area.crs
    if cover_crs == src_crs:
        logger.debug('Projections for data and slice areas are identical: %s',
                     cover_crs)
        # Same projection: the slices follow directly from the area extents
        xstart, xstop, ystart, ystop = _get_slice_starts_stops(src_area, area_to_cover)
        oriented = [
            check_slice_orientation(slice(lower, upper))
            for lower, upper in ((xstart, xstop), (ystart, ystop))
        ]
        x_slice, y_slice = [_ensure_integer_slice(sli) for sli in oriented]
        return x_slice, y_slice

    src_boundary = _get_area_boundary(src_area)
    cover_boundary = _get_area_boundary(area_to_cover)
    overlap = src_boundary.contour_poly.intersection(cover_boundary.contour_poly)
    if overlap is None:
        logger.debug('Cannot determine appropriate slicing. '
                     "Data and projection area do not overlap.")
        raise NotImplementedError

    # intersection vertices are in radians, the index lookup is given degrees
    cols, rows = src_area.get_array_indices_from_lonlat(
        np.rad2deg(overlap.lon), np.rad2deg(overlap.lat))
    raw_slices = [
        slice(np.ma.min(indices), np.ma.max(indices) + 1)
        for indices in (cols, rows)
    ]
    x_slice, y_slice = [_ensure_integer_slice(sli) for sli in raw_slices]
    if shape_divisible_by is not None:
        x_slice = _make_slice_divisible(x_slice, src_area.width,
                                        factor=shape_divisible_by)
        y_slice = _make_slice_divisible(y_slice, src_area.height,
                                        factor=shape_divisible_by)

    return check_slice_orientation(x_slice), check_slice_orientation(y_slice)
70+
71+
72+
def _is_area_like(area_obj: Any) -> bool:
73+
return hasattr(area_obj, "crs") and hasattr(area_obj, "area_extent")
74+
75+
76+
def _get_slice_starts_stops(src_area, area_to_cover):
77+
"""Get x and y start and stop points for slicing."""
78+
llx, lly, urx, ury = area_to_cover.area_extent
79+
x, y = src_area.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury])
80+
81+
# we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent.
82+
if (src_area.area_extent[0] > src_area.area_extent[2]) ^ (llx > urx):
83+
xstart = max(0, round(x[1]))
84+
xstop = min(src_area.width, round(x[0]) + 1)
85+
else:
86+
xstart = max(0, round(x[0]))
87+
xstop = min(src_area.width, round(x[1]) + 1)
88+
if (src_area.area_extent[1] > src_area.area_extent[3]) ^ (lly > ury):
89+
ystart = max(0, round(y[0]))
90+
ystop = min(src_area.height, round(y[1]) + 1)
91+
else:
92+
ystart = max(0, round(y[1]))
93+
ystop = min(src_area.height, round(y[0]) + 1)
94+
95+
return xstart, xstop, ystart, ystop
96+
97+
98+
def _get_area_boundary(area_to_cover: AreaDefinition) -> Boundary:
99+
try:
100+
if area_to_cover.is_geostationary:
101+
return Boundary(*get_geostationary_bounding_box_in_lonlats(area_to_cover))
102+
boundary_shape = max(max(*area_to_cover.shape) // 100 + 1, 3)
103+
return area_to_cover.boundary(frequency=boundary_shape, force_clockwise=True)
104+
except ValueError:
105+
raise NotImplementedError("Can't determine boundary of area to cover")
106+
107+
108+
def _make_slice_divisible(sli, max_size, factor=2):
109+
"""Make the given slice even in size."""
110+
rem = (sli.stop - sli.start) % factor
111+
if rem != 0:
112+
adj = factor - rem
113+
if sli.stop + 1 + rem < max_size:
114+
sli = slice(sli.start, sli.stop + adj)
115+
elif sli.start > 0:
116+
sli = slice(sli.start - adj, sli.stop)
117+
else:
118+
sli = slice(sli.start, sli.stop - rem)
119+
120+
return sli
121+
122+
123+
def _ensure_integer_slice(sli):
124+
start = sli.start
125+
stop = sli.stop
126+
step = sli.step
127+
return slice(
128+
math.floor(start) if start is not None else None,
129+
math.ceil(stop) if stop is not None else None,
130+
math.floor(step) if step is not None else None
131+
)

0 commit comments

Comments
 (0)