Skip to content

Commit b2a0e00

Browse files
committed
Issue #259/#453/#458 finetune crs normalization some more
- rename to `normalize_crs` because it is not only about EPSG output, WKT2 output is also allowed
- reworked and simplified pyproj availability handling: when available: use it fully, when not: do best effort normalization
- Move tests into class for better grouping and overview
- add test coverage for "no pyproj available" code path
1 parent ba32cca commit b2a0e00

File tree

5 files changed

+230
-232
lines changed

5 files changed

+230
-232
lines changed

CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12-
13-
- Processes that take a CRS as argument now try harder to convert your input into a proper EPSG code, to avoid unexpected results when an invalid argument gets sent to the backend.
12+
- Processes that take a CRS as argument now try harder to normalize your input to
13+
a CRS representation that aligns with the openEO API (using `pyproj` library when available)
14+
([#259](https://github.com/Open-EO/openeo-python-client/issues/259))
1415
- Initial `load_geojson` support with `Connection.load_geojson()` ([#424](https://github.com/Open-EO/openeo-python-client/issues/424))
1516
- Initial `load_url` (for vector cubes) support with `Connection.load_url()` ([#424](https://github.com/Open-EO/openeo-python-client/issues/424))
1617
- Support lambda based property filtering in `Connection.load_stac()` ([#425](https://github.com/Open-EO/openeo-python-client/issues/425))
@@ -27,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2728

2829
- Fix: MultibackendJobManager should stop when finished, also when job finishes with error ([#452](https://github.com/Open-EO/openeo-python-client/issues/432))
2930

31+
3032
## [0.21.1] - 2023-07-19
3133

3234
### Fixed

openeo/rest/datacube.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from openeo.rest.service import Service
3838
from openeo.rest.udp import RESTUserDefinedProcess
3939
from openeo.rest.vectorcube import VectorCube
40-
from openeo.util import get_temporal_extent, dict_no_none, rfc3339, guess_format, crs_to_epsg_code
40+
from openeo.util import get_temporal_extent, dict_no_none, rfc3339, guess_format, normalize_crs
4141

4242
if typing.TYPE_CHECKING:
4343
# Imports for type checking only (circular import issue at runtime).
@@ -332,7 +332,7 @@ def filter_bbox(
332332
" Use keyword arguments or tuple/list argument instead.")
333333
west, east, north, south = args[:4]
334334
if len(args) > 4:
335-
crs = crs_to_epsg_code(args[4])
335+
crs = normalize_crs(args[4])
336336
elif len(args) == 1 and (isinstance(args[0], (list, tuple)) and len(args[0]) == 4
337337
or isinstance(args[0], (dict, shapely.geometry.base.BaseGeometry, Parameter))):
338338
bbox = args[0]
@@ -834,7 +834,7 @@ def _get_geometry_argument(
834834
# TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
835835
warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.")
836836
# TODO #204 alternative for non-standard CRS in GeoJSON object?
837-
epsg_code = crs_to_epsg_code(crs)
837+
epsg_code = normalize_crs(crs)
838838
if epsg_code is not None:
839839
# proj did recognize the CRS
840840
crs_name = f"EPSG:{epsg_code}"

openeo/rest/vectorcube.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from openeo.rest._datacube import THIS, UDF, _ProcessGraphAbstraction, build_child_callback
1515
from openeo.rest.job import BatchJob
1616
from openeo.rest.mlmodel import MlModel
17-
from openeo.util import dict_no_none, guess_format, crs_to_epsg_code, to_bbox_dict, InvalidBBoxException
17+
from openeo.util import dict_no_none, guess_format, to_bbox_dict, InvalidBBoxException
1818

1919
if typing.TYPE_CHECKING:
2020
# Imports for type checking only (circular import issue at runtime).

openeo/util.py

Lines changed: 49 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@
1818
import shapely.geometry.base
1919
from deprecated import deprecated
2020

21+
try:
22+
# pyproj is an optional dependency
23+
import pyproj
24+
except ImportError:
25+
pyproj = None
26+
27+
2128
logger = logging.getLogger(__name__)
2229

2330

@@ -535,7 +542,7 @@ class BBoxDict(dict):
535542
def __init__(self, *, west: float, south: float, east: float, north: float, crs: Optional[Union[str, int]] = None):
536543
super().__init__(west=west, south=south, east=east, north=north)
537544
if crs is not None:
538-
self.update(crs=crs_to_epsg_code(crs))
545+
self.update(crs=normalize_crs(crs))
539546

540547
# TODO: provide west, south, east, north, crs as @properties? Read-only or read-write?
541548

@@ -635,86 +642,58 @@ def get(self, fraction: float) -> str:
635642
return f"{self.left}{bar:{self.fill}<{width}s}{self.right}"
636643

637644

638-
def crs_to_epsg_code(crs: Union[str, int, dict, None]) -> Optional[int]:
639-
"""Convert a CRS string or int to an integer EPGS code, where CRS usually comes from user input.
640-
641-
Three cases:
642-
643-
- If it is already an integer we just keep it.
644-
- If it is None it stays None, and empty strings become None as well.
645-
- If it is a string we try to parse it with the pyproj library.
646-
- Strings of the form "EPSG:<int>" will be converted to teh value <int>
647-
- For any other strings formats, it will work if pyproj supports is,
648-
otherwise it won't.
649-
650-
The result is **always** an EPSG code, so the CRS should be one that is
651-
defined in EPSG. For any other definitions pyproj will only give you the
652-
closest EPSG match and that result is possibly inaccurate.
653-
654-
Note that we also need to support WKT string (WKT2),
655-
see also: https://github.com/Open-EO/openeo-processes/issues/58
645+
def normalize_crs(crs: Any, *, use_pyproj: bool = True) -> Union[None, int, str]:
646+
"""
647+
Normalize given data structure (typically just an int or string)
648+
that encodes a CRS (Coordinate Reference System) to an EPSG (int) code or WKT2 CRS string.
656649
657-
For very the oldest supported version of Python: v3.6 there is a problem
658-
because the pyproj version that is compatible with Python 3.6 is too old
659-
and does not properly support WKT2.
650+
Behavior and data structure support depends on the availability of the ``pyproj`` library:
660651
652+
- If the ``pyproj`` library is available: use that to do parsing and conversion.
653+
This means that anything that is supported by ``pyproj.CRS.from_user_input`` is allowed.
654+
See the ``pyproj`` docs for more details.
655+
- Otherwise, some best effort validation is done:
656+
EPSG looking int/str values will be parsed as such, other strings will be assumed to be WKT2 already.
657+
Other data structures will not be accepted.
661658
662-
For a list of CRS input formats that proj supports
663-
see: https://pyproj4.github.io/pyproj/stable/api/crs/crs.html#pyproj.crs.CRS.from_user_input
659+
:param crs: data structure that encodes a CRS, typically just an int or string value.
660+
If the ``pyproj`` library is available, everything supported by it is allowed
661+
:param use_pyproj: whether ``pyproj`` should be leveraged at all
662+
(mainly useful for testing the "no pyproj available" code path)
664663
665-
:param crs:
666-
Input from user for the Coordinate Reference System to convert to an
667-
EPSG code.
664+
:return: EPSG code as int, or WKT2 string, or None if the input was empty.
668665
669666
:raises ValueError:
670-
When the crs is a not a supported CRS string.
671-
:raises TypeError:
672-
When crs is none of the supported types: str, int, None
667+
When the given CRS data can not be parsed/converted/normalized.
673668
674-
:return: An EPGS code if it could be found, otherwise None
675669
"""
676-
677-
# Only convert to the default if it is an explicitly allowed type.
678670
if crs in (None, "", {}):
679671
return None
680672

681-
if not isinstance(crs, (int, str, dict)):
682-
raise TypeError("The allowed type for the parameter 'crs' are: str, int, dict and None")
683-
684-
# If we want to stop processing as soon as we have an int value, then we
685-
# should not accept values that are complete non-sense, as best as we can.
686-
crs_intermediate = crs
687-
if isinstance(crs, int):
688-
crs_intermediate = crs
689-
elif isinstance(crs, str):
690-
# This conversion is needed to support strings that only contain an integer,
691-
# e.g. "4326" though it is a string, is a otherwise a correct EPSG code.
673+
if pyproj and use_pyproj:
692674
try:
693-
crs_intermediate = int(crs)
694-
except ValueError as exc:
695-
# So we need to process it with pyproj, below.
696-
logger.debug("crs_to_epsg_code received crs input that was not an int: crs={crs}, exception caught: {exc}")
697-
698-
if isinstance(crs_intermediate, int):
699-
if crs_intermediate <= 0:
700-
raise ValueError(f"When crs is an integer value it has to be > 0.")
701-
else:
702-
return crs_intermediate
703-
704-
try:
705-
import pyproj.crs
706-
except ImportError as exc:
707-
message = (
708-
f"Cannot convert CRS string: {crs}. "
709-
+ "Need pyproj to convert this CRS string but the pyproj library is not installed."
710-
)
711-
logger.error(message)
712-
raise ValueError(message) from ImportError
675+
# (if available:) let pyproj do the validation/parsing
676+
crs_obj = pyproj.CRS.from_user_input(crs)
677+
# Convert back to EPSG int or WKT2 string
678+
crs = crs_obj.to_epsg() or crs_obj.to_wkt()
679+
except pyproj.ProjError as e:
680+
raise ValueError(f"Failed to normalize CRS data with pyproj: {crs}") from e
713681
else:
714-
try:
715-
converted_crs = pyproj.crs.CRS.from_user_input(crs)
716-
except pyproj.exceptions.CRSError as exc:
717-
logger.error(f"Could not convert CRS string to EPSG code: crs={crs}, exception: {exc}", exc_info=True)
718-
raise ValueError(crs) from exc
682+
# Best effort simple validation/normalization
683+
if isinstance(crs, int) and crs > 0:
684+
# Assume int is already valid EPSG code
685+
pass
686+
elif isinstance(crs, str):
687+
# Parse as EPSG int code if it looks like that,
688+
# otherwise: leave it as-is, assuming it is a valid WKT2 CRS string
689+
if re.match(r"^(epsg:)?\d+$", crs.strip(), flags=re.IGNORECASE):
690+
crs = int(crs.split(":")[-1])
691+
elif "GEOGCRS[" in crs:
692+
# Very simple WKT2 CRS detection heuristic
693+
logger.warning(f"Assuming this is a valid WK2 CRS string: {repr_truncate(crs)}")
694+
else:
695+
raise ValueError(f"Can not normalize CRS string {repr_truncate(crs)}")
719696
else:
720-
return converted_crs.to_epsg()
697+
raise ValueError(f"Can not normalize CRS data {type(crs)}")
698+
699+
return crs

0 commit comments

Comments
 (0)