Skip to content

Commit ab2ad96

Browse files
committed
Issue #114/#211 initial support for load_url (vector cubes)
1 parent 65920ef commit ab2ad96

File tree

7 files changed

+204
-36
lines changed

7 files changed

+204
-36
lines changed

openeo_driver/ProcessGraphDeserializer.py

+43-31
Original file line numberDiff line numberDiff line change
@@ -6,29 +6,29 @@
66
import datetime
77
import logging
88
import math
9+
import re
910
import tempfile
1011
import time
1112
import warnings
1213
from pathlib import Path
13-
from typing import Dict, Callable, List, Union, Tuple, Any, Iterable
14+
from typing import Any, Callable, Dict, Iterable, List, Tuple, Union
1415

15-
import pandas as pd
1616
import geopandas as gpd
1717
import numpy as np
18+
import openeo.udf
1819
import openeo_processes
20+
import pandas as pd
1921
import pyproj
2022
import requests
21-
from dateutil.relativedelta import relativedelta
22-
from requests.structures import CaseInsensitiveDict
2323
import shapely.geometry
24-
from shapely.geometry import shape, GeometryCollection, shape, mapping, MultiPolygon
2524
import shapely.ops
26-
27-
import openeo.udf
25+
from dateutil.relativedelta import relativedelta
2826
from openeo.capabilities import ComparableVersion
29-
from openeo.internal.process_graph_visitor import ProcessGraphVisitor, ProcessGraphVisitException
27+
from openeo.internal.process_graph_visitor import ProcessGraphVisitException, ProcessGraphVisitor
3028
from openeo.metadata import CollectionMetadata, MetadataException
31-
from openeo.util import load_json, rfc3339, deep_get, str_truncate
29+
from openeo.util import deep_get, load_json, rfc3339, str_truncate
30+
from shapely.geometry import GeometryCollection, MultiPolygon, mapping, shape
31+
3232
from openeo_driver import dry_run
3333
from openeo_driver.backend import (
3434
UserDefinedProcessMetadata,
@@ -53,13 +53,9 @@
5353
to_save_result, AggregatePolygonSpatialResult, MlModelResult
5454
from openeo_driver.specs import SPECS_ROOT, read_spec
5555
from openeo_driver.util.date_math import month_shift
56-
from openeo_driver.util.geometry import (
57-
geojson_to_geometry,
58-
geojson_to_multipolygon,
59-
spatial_extent_union,
60-
)
56+
from openeo_driver.util.geometry import geojson_to_geometry, geojson_to_multipolygon, spatial_extent_union
6157
from openeo_driver.util.utm import auto_utm_epsg_for_geometry
62-
from openeo_driver.utils import smart_bool, EvalEnv
58+
from openeo_driver.utils import EvalEnv, smart_bool
6359

6460
_log = logging.getLogger(__name__)
6561

@@ -1540,26 +1536,24 @@ def read_vector(args: Dict, env: EvalEnv) -> DelayedVector:
15401536

15411537

15421538
@process_registry_100.add_function(spec=read_spec("openeo-processes/1.x/proposals/load_uploaded_files.json"))
1543-
def load_uploaded_files(args: dict, env: EvalEnv) -> Union[DriverVectorCube,DriverDataCube]:
1539+
def load_uploaded_files(args: ProcessArgs, env: EvalEnv) -> Union[DriverVectorCube, DriverDataCube]:
15441540
# TODO #114 EP-3981 process name is still under discussion https://github.com/Open-EO/openeo-processes/issues/322
1545-
paths = extract_arg(args, 'paths', process_id="load_uploaded_files")
1546-
format = extract_arg(args, 'format', process_id="load_uploaded_files")
1547-
options = args.get("options", {})
1548-
1549-
input_formats = CaseInsensitiveDict(env.backend_implementation.file_formats()["input"])
1550-
if format not in input_formats:
1551-
raise FileTypeInvalidException(type=format, types=", ".join(input_formats.keys()))
1541+
paths = args.get_required("paths", expected_type=list)
1542+
format = args.get_required(
1543+
"format",
1544+
expected_type=str,
1545+
validator=ProcessArgs.validator_file_format(formats=env.backend_implementation.file_formats()["input"]),
1546+
)
1547+
options = args.get_optional("options", default={})
15521548

1553-
if format.lower() in {"geojson", "esri shapefile", "gpkg", "parquet"}:
1549+
if DriverVectorCube.from_fiona_supports(format):
15541550
return DriverVectorCube.from_fiona(paths, driver=format, options=options)
15551551
elif format.lower() in {"GTiff"}:
1556-
if(len(paths)!=1):
1557-
raise FeatureUnsupportedException(f"load_uploaded_files only supports a single raster of format {format!r}, you provided {paths}")
1558-
kwargs = dict(
1559-
glob_pattern=paths[0],
1560-
format=format,
1561-
options=options
1562-
)
1552+
if len(paths) != 1:
1553+
raise FeatureUnsupportedException(
1554+
f"load_uploaded_files only supports a single raster of format {format!r}, you provided {paths}"
1555+
)
1556+
kwargs = dict(glob_pattern=paths[0], format=format, options=options)
15631557
dry_run_tracer: DryRunDataTracer = env.get(ENV_DRY_RUN_TRACER)
15641558
if dry_run_tracer:
15651559
return dry_run_tracer.load_disk_data(**kwargs)
@@ -1604,6 +1598,24 @@ def load_geojson(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube:
16041598
return vector_cube
16051599

16061600

1601+
@process_registry_100.add_function(spec=read_spec("openeo-processes/2.x/proposals/load_url.json"))
def load_url(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube:
    """
    Implementation of the `load_url` process: load a vector cube from a file behind a URL.

    :param args: process arguments: required `url` (string that must start with `http://` or `https://`),
        required `format` (string, checked against the input file formats listed by the backend implementation)
        and optional `options` (defaults to an empty dict)
    :param env: evaluation environment, used here to reach the backend implementation
    :return: vector cube built with `DriverVectorCube.from_fiona`
    :raises FeatureUnsupportedException: when `DriverVectorCube.from_fiona_supports` rejects the given format
    """
    # TODO: Follow up possible `load_url` changes https://github.com/Open-EO/openeo-processes/issues/450 ?
    url_pattern = re.compile("^https?://")
    url = args.get_required("url", expected_type=str, validator=url_pattern.match)

    input_formats = env.backend_implementation.file_formats()["input"]
    file_format = args.get_required(
        "format",
        expected_type=str,
        validator=ProcessArgs.validator_file_format(formats=input_formats),
    )
    options = args.get_optional("options", default={})

    # Guard clause: only formats that can be read through fiona are handled for now.
    if not DriverVectorCube.from_fiona_supports(file_format):
        raise FeatureUnsupportedException(f"Loading format {file_format!r} is not supported")

    # TODO: for GeoJSON (and related) support `properties` option like load_geojson? https://github.com/Open-EO/openeo-processes/issues/450
    return DriverVectorCube.from_fiona(paths=[url], driver=file_format, options=options)
1617+
1618+
16071619
@non_standard_process(
16081620
ProcessSpec("get_geometries", description="Reads vector data from a file or a URL or get geometries from a FeatureCollection")
16091621
.param('filename', description="filename or http url of a vector file", schema={"type": "string"}, required=False)

openeo_driver/_version.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.61.0a1"
1+
__version__ = "0.61.1a1"

openeo_driver/datacube.py

+6
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,12 @@ def from_geodataframe(
331331
)
332332
return cls(geometries=data, cube=cube)
333333

334+
@classmethod
def from_fiona_supports(cls, format: str) -> bool:
    """
    Check whether `from_fiona` supports the given format.

    The comparison is done on the lower-cased format name, so it is case-insensitive.

    :param format: format name, e.g. "GeoJSON"
    :return: True when the format is in the supported set, False otherwise
    """
    # TODO: also cover input format options?
    supported = {"geojson", "esri shapefile", "gpkg", "parquet"}
    normalized = format.lower()
    return normalized in supported
339+
334340
@classmethod
335341
def from_fiona(
336342
cls,

openeo_driver/processes.py

+22
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Union
77

88
from openeo_driver.errors import (
9+
FileTypeInvalidException,
10+
OpenEOApiException,
911
ProcessParameterInvalidException,
1012
ProcessParameterRequiredException,
1113
ProcessUnsupportedException,
@@ -325,6 +327,9 @@ def _check_value(
325327
try:
326328
valid = validator(value)
327329
reason = "Failed validation."
330+
except OpenEOApiException:
331+
# Preserve original OpenEOApiException
332+
raise
328333
except Exception as e:
329334
valid = False
330335
reason = str(e)
@@ -441,6 +446,23 @@ def validator(value):
441446

442447
return validator
443448

449+
@staticmethod
def validator_file_format(formats: Union[List[str], Dict[str, dict]]):
    """
    Build a validator for an input/output file format name (case-insensitive check).

    :param formats: list of valid formats, or dictionary with formats as keys
    :return: callable that returns True for an allowed format name
        and raises FileTypeInvalidException otherwise
    """
    # Iterating a dict yields its keys, so this normalizes both accepted input shapes to a list of names.
    formats = list(formats)
    allowed = {name.lower() for name in formats}

    def validator(value: str):
        if value.lower() in allowed:
            return True
        # Report the format names in their original casing.
        raise FileTypeInvalidException(type=value, types=", ".join(formats))

    return validator
465+
444466
@staticmethod
445467
def validator_geojson_dict(
446468
allowed_types: Optional[Collection[str]] = None,

openeo_driver/testing.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -614,10 +614,19 @@ def test_my_function(caplog, monkeypatch)
614614
@contextlib.contextmanager
615615
def ephemeral_fileserver(path: Union[Path, str], host: str = "localhost", port: int = 0) -> str:
616616
"""
617-
Context manager to run a short-lived (static) file HTTP server, serving some local test data.
617+
Context manager to run a short-lived (static) file HTTP server, serving files from a given local test data folder.
618+
618619
This is an alternative to traditional mocking of HTTP requests (e.g. with requests_mock)
619620
for situations where that doesn't work (requests are done in a subprocess or at the level of a C-extension/library).
620621
622+
Usage example:
623+
624+
>>> # create temp file with `tmp_path` fixture
625+
>>> (tmp_path / "hello.txt").write_text("Hello world")
626+
>>> with ephemeral_fileserver(tmp_path) as fileserver_root:
627+
... res = subprocess.check_output(["curl", f"{fileserver_root}/hello.txt"])
628+
>>> assert res.strip() == b"Hello world"
629+
621630
:param path: root path of the local files to serve
622631
:return: root URL of the ephemeral file server (e.g. "http://localhost:21342")
623632
"""

tests/test_processes.py

+28-3
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,12 @@
44

55
from openeo_driver.datacube import DriverDataCube
66
from openeo_driver.errors import (
7-
ProcessUnsupportedException,
8-
ProcessParameterRequiredException,
7+
FileTypeInvalidException,
98
ProcessParameterInvalidException,
9+
ProcessParameterRequiredException,
10+
ProcessUnsupportedException,
1011
)
11-
from openeo_driver.processes import ProcessSpec, ProcessRegistry, ProcessRegistryException, ProcessArgs
12+
from openeo_driver.processes import ProcessArgs, ProcessRegistry, ProcessRegistryException, ProcessSpec
1213

1314

1415
def test_process_spec_basic_040():
@@ -635,3 +636,27 @@ def test_validator_geojson_dict(self):
635636
),
636637
):
637638
_ = args.get_required("geometry", validator=validator)
639+
640+
@pytest.mark.parametrize(
    ["formats"],
    [
        (["GeoJSON", "CSV"],),
        ({"GeoJSON": {}, "CSV": {}},),
    ],
)
def test_validator_file_format(self, formats):
    """
    `ProcessArgs.validator_file_format` should accept known formats regardless of case
    and fail with `FileTypeInvalidException` (listing the allowed formats) on unknown ones,
    both when the formats are given as list and as dictionary.
    """
    validator = ProcessArgs.validator_file_format(formats=formats)
    args = ProcessArgs(
        {"format1": "GeoJSON", "format2": "geojson", "format3": "TooExotic"},
        process_id="wibble",
    )

    # Accepted values are returned unchanged (original casing preserved).
    for name, expected_value in [("format1", "GeoJSON"), ("format2", "geojson")]:
        assert args.get_required(name, validator=validator) == expected_value

    expected_message = re.escape("File format TooExotic not allowed. Allowed file formats: GeoJSON, CSV")
    with pytest.raises(FileTypeInvalidException, match=expected_message):
        _ = args.get_required("format3", validator=validator)

tests/test_views_execute.py

+94
Original file line numberDiff line numberDiff line change
@@ -1965,11 +1965,105 @@ def test_to_vector_cube(self, api100, geojson, expected):
19651965
],
19661966
)
19671967
def test_load_geojson(self, api100, geojson, expected):
1968+
# TODO: cover `properties` parameter
19681969
res = api100.check_result(
19691970
{"vc": {"process_id": "load_geojson", "arguments": {"data": geojson}, "result": True}}
19701971
)
19711972
assert res.json == DictSubSet({"type": "FeatureCollection", "features": expected})
19721973

1974+
@pytest.mark.parametrize(
    ["geometry", "expected"],
    [
        (
            {"type": "Point", "coordinates": (1, 2)},
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "Point", "coordinates": [1, 2]},
                    "properties": {},
                },
            ],
        ),
        (
            {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]},
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]},
                    "properties": {},
                },
            ],
        ),
        (
            {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]},
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]},
                    "properties": {},
                },
            ],
        ),
        (
            {
                "type": "Feature",
                "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]},
                "properties": {"id": "12_3"},
            },
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]},
                    "properties": {"id": "12_3"},
                },
            ],
        ),
        (
            {
                "type": "FeatureCollection",
                "features": [
                    {
                        "type": "Feature",
                        "geometry": {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]},
                        "properties": {"id": 1},
                    },
                    {
                        "type": "Feature",
                        "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]},
                        "properties": {"id": 2},
                    },
                ],
            },
            [
                {
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]},
                    "properties": {"id": 1},
                },
                {
                    "type": "Feature",
                    "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]},
                    "properties": {"id": 2},
                },
            ],
        ),
    ],
)
def test_load_url_geojson(self, api100, geometry, expected, tmp_path):
    """
    `load_url` with format "GeoJSON" should turn a GeoJSON document served over HTTP
    (bare geometry, Feature or FeatureCollection) into a FeatureCollection result.
    """
    # Serve the GeoJSON from a real (short-lived) HTTP server instead of mocking requests.
    filename = "geometry.json"
    (tmp_path / filename).write_text(json.dumps(geometry))

    with ephemeral_fileserver(tmp_path) as fileserver_root:
        process_graph = {
            "load": {
                "process_id": "load_url",
                "arguments": {"url": f"{fileserver_root}/{filename}", "format": "GeoJSON"},
                "result": True,
            }
        }
        res = api100.check_result(process_graph)

    expected_subset = DictSubSet({"type": "FeatureCollection", "features": expected})
    assert res.json == expected_subset
2066+
19732067

19742068
def test_no_nested_JSONResult(api):
19752069
api.set_auth_bearer_token()

0 commit comments

Comments
 (0)