Skip to content

Commit 6b3c539

Browse files
committed
Issue #425 initial Connection.load_stac
1 parent 48edef8 commit 6b3c539

File tree

5 files changed

+182
-4
lines changed

5 files changed

+182
-4
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
- Show progress bar while waiting for OIDC authentication with device code flow,
1414
including special mode for in Jupyter notebooks.
1515
([#237](https://github.com/Open-EO/openeo-python-client/issues/237))
16+
- Basic support for `load_stac` process with `Connection.load_stac()`
17+
([#425](https://github.com/Open-EO/openeo-python-client/issues/425))
1618

1719
### Changed
1820

openeo/rest/connection.py

+135-4
Original file line numberDiff line numberDiff line change
@@ -1036,10 +1036,141 @@ def load_result(
10361036
TemporalDimension(name='t', extent=[]),
10371037
BandDimension(name="bands", bands=[Band("unknown")]),
10381038
])
1039-
cube = self.datacube_from_process(process_id="load_result", id=id,
1040-
**dict_no_none(spatial_extent=spatial_extent,
1041-
temporal_extent=temporal_extent and DataCube._get_temporal_extent(
1042-
temporal_extent), bands=bands))
1039+
cube = self.datacube_from_process(
1040+
process_id="load_result",
1041+
id=id,
1042+
**dict_no_none(
1043+
spatial_extent=spatial_extent,
1044+
temporal_extent=temporal_extent and DataCube._get_temporal_extent(temporal_extent),
1045+
bands=bands,
1046+
),
1047+
)
1048+
cube.metadata = metadata
1049+
return cube
1050+
1051+
def load_stac(
1052+
self,
1053+
url: str,
1054+
spatial_extent: Optional[Dict[str, float]] = None,
1055+
temporal_extent: Optional[List[Union[str, datetime.datetime, datetime.date]]] = None,
1056+
bands: Optional[List[str]] = None,
1057+
properties: Optional[dict] = None,
1058+
) -> DataCube:
1059+
"""
1060+
Loads data from a static STAC catalog or a STAC API Collection and returns the data as a processable :py:class:`DataCube`.
1061+
A batch job result can be loaded by providing a reference to it.
1062+
1063+
If supported by the underlying metadata and file format, the data that is added to the data cube can be
1064+
restricted with the parameters ``spatial_extent``, ``temporal_extent`` and ``bands``.
1065+
If no data is available for the given extents, a ``NoDataAvailable`` error is thrown.
1066+
1067+
Remarks:
1068+
1069+
* The bands (and all dimensions that specify nominal dimension labels) are expected to be ordered as
1070+
specified in the metadata if the ``bands`` parameter is set to ``None``.
1071+
* If no additional parameter is specified this would imply that the whole data set is expected to be loaded.
1072+
Due to the large size of many data sets, this is not recommended and may be optimized by back-ends to only
1073+
load the data that is actually required after evaluating subsequent processes such as filters.
1074+
This means that the values should be processed only after the data has been limited to the required extent
1075+
and as a consequence also to a manageable size.
1076+
1077+
1078+
:param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog)
1079+
or a specific STAC API Collection that allows filtering items and downloading assets.
1080+
This includes batch job results, which themselves are compliant with STAC.
1081+
For external URLs, authentication details such as API keys or tokens may need to be included in the URL.
1082+
1083+
Batch job results can be specified in two ways:
1084+
1085+
- For Batch job results at the same back-end, a URL pointing to the corresponding batch job results
1086+
endpoint should be provided. The URL usually ends with ``/jobs/{id}/results`` and ``{id}``
1087+
is the corresponding batch job ID.
1088+
- For external results, a signed URL must be provided. Not all back-ends support signed URLs,
1089+
which are provided as a link with the link relation ``canonical`` in the batch job result metadata.
1090+
:param spatial_extent:
1091+
Limits the data to load to the specified bounding box or polygons.
1092+
1093+
For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects
1094+
with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
1095+
1096+
For vector data, the process loads the geometry into the data cube if the geometry is fully within the
1097+
bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
1098+
Empty geometries may only be in the data cube if no spatial extent has been provided.
1099+
1100+
The GeoJSON can be one of the following feature types:
1101+
1102+
* A ``Polygon`` or ``MultiPolygon`` geometry,
1103+
* a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or
1104+
* a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon`` or ``MultiPolygon`` geometries.
1105+
1106+
Set this parameter to ``None`` to set no limit for the spatial extent.
1107+
Be careful with this when loading large datasets. It is recommended to use this parameter instead of
1108+
using ``filter_bbox()`` or ``filter_spatial()`` directly after loading unbounded data.
1109+
1110+
:param temporal_extent:
1111+
Limits the data to load to the specified left-closed temporal interval.
1112+
Applies to all temporal dimensions.
1113+
The interval has to be specified as an array with exactly two elements:
1114+
1115+
1. The first element is the start of the temporal interval.
1116+
The specified instance in time is **included** in the interval.
1117+
2. The second element is the end of the temporal interval.
1118+
The specified instance in time is **excluded** from the interval.
1119+
1120+
The second element must always be greater/later than the first element.
1121+
Otherwise, a ``TemporalExtentEmpty`` exception is thrown.
1122+
1123+
Also supports open intervals by setting one of the boundaries to ``None``, but never both.
1124+
1125+
Set this parameter to ``None`` to set no limit for the temporal extent.
1126+
Be careful with this when loading large datasets. It is recommended to use this parameter instead of
1127+
using ``filter_temporal()`` directly after loading unbounded data.
1128+
1129+
:param bands:
1130+
Only adds the specified bands into the data cube so that bands that don't match the list
1131+
of band names are not available. Applies to all dimensions of type ``bands``.
1132+
1133+
Either the unique band name (metadata field ``name`` in bands) or one of the common band names
1134+
(metadata field ``common_name`` in bands) can be specified.
1135+
If the unique band name and the common name conflict, the unique band name has a higher priority.
1136+
1137+
The order of the specified array defines the order of the bands in the data cube.
1138+
If multiple bands match a common name, all matched bands are included in the original order.
1139+
1140+
It is recommended to use this parameter instead of using ``filter_bands()`` directly after loading unbounded data.
1141+
1142+
:param properties:
1143+
Limits the data by metadata properties to include only data in the data cube which
1144+
all given conditions return ``True`` for (AND operation).
1145+
1146+
Specify key-value-pairs with the key being the name of the metadata property,
1147+
which can be retrieved with the openEO Data Discovery for Collections.
1148+
The value must be a condition (user-defined process) to be evaluated against a STAC API.
1149+
This parameter is not supported for static STAC.
1150+
1151+
.. versionadded:: 0.17.0
1152+
"""
1153+
# TODO: detect actual metadata from URL
1154+
metadata = CollectionMetadata(
1155+
{},
1156+
dimensions=[
1157+
SpatialDimension(name="x", extent=[]),
1158+
SpatialDimension(name="y", extent=[]),
1159+
TemporalDimension(name="t", extent=[]),
1160+
BandDimension(name="bands", bands=[Band("unknown")]),
1161+
],
1162+
)
1163+
arguments = {"url": url}
1164+
# TODO: more normalization/validation of extent/band parameters and `properties`
1165+
if spatial_extent:
1166+
arguments["spatial_extent"] = spatial_extent
1167+
if temporal_extent:
1168+
arguments["temporal_extent"] = DataCube._get_temporal_extent(temporal_extent)
1169+
if bands:
1170+
arguments["bands"] = bands
1171+
if properties:
1172+
arguments["properties"] = properties
1173+
cube = self.datacube_from_process(process_id="load_stac", **arguments)
10431174
cube.metadata = metadata
10441175
return cube
10451176

openeo/rest/job.py

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ class BatchJob:
3232
3333
"""
3434

35+
# TODO #425 method to bootstrap `load_stac` directly from a BatchJob object
36+
3537
def __init__(self, job_id: str, connection: 'Connection'):
3638
self.job_id = job_id
3739
"""Unique identifier of the batch job (string)."""

tests/rest/conftest.py

+11
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
import pytest
99
import time_machine
1010

11+
from openeo.rest.connection import Connection
12+
13+
API_URL = "https://oeo.test/"
14+
1115

1216
@pytest.fixture(params=["1.0.0"])
1317
def api_version(request):
@@ -65,3 +69,10 @@ def assert_oidc_device_code_flow(url: str = "https://oidc.test/dc", elapsed: flo
6569
assert time_machine.coordinates.time() - start >= elapsed
6670

6771
return assert_oidc_device_code_flow
72+
73+
74+
@pytest.fixture
75+
def con120(requests_mock):
76+
requests_mock.get(API_URL, json={"api_version": "1.2.0"})
77+
con = Connection(API_URL)
78+
return con

tests/rest/test_connection.py

+32
Original file line numberDiff line numberDiff line change
@@ -1846,6 +1846,38 @@ def test_load_result_filters(requests_mock):
18461846
}
18471847

18481848

1849+
class TestLoadStac:
1850+
def test_basic(self, con120):
1851+
cube = con120.load_stac("https://provide.test/dataset")
1852+
assert cube.flat_graph() == {
1853+
"loadstac1": {
1854+
"process_id": "load_stac",
1855+
"arguments": {"url": "https://provide.test/dataset"},
1856+
"result": True,
1857+
}
1858+
}
1859+
1860+
def test_extents(self, con120):
1861+
cube = con120.load_stac(
1862+
"https://provide.test/dataset",
1863+
spatial_extent={"west": 1, "south": 2, "east": 3, "north": 4},
1864+
temporal_extent=["2023-05-10", "2023-06-01"],
1865+
bands=["B02", "B03"],
1866+
)
1867+
assert cube.flat_graph() == {
1868+
"loadstac1": {
1869+
"process_id": "load_stac",
1870+
"arguments": {
1871+
"url": "https://provide.test/dataset",
1872+
"spatial_extent": {"east": 3, "north": 4, "south": 2, "west": 1},
1873+
"temporal_extent": ["2023-05-10", "2023-06-01"],
1874+
"bands": ["B02", "B03"],
1875+
},
1876+
"result": True,
1877+
}
1878+
}
1879+
1880+
18491881
def test_list_file_formats(requests_mock):
18501882
requests_mock.get(API_URL, json={"api_version": "1.0.0"})
18511883
conn = Connection(API_URL)

0 commit comments

Comments
 (0)