Skip to content

Commit eed1489

Browse files
use DriverVectorCube in chunk_polygon Open-EO/openeo-python-driver#288
1 parent 5739cb6 commit eed1489

File tree

3 files changed

+7
-15
lines changed

3 files changed

+7
-15
lines changed

openeogeotrellis/geopysparkdatacube.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -572,9 +572,7 @@ def partitionByKey(spatialkey):
572572
def chunk_polygon(
573573
self,
574574
reducer: Union[ProcessGraphVisitor, Dict],
575-
# TODO: it's wrong to use MultiPolygon as a collection of polygons. MultiPolygons should be handled as single, atomic "features"
576-
# also see https://github.com/Open-EO/openeo-python-driver/issues/288
577-
chunks: MultiPolygon,
575+
chunks: DriverVectorCube,
578576
mask_value: float,
579577
env: EvalEnv,
580578
context: Optional[dict] = None,
@@ -584,27 +582,20 @@ def chunk_polygon(
584582

585583
if isinstance(reducer, dict):
586584
reducer = GeoPySparkBackendImplementation.accept_process_graph(reducer)
587-
chunks: List[Polygon] = chunks.geoms
588585
jvm = get_jvm()
589586

590587
result_collection = None
591588
if isinstance(reducer, SingleNodeUDFProcessGraphVisitor):
592589
udf, udf_context = self._extract_udf_code_and_context(process=reducer, context=context, env=env)
593-
# Polygons should use the same projection as the rdd.
594-
# TODO Usage of GeometryCollection should be avoided. It's abused here like a FeatureCollection,
595-
# but a GeometryCollection is conceptually just a single "feature".
596-
# What you want here is proper support for FeatureCollections or at least a list of individual geometries.
597-
# also see https://github.com/Open-EO/openeo-python-driver/issues/71, https://github.com/Open-EO/openeo-python-driver/issues/288
598590
reprojected_polygons: jvm.org.openeo.geotrellis.ProjectedPolygons \
599-
= to_projected_polygons(jvm, GeometryCollection(chunks))
591+
= to_projected_polygons(jvm, chunks)
600592
band_names = self.metadata.band_dimension.band_names
601593

602594
def rdd_function(rdd, _zoom):
603595
return jvm.org.openeo.geotrellis.udf.Udf.runChunkPolygonUserCode(
604596
udf, rdd, reprojected_polygons, band_names, udf_context, mask_value
605597
)
606598

607-
# All JEP implementations work with float cell types.
608599
float_cube = self.apply_to_levels(lambda layer: self._convert_celltype(layer, "float32"))
609600
result_collection = float_cube._apply_to_levels_geotrellis_rdd(
610601
rdd_function, self.metadata, gps.LayerType.SPACETIME

tests/test_chunk_polygon.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from shapely.geometry import Polygon, MultiPolygon
77

88
from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
9+
from openeo_driver.datacube import DriverVectorCube
910
from .data import get_test_data_file, TEST_DATA_ROOT
1011

1112

@@ -14,7 +15,7 @@
1415
#
1516
# Note: In order to run these tests you need to set several environment variables.
1617
# If you use the virtual environment venv (with JEP and Numpy installed):
17-
# 1. LD_LIBRARY_PATH = .../venv/lib/python3.6/site-packages/jep
18+
# 1. LD_LIBRARY_PATH = .../venv/lib/python3.8/site-packages/jep
1819
# This will look for the shared library 'jep.so'. This is the compiled C code that binds Java and Python objects.
1920

2021
def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
@@ -36,7 +37,7 @@ def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
3637
env = EvalEnv()
3738

3839
polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
39-
chunks = MultiPolygon([polygon1])
40+
chunks = DriverVectorCube.from_geometry(polygon1)
4041
cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
4142
result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
4243
result_layer: TiledRasterLayer = result_cube.pyramid.levels[0]

tests/test_error.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
from geopyspark import TiledRasterLayer, Extent
22
from openeo_driver.utils import EvalEnv
33
from py4j.protocol import Py4JJavaError
4-
from shapely.geometry import MultiPolygon
54

65
from openeogeotrellis.backend import GeoPySparkBackendImplementation
76
from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
87
from openeogeotrellis.utils import get_jvm
8+
from openeo_driver.datacube import DriverVectorCube
99

1010

1111
# Note: Ensure that the python environment has all the required modules installed.
@@ -44,7 +44,7 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
4444
env = EvalEnv()
4545

4646
polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
47-
chunks = MultiPolygon([polygon1])
47+
chunks = DriverVectorCube.from_geometry(polygon1)
4848
cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
4949
try:
5050
result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)

0 commit comments

Comments
 (0)