7
7
import io
8
8
9
9
import geopandas as gpd
10
- import numpy as np
10
+ import numpy
11
11
import pyproj
12
12
import shapely .geometry
13
13
import shapely .geometry .base
@@ -218,12 +218,13 @@ class DriverVectorCube:
218
218
# Name of the geometry dimension; a cube's first dimension must carry this name.
DIM_GEOMETRIES = "geometries"
# Default name of the dimension that holds the per-geometry property columns.
DIM_BANDS = "bands"
# Fallback column name/prefix used when the cube is flattened into a data frame.
FLATTEN_PREFIX = "vc"
# Column selection keyword: use every non-geometry column as cube values.
COLUMN_SELECTION_ALL = "all"
# Column selection keyword: use only the numerical columns as cube values.
COLUMN_SELECTION_NUMERICAL = "numerical"
221
223
222
224
def __init__ (
223
225
self ,
224
226
geometries : gpd .GeoDataFrame ,
225
227
cube : Optional [xarray .DataArray ] = None ,
226
- flatten_prefix : str = FLATTEN_PREFIX ,
227
228
):
228
229
"""
229
230
@@ -237,18 +238,78 @@ def __init__(
237
238
log .error (f"First cube dim should be { self .DIM_GEOMETRIES !r} but got dims { cube .dims !r} " )
238
239
raise VectorCubeError ("Cube's first dimension is invalid." )
239
240
if not geometries .index .equals (cube .indexes [cube .dims [0 ]]):
240
- log .error (f"Invalid VectorCube components { geometries .index !r } != { cube .indexes [cube .dims [0 ]]!r } " )
241
+ log .error (f"Invalid VectorCube components { geometries .index = } != { cube .indexes [cube .dims [0 ]]= } " )
241
242
raise VectorCubeError ("Incompatible vector cube components" )
242
243
self ._geometries : gpd .GeoDataFrame = geometries
243
244
self ._cube = cube
244
- self ._flatten_prefix = flatten_prefix
245
245
246
def with_cube(self, cube: xarray.DataArray) -> "DriverVectorCube":
    """
    Create new vector cube with same geometries but new cube.

    :param cube: data array to use as cube component of the new vector cube
    :return: new instance of the same class as ``self``, sharing its geometries
    """
    log.info(f"Creating vector cube with new cube {cube.name!r}")
    # Use the runtime class so that subclasses produce instances of themselves.
    vector_cube_class = type(self)
    return vector_cube_class(geometries=self._geometries, cube=cube)
250
+
251
@classmethod
def from_geodataframe(
    cls,
    data: gpd.GeoDataFrame,
    *,
    columns_for_cube: Union[List[str], str] = COLUMN_SELECTION_NUMERICAL,
    # TODO: change default band name to "properties" (per `load_geojson` spec introduced by https://github.com/Open-EO/openeo-processes/pull/427)
    dimension_name: str = DIM_BANDS,
) -> "DriverVectorCube":
    """
    Build a DriverVectorCube from given GeoPandas data frame,
    using the data frame geometries as vector cube geometries
    and other columns (as specified) as cube values along a "bands" dimension

    :param data: geopandas data frame
    :param columns_for_cube: which data frame columns to use as cube values.
        One of:
        - "numerical": automatically pick numerical columns
        - "all": use all columns as cube values
        - list of column names
        ``None`` is handled like "numerical".
    :param dimension_name: name of the "bands" dimension
    :return: vector cube (without cube component when no columns are selected)
    :raises ValueError: when ``columns_for_cube`` is not one of the supported values
    """
    # NOTE(review): only a column literally named "geometry" is excluded here;
    # confirm behavior for data frames whose active geometry column has another name.
    available_columns = [c for c in data.columns if c != "geometry"]

    if columns_for_cube is None:
        # TODO #114: what should default selection be?
        columns_for_cube = cls.COLUMN_SELECTION_NUMERICAL

    if columns_for_cube == cls.COLUMN_SELECTION_NUMERICAL:
        selected_columns = [c for c in available_columns if numpy.issubdtype(data[c].dtype, numpy.number)]
    elif columns_for_cube == cls.COLUMN_SELECTION_ALL:
        selected_columns = available_columns
    elif isinstance(columns_for_cube, list):
        # TODO #114 limit to subset with available columns (and automatically fill in missing columns with nodata)?
        selected_columns = columns_for_cube
    else:
        raise ValueError(
            f"Invalid columns_for_cube {columns_for_cube!r}: expected "
            f"{cls.COLUMN_SELECTION_NUMERICAL!r}, {cls.COLUMN_SELECTION_ALL!r} or a list of column names"
        )

    if not selected_columns:
        # Nothing to put in a cube: build a geometries-only vector cube.
        return cls(geometries=data)

    cube_df = data[selected_columns]
    # TODO: remove `columns_for_cube` from geopandas data frame?
    #   Enabling that triggers failure of some existing tests that use `aggregate_spatial`
    #   to "enrich" a vector cube with pre-existing properties
    #   Also see https://github.com/Open-EO/openeo-api/issues/504
    # geometries_df = data.drop(columns=columns_for_cube)
    geometries_df = data

    # TODO: leverage pandas `to_xarray` and xarray `to_array` instead of this manual building?
    cube: xarray.DataArray = xarray.DataArray(
        data=cube_df.values,
        dims=[cls.DIM_GEOMETRIES, dimension_name],
        coords={
            # Geometry coordinates reuse the data frame index, matching the geometries component.
            cls.DIM_GEOMETRIES: data.geometry.index.to_list(),
            dimension_name: cube_df.columns,
        },
    )
    return cls(geometries=geometries_df, cube=cube)
252
313
253
314
@classmethod
254
315
def from_fiona (
@@ -261,15 +322,21 @@ def from_fiona(
261
322
if len (paths ) != 1 :
262
323
# TODO #114 EP-3981: support multiple paths
263
324
raise FeatureUnsupportedException (message = "Loading a vector cube from multiple files is not supported" )
325
+ columns_for_cube = (options or {}).get ("columns_for_cube" , cls .COLUMN_SELECTION_NUMERICAL )
264
326
# TODO #114 EP-3981: lazy loading like/with DelayedVector
265
327
# note for GeoJSON: will consider Feature.id as well as Feature.properties.id
266
328
if "parquet" == driver :
267
- return cls .from_parquet (paths = paths )
329
+ return cls .from_parquet (paths = paths , columns_for_cube = columns_for_cube )
268
330
else :
269
- return cls (geometries = gpd .read_file (paths [0 ], driver = driver ))
331
+ gdf = gpd .read_file (paths [0 ], driver = driver )
332
+ return cls .from_geodataframe (gdf , columns_for_cube = columns_for_cube )
270
333
271
334
@classmethod
272
- def from_parquet (cls , paths : List [Union [str , Path ]]):
335
+ def from_parquet (
336
+ cls ,
337
+ paths : List [Union [str , Path ]],
338
+ columns_for_cube : Union [List [str ], str ] = COLUMN_SELECTION_NUMERICAL ,
339
+ ):
273
340
if len (paths ) != 1 :
274
341
# TODO #114 EP-3981: support multiple paths
275
342
raise FeatureUnsupportedException (
@@ -287,10 +354,14 @@ def from_parquet(cls, paths: List[Union[str, Path]]):
287
354
if "OGC:CRS84" in str (df .crs ) or "WGS 84 (CRS84)" in str (df .crs ):
288
355
# workaround for not being able to decode ogc:crs84
289
356
df .crs = CRS .from_epsg (4326 )
290
- return cls ( geometries = df )
357
+ return cls . from_geodataframe ( df , columns_for_cube = columns_for_cube )
291
358
292
359
@classmethod
293
- def from_geojson (cls , geojson : dict ) -> "DriverVectorCube" :
360
+ def from_geojson (
361
+ cls ,
362
+ geojson : dict ,
363
+ columns_for_cube : Union [List [str ], str ] = COLUMN_SELECTION_NUMERICAL ,
364
+ ) -> "DriverVectorCube" :
294
365
"""Construct vector cube from GeoJson dict structure"""
295
366
validate_geojson_coordinates (geojson )
296
367
# TODO support more geojson types?
@@ -308,7 +379,8 @@ def from_geojson(cls, geojson: dict) -> "DriverVectorCube":
308
379
raise FeatureUnsupportedException (
309
380
f"Can not construct DriverVectorCube from { geojson .get ('type' , type (geojson ))!r} "
310
381
)
311
- return cls (geometries = gpd .GeoDataFrame .from_features (features ))
382
+ gdf = gpd .GeoDataFrame .from_features (features )
383
+ return cls .from_geodataframe (gdf , columns_for_cube = columns_for_cube )
312
384
313
385
@classmethod
314
386
def from_geometry (
@@ -323,7 +395,9 @@ def from_geometry(
323
395
geometry = [geometry ]
324
396
return cls (geometries = gpd .GeoDataFrame (geometry = geometry ))
325
397
326
- def _as_geopandas_df (self ) -> gpd .GeoDataFrame :
398
+ def _as_geopandas_df (
399
+ self , flatten_prefix : Optional [str ] = None , flatten_name_joiner : str = "~"
400
+ ) -> gpd .GeoDataFrame :
327
401
"""Join geometries and cube as a geopandas dataframe"""
328
402
# TODO: avoid copy?
329
403
df = self ._geometries .copy (deep = True )
@@ -334,18 +408,19 @@ def _as_geopandas_df(self) -> gpd.GeoDataFrame:
334
408
if self ._cube .dims [1 :]:
335
409
stacked = self ._cube .stack (prop = self ._cube .dims [1 :])
336
410
log .info (f"Flattened cube component of vector cube to { stacked .shape [1 ]} properties" )
411
+ name_prefix = [flatten_prefix ] if flatten_prefix else []
337
412
for p in stacked .indexes ["prop" ]:
338
- name = "~" .join (str (x ) for x in [ self . _flatten_prefix ] + list (p ))
413
+ name = flatten_name_joiner .join (str (x ) for x in name_prefix + list (p ))
339
414
# TODO: avoid column collisions?
340
415
df [name ] = stacked .sel (prop = p )
341
416
else :
342
- df [self ._flatten_prefix ] = self ._cube
417
+ df [flatten_prefix or self .FLATTEN_PREFIX ] = self ._cube
343
418
344
419
return df
345
420
346
def to_geojson(self, flatten_prefix: Optional[str] = None) -> dict:
    """
    Export as GeoJSON FeatureCollection.

    :param flatten_prefix: optional prefix, passed on to `_as_geopandas_df`,
        for the names of the columns produced by flattening the cube
    :return: GeoJSON-style mapping of the joined geometries and cube values
    """
    joined_df = self._as_geopandas_df(flatten_prefix=flatten_prefix)
    return shapely.geometry.mapping(joined_df)
349
424
350
425
def to_wkt (self ) -> List [str ]:
351
426
wkts = [str (g ) for g in self ._geometries .geometry ]
@@ -369,7 +444,8 @@ def write_assets(
369
444
)
370
445
return self .to_legacy_save_result ().write_assets (directory )
371
446
372
- self ._as_geopandas_df ().to_file (path , driver = format_info .fiona_driver )
447
+ gdf = self ._as_geopandas_df (flatten_prefix = options .get ("flatten_prefix" ))
448
+ gdf .to_file (path , driver = format_info .fiona_driver )
373
449
374
450
if not format_info .multi_file :
375
451
# single file format
@@ -464,6 +540,9 @@ def geometry_count(self) -> int:
464
540
def get_geometries(self) -> Sequence[shapely.geometry.base.BaseGeometry]:
    """Return the geometry column of the geometries data frame."""
    geometry_column = self._geometries.geometry
    return geometry_column
466
542
543
def get_cube(self) -> Optional[xarray.DataArray]:
    """Return the cube component of this vector cube (``None`` if it has no cube)."""
    cube = self._cube
    return cube
545
+
467
546
def get_ids(self) -> Optional[Sequence]:
    """Return the "id" entry of the geometries data frame, as given by its `get` lookup."""
    ids = self._geometries.get("id")
    return ids
469
548
@@ -474,8 +553,9 @@ def get_xarray_cube_basics(self) -> Tuple[tuple, dict]:
474
553
return dims , coords
475
554
476
555
def __eq__(self, other):
    """
    Compare with another vector cube by value.

    Two vector cubes are considered equal when the values of their joined
    geopandas data frames (geometries plus flattened cube) are equal.
    Anything that is not a `DriverVectorCube` compares as not equal.
    """
    if not isinstance(other, DriverVectorCube):
        return False
    own_values = self._as_geopandas_df().values
    other_values = other._as_geopandas_df().values
    return numpy.array_equal(own_values, other_values)
479
559
480
560
def fit_class_random_forest (
481
561
self ,
0 commit comments