 import io

 import geopandas as gpd
-import numpy as np
+import numpy
 import pyproj
 import shapely.geometry
 import shapely.geometry.base
@@ -218,12 +218,13 @@ class DriverVectorCube:
     DIM_GEOMETRIES = "geometries"
     DIM_BANDS = "bands"
     FLATTEN_PREFIX = "vc"
+    COLUMN_SELECTION_ALL = "all"
+    COLUMN_SELECTION_NUMERICAL = "numerical"

     def __init__(
         self,
         geometries: gpd.GeoDataFrame,
         cube: Optional[xarray.DataArray] = None,
-        flatten_prefix: str = FLATTEN_PREFIX,
     ):
         """

@@ -237,18 +238,77 @@ def __init__(
                 log.error(f"First cube dim should be {self.DIM_GEOMETRIES!r} but got dims {cube.dims!r}")
                 raise VectorCubeError("Cube's first dimension is invalid.")
             if not geometries.index.equals(cube.indexes[cube.dims[0]]):
-                log.error(f"Invalid VectorCube components {geometries.index!r} != {cube.indexes[cube.dims[0]]!r}")
+                log.error(f"Invalid VectorCube components {geometries.index=} != {cube.indexes[cube.dims[0]]=}")
                 raise VectorCubeError("Incompatible vector cube components")
         self._geometries: gpd.GeoDataFrame = geometries
         self._cube = cube
-        self._flatten_prefix = flatten_prefix

-    def with_cube(self, cube: xarray.DataArray, flatten_prefix: str = FLATTEN_PREFIX) -> "DriverVectorCube":
+    def with_cube(self, cube: xarray.DataArray) -> "DriverVectorCube":
         """Create new vector cube with same geometries but new cube"""
         log.info(f"Creating vector cube with new cube {cube.name!r}")
-        return type(self)(
-            geometries=self._geometries, cube=cube, flatten_prefix=flatten_prefix
-        )
+        return type(self)(geometries=self._geometries, cube=cube)
+
+    @classmethod
+    def from_geodataframe(
+        cls,
+        data: gpd.GeoDataFrame,
+        *,
+        columns_for_cube: Union[List[str], str] = COLUMN_SELECTION_NUMERICAL,
+        dimension_name: str = DIM_BANDS,
+    ) -> "DriverVectorCube":
+        """
+        Build a DriverVectorCube from a given GeoPandas data frame,
+        using the data frame geometries as vector cube geometries
+        and other columns (as specified) as cube values along a "bands" dimension.
+
+        :param data: geopandas data frame
+        :param columns_for_cube: which data frame columns to use as cube values.
+            One of:
+            - "numerical": automatically pick numerical columns
+            - "all": use all columns as cube values
+            - list of column names
+        :param dimension_name: name of the "bands" dimension
+        :return: vector cube
+        """
+        available_columns = [c for c in data.columns if c != "geometry"]
+
+        if columns_for_cube is None:
+            # TODO #114: what should the default selection be?
+            columns_for_cube = cls.COLUMN_SELECTION_NUMERICAL
+
+        if columns_for_cube == cls.COLUMN_SELECTION_NUMERICAL:
+            columns_for_cube = [c for c in available_columns if numpy.issubdtype(data[c].dtype, numpy.number)]
+        elif columns_for_cube == cls.COLUMN_SELECTION_ALL:
+            columns_for_cube = available_columns
+        elif isinstance(columns_for_cube, list):
+            # TODO #114: limit to subset of available columns (and automatically fill in missing columns with nodata)?
+            columns_for_cube = columns_for_cube
+        else:
+            raise ValueError(columns_for_cube)
+        assert isinstance(columns_for_cube, list)
+
+        if columns_for_cube:
+            cube_df = data[columns_for_cube]
+            # TODO: remove `columns_for_cube` from the geopandas data frame?
+            # Enabling that triggers failure of some existing tests that use `aggregate_spatial`
+            # to "enrich" a vector cube with pre-existing properties.
+            # Also see https://github.com/Open-EO/openeo-api/issues/504
+            # geometries_df = data.drop(columns=columns_for_cube)
+            geometries_df = data
+
+            # TODO: leverage pandas `to_xarray` and xarray `to_array` instead of this manual building?
+            cube: xarray.DataArray = xarray.DataArray(
+                data=cube_df.values,
+                dims=[cls.DIM_GEOMETRIES, dimension_name],
+                coords={
+                    cls.DIM_GEOMETRIES: data.geometry.index.to_list(),
+                    dimension_name: cube_df.columns,
+                },
+            )
+            return cls(geometries=geometries_df, cube=cube)
+
+        else:
+            return cls(geometries=data)

     @classmethod
     def from_fiona(
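
A minimal usage sketch of the new `from_geodataframe` factory (editorial illustration, not part of the diff; the `openeo_driver.datacube` import path and the sample data frame are assumptions):

    import geopandas as gpd
    import shapely.geometry

    from openeo_driver.datacube import DriverVectorCube  # assumed import path

    df = gpd.GeoDataFrame({
        "geometry": [shapely.geometry.Point(1, 2), shapely.geometry.Point(3, 4)],
        "population": [1234, 5678],    # numerical column
        "name": ["foo", "bar"],        # non-numerical column
    })

    # Default "numerical" selection: only "population" ends up as a cube value
    # along the "bands" dimension; "name" stays behind as a plain data frame column.
    vc = DriverVectorCube.from_geodataframe(df)

    # Alternatives: take all non-geometry columns, or an explicit list of columns.
    vc_all = DriverVectorCube.from_geodataframe(df, columns_for_cube="all")
    vc_pop = DriverVectorCube.from_geodataframe(df, columns_for_cube=["population"])
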
@@ -261,15 +321,21 @@ def from_fiona(
         if len(paths) != 1:
             # TODO #114 EP-3981: support multiple paths
             raise FeatureUnsupportedException(message="Loading a vector cube from multiple files is not supported")
+        columns_for_cube = options.get("columns_for_cube", cls.COLUMN_SELECTION_NUMERICAL)
         # TODO #114 EP-3981: lazy loading like/with DelayedVector
         # note for GeoJSON: will consider Feature.id as well as Feature.properties.id
         if "parquet" == driver:
-            return cls.from_parquet(paths=paths)
+            return cls.from_parquet(paths=paths, columns_for_cube=columns_for_cube)
         else:
-            return cls(geometries=gpd.read_file(paths[0], driver=driver))
+            gdf = gpd.read_file(paths[0], driver=driver)
+            return cls.from_geodataframe(gdf, columns_for_cube=columns_for_cube)

     @classmethod
-    def from_parquet(cls, paths: List[Union[str, Path]]):
+    def from_parquet(
+        cls,
+        paths: List[Union[str, Path]],
+        columns_for_cube: Union[List[str], str] = COLUMN_SELECTION_NUMERICAL,
+    ):
         if len(paths) != 1:
             # TODO #114 EP-3981: support multiple paths
             raise FeatureUnsupportedException(
@@ -287,10 +353,14 @@ def from_parquet(cls, paths: List[Union[str, Path]]):
         if "OGC:CRS84" in str(df.crs) or "WGS 84 (CRS84)" in str(df.crs):
             # workaround for not being able to decode ogc:crs84
             df.crs = CRS.from_epsg(4326)
-        return cls(geometries=df)
+        return cls.from_geodataframe(df, columns_for_cube=columns_for_cube)

     @classmethod
-    def from_geojson(cls, geojson: dict) -> "DriverVectorCube":
+    def from_geojson(
+        cls,
+        geojson: dict,
+        columns_for_cube: Union[List[str], str] = COLUMN_SELECTION_NUMERICAL,
+    ) -> "DriverVectorCube":
         """Construct vector cube from GeoJson dict structure"""
         validate_geojson_coordinates(geojson)
         # TODO support more geojson types?
@@ -308,7 +378,8 @@ def from_geojson(cls, geojson: dict) -> "DriverVectorCube":
             raise FeatureUnsupportedException(
                 f"Can not construct DriverVectorCube from {geojson.get('type', type(geojson))!r}"
             )
-        return cls(geometries=gpd.GeoDataFrame.from_features(features))
+        gdf = gpd.GeoDataFrame.from_features(features)
+        return cls.from_geodataframe(gdf, columns_for_cube=columns_for_cube)

     @classmethod
     def from_geometry(
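
The GeoJSON entry point now follows the same pattern; a hedged sketch (not part of the diff, with an illustrative FeatureCollection and the same assumed import path):

    from openeo_driver.datacube import DriverVectorCube  # assumed import path

    feature_collection = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [5.1, 51.2]},
                "properties": {"pop": 1234, "label": "site A"},
            },
        ],
    }

    # Feature properties become candidate cube columns: by default only the
    # numerical "pop" is used; columns_for_cube="all" also keeps "label".
    vc = DriverVectorCube.from_geojson(feature_collection, columns_for_cube="all")
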
@@ -323,7 +394,9 @@ def from_geometry(
             geometry = [geometry]
         return cls(geometries=gpd.GeoDataFrame(geometry=geometry))

-    def _as_geopandas_df(self) -> gpd.GeoDataFrame:
+    def _as_geopandas_df(
+        self, flatten_prefix: Optional[str] = None, flatten_name_joiner: str = "~"
+    ) -> gpd.GeoDataFrame:
         """Join geometries and cube as a geopandas dataframe"""
         # TODO: avoid copy?
         df = self._geometries.copy(deep=True)
@@ -334,18 +407,19 @@ def _as_geopandas_df(self) -> gpd.GeoDataFrame:
             if self._cube.dims[1:]:
                 stacked = self._cube.stack(prop=self._cube.dims[1:])
                 log.info(f"Flattened cube component of vector cube to {stacked.shape[1]} properties")
+                name_prefix = [flatten_prefix] if flatten_prefix else []
                 for p in stacked.indexes["prop"]:
-                    name = "~".join(str(x) for x in [self._flatten_prefix] + list(p))
+                    name = flatten_name_joiner.join(str(x) for x in name_prefix + list(p))
                     # TODO: avoid column collisions?
                     df[name] = stacked.sel(prop=p)
             else:
-                df[self._flatten_prefix] = self._cube
+                df[flatten_prefix or self.FLATTEN_PREFIX] = self._cube

         return df

-    def to_geojson(self) -> dict:
+    def to_geojson(self, flatten_prefix: Optional[str] = None) -> dict:
         """Export as GeoJSON FeatureCollection."""
-        return shapely.geometry.mapping(self._as_geopandas_df())
+        return shapely.geometry.mapping(self._as_geopandas_df(flatten_prefix=flatten_prefix))

     def to_wkt(self) -> List[str]:
         wkts = [str(g) for g in self._geometries.geometry]
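
A short sketch of how flattening now happens at export time instead of being fixed in the constructor (editorial illustration, not part of the diff; the property names reflect my reading of the joiner logic above, and the import path is assumed):

    import geopandas as gpd
    import shapely.geometry

    from openeo_driver.datacube import DriverVectorCube  # assumed import path

    df = gpd.GeoDataFrame({
        "geometry": [shapely.geometry.Point(1, 2)],
        "pop": [1234],
        "area": [56.7],
    })
    vc = DriverVectorCube.from_geodataframe(df, columns_for_cube=["pop", "area"])

    # Without a prefix, flattened GeoJSON properties keep the band labels: "pop", "area".
    plain = vc.to_geojson()

    # With a prefix, names are joined with "~": "stats~pop", "stats~area".
    prefixed = vc.to_geojson(flatten_prefix="stats")
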
@@ -369,7 +443,8 @@ def write_assets(
             )
             return self.to_legacy_save_result().write_assets(directory)

-        self._as_geopandas_df().to_file(path, driver=format_info.fiona_driver)
+        gdf = self._as_geopandas_df(flatten_prefix=options.get("flatten_prefix"))
+        gdf.to_file(path, driver=format_info.fiona_driver)

         if not format_info.multi_file:
             # single file format
@@ -474,8 +549,9 @@ def get_xarray_cube_basics(self) -> Tuple[tuple, dict]:
         return dims, coords

     def __eq__(self, other):
-        return (isinstance(other, DriverVectorCube)
-                and np.array_equal(self._as_geopandas_df().values, other._as_geopandas_df().values))
+        return isinstance(other, DriverVectorCube) and numpy.array_equal(
+            self._as_geopandas_df().values, other._as_geopandas_df().values
+        )

     def fit_class_random_forest(
         self,