diff --git a/rust/geodatafusion/src/udf/mod.rs b/rust/geodatafusion/src/udf/mod.rs
index 48b6899..1eeb97a 100644
--- a/rust/geodatafusion/src/udf/mod.rs
+++ b/rust/geodatafusion/src/udf/mod.rs
@@ -1,3 +1,4 @@
 pub mod geo;
 pub mod geohash;
 pub mod native;
+pub(crate) mod util;
diff --git a/rust/geodatafusion/src/udf/native/editors/flip_coordinates.rs b/rust/geodatafusion/src/udf/native/editors/flip_coordinates.rs
new file mode 100644
index 0000000..dcc96b7
--- /dev/null
+++ b/rust/geodatafusion/src/udf/native/editors/flip_coordinates.rs
@@ -0,0 +1,352 @@
+use std::any::Any;
+use std::sync::{Arc, OnceLock};
+
+use arrow_schema::{DataType, FieldRef};
+use datafusion::error::{DataFusionError, Result};
+use datafusion::logical_expr::scalar_doc_sections::DOC_SECTION_OTHER;
+use datafusion::logical_expr::{
+    ColumnarValue, Documentation, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl, Signature,
+};
+use geo_traits::GeometryTrait;
+use geoarrow_array::array::{
+    CoordBuffer, InterleavedCoordBuffer, LineStringArray, MultiLineStringArray, MultiPointArray,
+    MultiPolygonArray, PointArray, PolygonArray, RectArray, SeparatedCoordBuffer, from_arrow_array,
+};
+use geoarrow_array::builder::{GeometryBuilder, InterleavedCoordBufferBuilder};
+use geoarrow_array::cast::AsGeoArrowArray;
+use geoarrow_array::{GeoArrowArray, GeoArrowArrayAccessor, IntoArrow, downcast_geoarrow_array};
+use geoarrow_schema::error::GeoArrowResult;
+use geoarrow_schema::{CoordType, Dimension, GeoArrowType, GeometryType};
+
+use crate::data_types::any_single_geometry_type_input;
+use crate::error::GeoDataFusionResult;
+use crate::udf::util::to_wkt_geometry::geometry_to_flipped_wkt_geometry;
+
+/// `ST_FlipCoordinates`: returns the input geometry with its X and Y ordinates swapped.
+///
+/// `coord_type` is only used when the output is rebuilt as a generic `Geometry` array; the
+/// natively-typed inputs handled in `flip_coordinates_impl` keep their own type.
+#[derive(Debug)]
+pub struct FlipCoordinates {
+    signature: Signature,
+    coord_type: CoordType,
+}
+
+impl FlipCoordinates {
+    /// Create the UDF; `coord_type` is the coordinate layout of rebuilt `Geometry` outputs.
+    pub fn new(coord_type: CoordType) -> Self {
+        Self {
+            signature: any_single_geometry_type_input(),
+            coord_type,
+        }
+    }
+}
+
+impl Default for FlipCoordinates {
+    fn default() -> Self {
+        Self::new(Default::default())
+    }
+}
+
+static DOCUMENTATION: OnceLock<Documentation> = OnceLock::new();
+
+impl ScalarUDFImpl for FlipCoordinates {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        "st_flipcoordinates"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    // The output field is derived from the input field, so only `return_field_from_args` is
+    // implemented; this plain-`DataType` entry point always errors.
+    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
+        Err(DataFusionError::Internal("return_type".to_string()))
+    }
+
+    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
+        Ok(return_field_impl(args, self.coord_type)?)
+    }
+
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        Ok(invoke_impl(args, self.coord_type)?)
+    }
+
+    fn documentation(&self) -> Option<&Documentation> {
+        Some(DOCUMENTATION.get_or_init(|| {
+            Documentation::builder(
+                DOC_SECTION_OTHER,
+                "Returns a version of the given geometry with the X and Y axes flipped.",
+                "ST_FlipCoordinates(geometry)",
+            )
+            .with_argument("g1", "geometry")
+            .build()
+        }))
+    }
+}
+
+/// Compute the output field. The seven natively-typed inputs keep their type; every other
+/// input type is returned as a `Geometry` field using `coord_type`.
+fn return_field_impl(
+    args: ReturnFieldArgs,
+    coord_type: CoordType,
+) -> GeoDataFusionResult<FieldRef> {
+    let field = args.arg_fields[0].as_ref();
+    let geo_type = GeoArrowType::from_arrow_field(field)?;
+    let new_type = match geo_type {
+        GeoArrowType::Point(_)
+        | GeoArrowType::LineString(_)
+        | GeoArrowType::Polygon(_)
+        | GeoArrowType::MultiPoint(_)
+        | GeoArrowType::MultiLineString(_)
+        | GeoArrowType::MultiPolygon(_)
+        | GeoArrowType::Rect(_) => geo_type,
+        _ => GeoArrowType::Geometry(
+            GeometryType::new(geo_type.metadata().clone()).with_coord_type(coord_type),
+        ),
+    };
+    Ok(Arc::new(
+        new_type.to_field(field.name(), field.is_nullable()),
+    ))
+}
+
+/// Materialize the single argument as a GeoArrow array and flip it.
+fn invoke_impl(
+    args: ScalarFunctionArgs,
+    coord_type: CoordType,
+) -> GeoDataFusionResult<ColumnarValue> {
+    let arrays = ColumnarValue::values_to_arrays(&args.args)?;
+    let geo_array = from_arrow_array(&arrays[0], &args.arg_fields[0])?;
+    let result = flip_coordinates_impl(&geo_array, coord_type)?;
+    Ok(ColumnarValue::Array(result.into_array_ref()))
+}
+
+/// Dispatch on the array type: native arrays are flipped by rewriting their coordinate buffers
+/// only, everything else is rebuilt geometry-by-geometry into a `Geometry` array.
+fn flip_coordinates_impl(
+    array: &dyn GeoArrowArray,
+    coord_type: CoordType,
+) -> GeoArrowResult<Arc<dyn GeoArrowArray>> {
+    match array.data_type() {
+        GeoArrowType::Point(_) => Ok(Arc::new(flip_point_array(array.as_point()))),
+        GeoArrowType::LineString(_) => Ok(Arc::new(flip_line_string_array(array.as_line_string()))),
+        GeoArrowType::Polygon(_) => Ok(Arc::new(flip_polygon_array(array.as_polygon()))),
+        GeoArrowType::MultiPoint(_) => Ok(Arc::new(flip_multipoint_array(array.as_multi_point()))),
+        GeoArrowType::MultiLineString(_) => Ok(Arc::new(flip_multi_line_string_array(
+            array.as_multi_line_string(),
+        ))),
+        GeoArrowType::MultiPolygon(_) => {
+            Ok(Arc::new(flip_multi_polygon_array(array.as_multi_polygon())))
+        }
+        GeoArrowType::Rect(_) => Ok(Arc::new(flip_rect_array(array.as_rect()))),
+        _ => downcast_geoarrow_array!(array, flip_generic_array, coord_type),
+    }
+}
+
+fn flip_point_array(array: &PointArray) -> PointArray {
+    PointArray::new(
+        flip_coords(array.coords()),
+        array.logical_nulls(),
+        array.extension_type().metadata().clone(),
+    )
+}
+
+fn flip_line_string_array(array: &LineStringArray) -> LineStringArray {
+    LineStringArray::new(
+        flip_coords(array.coords()),
+        array.geom_offsets().clone(),
+        array.logical_nulls(),
+        array.extension_type().metadata().clone(),
+    )
+}
+
+fn flip_polygon_array(array: &PolygonArray) -> PolygonArray {
+    PolygonArray::new(
+        flip_coords(array.coords()),
+        array.geom_offsets().clone(),
+        array.ring_offsets().clone(),
+        array.logical_nulls(),
+        array.extension_type().metadata().clone(),
+    )
+}
+
+fn flip_multipoint_array(array: &MultiPointArray) -> MultiPointArray {
+    MultiPointArray::new(
+        flip_coords(array.coords()),
+        array.geom_offsets().clone(),
+        array.logical_nulls(),
+        array.extension_type().metadata().clone(),
+    )
+}
+
+fn flip_multi_line_string_array(array: &MultiLineStringArray) -> MultiLineStringArray {
+    MultiLineStringArray::new(
+        flip_coords(array.coords()),
+        array.geom_offsets().clone(),
+        array.ring_offsets().clone(),
+        array.logical_nulls(),
+        array.extension_type().metadata().clone(),
+    )
+}
+
+fn flip_multi_polygon_array(array: &MultiPolygonArray) -> MultiPolygonArray {
+    MultiPolygonArray::new(
+        flip_coords(array.coords()),
+        array.geom_offsets().clone(),
+        array.polygon_offsets().clone(),
+        array.ring_offsets().clone(),
+        array.logical_nulls(),
+        array.extension_type().metadata().clone(),
+    )
+}
+
+fn flip_rect_array(array: &RectArray) -> RectArray {
+    RectArray::new(
+        flip_separated_coords(array.lower()),
+        flip_separated_coords(array.upper()),
+        array.logical_nulls(),
+        array.extension_type().metadata().clone(),
+    )
+}
+
+/// Fallback for array types without a dedicated fast path: flip each geometry individually and
+/// collect the results in a `Geometry` array with the requested `coord_type`.
+fn flip_generic_array<'a>(
+    array: &'a impl GeoArrowArrayAccessor<'a>,
+    coord_type: CoordType,
+) -> GeoArrowResult<Arc<dyn GeoArrowArray>> {
+    let typ = GeometryType::new(array.data_type().metadata().clone()).with_coord_type(coord_type);
+    let mut output_builder = GeometryBuilder::new(typ);
+
+    for geom in array.iter() {
+        if let Some(g) = geom {
+            output_builder.push_geometry(Some(&flip_geometry(&g?)?))?;
+        } else {
+            output_builder.push_null();
+        }
+    }
+
+    Ok(Arc::new(output_builder.finish()))
+}
+
+/// Flip a single geometry by converting it to a `wkt` crate geometry with X and Y swapped.
+fn flip_geometry(geom: &impl GeometryTrait<T = f64>) -> GeoArrowResult<wkt::Wkt<f64>> {
+    geometry_to_flipped_wkt_geometry(geom)
+}
+
+fn flip_coords(coords: &CoordBuffer) -> CoordBuffer {
+    match coords {
+        CoordBuffer::Separated(separated) => {
+            CoordBuffer::Separated(flip_separated_coords(separated))
+        }
+        CoordBuffer::Interleaved(interleaved) => {
+            CoordBuffer::Interleaved(flip_interleaved_coords(interleaved))
+        }
+    }
+}
+
+/// Swap the X and Y buffers; remaining (Z/M) buffers keep their position.
+fn flip_separated_coords(coords: &SeparatedCoordBuffer) -> SeparatedCoordBuffer {
+    let mut buffers = coords.buffers().to_vec();
+    buffers.swap(0, 1);
+    SeparatedCoordBuffer::from_vec(buffers, coords.dim())
+        .expect("swapping two buffers of a valid coord buffer keeps it valid")
+}
+
+/// Rebuild an interleaved buffer coordinate-by-coordinate with X and Y swapped.
+fn flip_interleaved_coords(coords: &InterleavedCoordBuffer) -> InterleavedCoordBuffer {
+    let mut builder = InterleavedCoordBufferBuilder::with_capacity(coords.len(), coords.dim());
+    let raw_coord_buffer = coords.coords();
+    for coord_idx in 0..coords.len() {
+        match coords.dim() {
+            Dimension::XY => {
+                let x = raw_coord_buffer.get(coord_idx * 2).unwrap();
+                let y = raw_coord_buffer.get(coord_idx * 2 + 1).unwrap();
+                let flipped = geo::Coord { x: *y, y: *x };
+                builder.push_coord(&flipped);
+            }
+            Dimension::XYZ => {
+                let x = raw_coord_buffer.get(coord_idx * 3).unwrap();
+                let y = raw_coord_buffer.get(coord_idx * 3 + 1).unwrap();
+                let z = raw_coord_buffer.get(coord_idx * 3 + 2).unwrap();
+                let flipped = wkt::types::Coord {
+                    x: *y,
+                    y: *x,
+                    z: Some(*z),
+                    m: None,
+                };
+                builder.push_coord(&flipped);
+            }
+            Dimension::XYM => {
+                let x = raw_coord_buffer.get(coord_idx * 3).unwrap();
+                let y = raw_coord_buffer.get(coord_idx * 3 + 1).unwrap();
+                let m = raw_coord_buffer.get(coord_idx * 3 + 2).unwrap();
+                let flipped = wkt::types::Coord {
+                    x: *y,
+                    y: *x,
+                    z: None,
+                    m: Some(*m),
+                };
+                builder.push_coord(&flipped);
+            }
+            Dimension::XYZM => {
+                let x = raw_coord_buffer.get(coord_idx * 4).unwrap();
+                let y = raw_coord_buffer.get(coord_idx * 4 + 1).unwrap();
+                let z = raw_coord_buffer.get(coord_idx * 4 + 2).unwrap();
+                let m = raw_coord_buffer.get(coord_idx * 4 + 3).unwrap();
+                let flipped = wkt::types::Coord {
+                    x: *y,
+                    y: *x,
+                    z: Some(*z),
+                    m: Some(*m),
+                };
+                builder.push_coord(&flipped);
+            }
+        }
+    }
+    builder.finish()
+}
+
+#[cfg(test)]
+mod test {
+    use std::str::FromStr;
+
+    use datafusion::prelude::SessionContext;
+    use geoarrow_array::array::WktArray;
+
+    use super::*;
+    use crate::udf::native::io::{AsText, GeomFromText};
+
+    /// Run `ST_FlipCoordinates` on `input` (WKT) through SQL and return the resulting geometry.
+    async fn flip_wkt(input: &str) -> wkt::Wkt {
+        let ctx = SessionContext::new();
+
+        ctx.register_udf(FlipCoordinates::new(Default::default()).into());
+        ctx.register_udf(GeomFromText::new(Default::default()).into());
+        ctx.register_udf(AsText::new().into());
+
+        let sql = format!("SELECT ST_AsText(ST_FlipCoordinates(ST_GeomFromText('{input}')));");
+        let df = ctx.sql(&sql).await.unwrap();
+        let batch = df.collect().await.unwrap().into_iter().next().unwrap();
+        let wkt_arr =
+            WktArray::try_from((batch.column(0).as_ref(), batch.schema().field(0))).unwrap();
+        wkt_arr.value(0).unwrap()
+    }
+
+    #[tokio::test]
+    async fn test() {
+        let val = flip_wkt("POINT(1 2)").await;
+        assert_eq!(val, wkt::Wkt::from_str("POINT(2 1)").unwrap());
+    }
+
+    #[tokio::test]
+    async fn test_line_string() {
+        let val = flip_wkt("LINESTRING(1 2, 3 4)").await;
+        assert_eq!(val, wkt::Wkt::from_str("LINESTRING(2 1, 4 3)").unwrap());
+    }
+}
diff --git a/rust/geodatafusion/src/udf/native/editors/mod.rs b/rust/geodatafusion/src/udf/native/editors/mod.rs
new file mode 100644
index 0000000..cf478af
--- /dev/null
+++ b/rust/geodatafusion/src/udf/native/editors/mod.rs
@@ -0,0 +1,3 @@
+mod flip_coordinates;
+
+pub use flip_coordinates::FlipCoordinates;
diff --git a/rust/geodatafusion/src/udf/native/mod.rs b/rust/geodatafusion/src/udf/native/mod.rs
index b54c571..573f8e6 100644
--- a/rust/geodatafusion/src/udf/native/mod.rs
+++ b/rust/geodatafusion/src/udf/native/mod.rs
@@ -3,6 +3,7 @@
 pub mod accessors;
 pub mod bounding_box;
 pub mod constructors;
+pub mod editors;
 pub mod io;
 // mod processing;
 
diff --git a/rust/geodatafusion/src/udf/util/mod.rs b/rust/geodatafusion/src/udf/util/mod.rs
new file mode 100644
index 0000000..3685259
--- /dev/null
+++ b/rust/geodatafusion/src/udf/util/mod.rs
@@ -0,0 +1 @@
+pub(crate) mod to_wkt_geometry;
diff --git a/rust/geodatafusion/src/udf/util/to_wkt_geometry.rs b/rust/geodatafusion/src/udf/util/to_wkt_geometry.rs
new file mode 100644
index 0000000..bb5879b
--- /dev/null
+++ b/rust/geodatafusion/src/udf/util/to_wkt_geometry.rs
@@ -0,0 +1,194 @@
+//! Helpers to convert an `impl GeometryTrait` to a `WKT` type.
+//!
+//! This is **not** a WKT string but rather a geometry representation in the `wkt` crate. We use it
+//! because it additionally supports Z and M dimensions.
+//!
+//! Every conversion takes an `AxisOrder`, so the same code can also emit geometries whose X and
+//! Y ordinates are swapped (see `geometry_to_flipped_wkt_geometry`).
+use geo_traits::*;
+use geoarrow_schema::error::{GeoArrowError, GeoArrowResult};
+use wkt::WktNum;
+use wkt::types::*;
+
+/// Order in which the first two ordinates of every coordinate are written to the output.
+#[derive(Debug, Clone, Copy)]
+enum AxisOrder {
+    /// Keep `(x, y)` as read from the input.
+    Xy,
+    /// Emit `(y, x)`, i.e. swap the first two ordinates.
+    Yx,
+}
+
+/// Map a `geo_traits` dimension onto the `wkt` crate's dimension enum.
+fn dim_to_wkt_dim(dim: geo_traits::Dimensions) -> GeoArrowResult<Dimension> {
+    match dim {
+        geo_traits::Dimensions::Xy | geo_traits::Dimensions::Unknown(2) => Ok(Dimension::XY),
+        geo_traits::Dimensions::Xyz | geo_traits::Dimensions::Unknown(3) => Ok(Dimension::XYZ),
+        geo_traits::Dimensions::Xym => Ok(Dimension::XYM),
+        geo_traits::Dimensions::Xyzm | geo_traits::Dimensions::Unknown(4) => Ok(Dimension::XYZM),
+        _ => Err(GeoArrowError::InvalidGeoArrow(
+            "Unsupported coordinate dimension".to_string(),
+        )),
+    }
+}
+
+/// Convert one coordinate, writing X and Y in the requested `order`; Z and M are copied as-is.
+fn coord_to_wkt_coord<T: WktNum>(
+    coord: &impl CoordTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<Coord<T>> {
+    let (x, y) = match order {
+        AxisOrder::Xy => (coord.x(), coord.y()),
+        AxisOrder::Yx => (coord.y(), coord.x()),
+    };
+    let (z, m) = match dim_to_wkt_dim(coord.dim())? {
+        Dimension::XY => (None, None),
+        Dimension::XYZ => (coord.nth(2), None),
+        Dimension::XYM => (None, coord.nth(2)),
+        Dimension::XYZM => (coord.nth(2), coord.nth(3)),
+        _ => {
+            return Err(GeoArrowError::InvalidGeoArrow(
+                "Unsupported coordinate dimension".to_string(),
+            ));
+        }
+    };
+    Ok(Coord { x, y, z, m })
+}
+
+fn point_to_wkt_point<T: WktNum>(
+    point: &impl PointTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<Point<T>> {
+    Ok(Point::new(
+        point
+            .coord()
+            .map(|c| coord_to_wkt_coord(&c, order))
+            .transpose()?,
+        dim_to_wkt_dim(point.dim())?,
+    ))
+}
+
+fn line_string_to_wkt_line_string<T: WktNum>(
+    ls: &impl LineStringTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<LineString<T>> {
+    let coords = ls
+        .coords()
+        .map(|c| coord_to_wkt_coord(&c, order))
+        .collect::<GeoArrowResult<Vec<_>>>()?;
+    Ok(LineString::new(coords, dim_to_wkt_dim(ls.dim())?))
+}
+
+/// Convert a polygon. A polygon without an exterior ring becomes an empty polygon (no rings) that
+/// keeps the input's dimension.
+fn polygon_to_wkt_polygon<T: WktNum>(
+    poly: &impl PolygonTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<Polygon<T>> {
+    let dim = dim_to_wkt_dim(poly.dim())?;
+    let Some(exterior) = poly.exterior() else {
+        return Ok(Polygon::new(vec![], dim));
+    };
+
+    let mut rings = Vec::with_capacity(1 + poly.num_interiors());
+    rings.push(line_string_to_wkt_line_string(&exterior, order)?);
+    for interior in poly.interiors() {
+        rings.push(line_string_to_wkt_line_string(&interior, order)?);
+    }
+
+    Ok(Polygon::new(rings, dim))
+}
+
+fn multi_point_to_wkt_multi_point<T: WktNum>(
+    mp: &impl MultiPointTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<MultiPoint<T>> {
+    let points = mp
+        .points()
+        .map(|pt| point_to_wkt_point(&pt, order))
+        .collect::<GeoArrowResult<Vec<_>>>()?;
+    Ok(MultiPoint::new(points, dim_to_wkt_dim(mp.dim())?))
+}
+
+fn multi_line_string_to_wkt_multi_line_string<T: WktNum>(
+    mls: &impl MultiLineStringTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<MultiLineString<T>> {
+    let lines = mls
+        .line_strings()
+        .map(|ls| line_string_to_wkt_line_string(&ls, order))
+        .collect::<GeoArrowResult<Vec<_>>>()?;
+    Ok(MultiLineString::new(lines, dim_to_wkt_dim(mls.dim())?))
+}
+
+fn multi_polygon_to_wkt_multi_polygon<T: WktNum>(
+    mp: &impl MultiPolygonTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<MultiPolygon<T>> {
+    let polys = mp
+        .polygons()
+        .map(|poly| polygon_to_wkt_polygon(&poly, order))
+        .collect::<GeoArrowResult<Vec<_>>>()?;
+    Ok(MultiPolygon::new(polys, dim_to_wkt_dim(mp.dim())?))
+}
+
+/// Convert any supported geometry, dispatching on its concrete type.
+fn convert_geometry<T: WktNum>(
+    geom: &impl GeometryTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<wkt::Wkt<T>> {
+    match geom.as_type() {
+        geo_traits::GeometryType::Point(pt) => {
+            Ok(wkt::Wkt::Point(point_to_wkt_point(pt, order)?))
+        }
+        geo_traits::GeometryType::LineString(ls) => Ok(wkt::Wkt::LineString(
+            line_string_to_wkt_line_string(ls, order)?,
+        )),
+        geo_traits::GeometryType::Polygon(poly) => {
+            Ok(wkt::Wkt::Polygon(polygon_to_wkt_polygon(poly, order)?))
+        }
+        geo_traits::GeometryType::MultiPoint(mp) => Ok(wkt::Wkt::MultiPoint(
+            multi_point_to_wkt_multi_point(mp, order)?,
+        )),
+        geo_traits::GeometryType::MultiLineString(mls) => Ok(wkt::Wkt::MultiLineString(
+            multi_line_string_to_wkt_multi_line_string(mls, order)?,
+        )),
+        geo_traits::GeometryType::MultiPolygon(mp) => Ok(wkt::Wkt::MultiPolygon(
+            multi_polygon_to_wkt_multi_polygon(mp, order)?,
+        )),
+        geo_traits::GeometryType::GeometryCollection(gc) => Ok(wkt::Wkt::GeometryCollection(
+            geometry_collection_to_wkt_geometry_collection(gc, order)?,
+        )),
+        _ => Err(GeoArrowError::InvalidGeoArrow(
+            "Unsupported geometry type".to_string(),
+        )),
+    }
+}
+
+fn geometry_collection_to_wkt_geometry_collection<T: WktNum>(
+    gc: &impl GeometryCollectionTrait<T = T>,
+    order: AxisOrder,
+) -> GeoArrowResult<GeometryCollection<T>> {
+    let geoms = gc
+        .geometries()
+        .map(|g| convert_geometry(&g, order))
+        .collect::<GeoArrowResult<Vec<_>>>()?;
+    Ok(GeometryCollection::new(geoms, dim_to_wkt_dim(gc.dim())?))
+}
+
+/// Convert `geom` to its `wkt` crate representation, keeping coordinates unchanged.
+// No caller within this patch (the UDF only needs the flipped variant); kept as the plain entry.
+#[allow(dead_code)]
+pub(crate) fn geometry_to_wkt_geometry<T: WktNum>(
+    geom: &impl GeometryTrait<T = T>,
+) -> GeoArrowResult<wkt::Wkt<T>> {
+    convert_geometry(geom, AxisOrder::Xy)
+}
+
+/// Convert `geom` to its `wkt` crate representation with the X and Y ordinates of every
+/// coordinate swapped. Z and M values are left untouched.
+pub(crate) fn geometry_to_flipped_wkt_geometry<T: WktNum>(
+    geom: &impl GeometryTrait<T = T>,
+) -> GeoArrowResult<wkt::Wkt<T>> {
+    convert_geometry(geom, AxisOrder::Yx)
+}
+