Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/geoarrow/geoarrow.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,11 @@ GeoArrowErrorCode GeoArrowWKTWriterInit(struct GeoArrowWKTWriter* writer);
void GeoArrowWKTWriterInitVisitor(struct GeoArrowWKTWriter* writer,
struct GeoArrowVisitor* v);

GeoArrowErrorCode GeoArrowWKTWriterAppend(struct GeoArrowWKTWriter* writer,
struct GeoArrowGeometryView geom);

GeoArrowErrorCode GeoArrowWKTWriterAppendNull(struct GeoArrowWKTWriter* writer);

/// \brief Finish an ArrowArray containing elements from the visited input
///
/// This function can be called more than once to support multiple batches.
Expand Down
4 changes: 4 additions & 0 deletions src/geoarrow/geoarrow_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ struct ArrowArrayStream {
_GEOARROW_MAKE_NAME(GEOARROW_NAMESPACE, GeoArrowWKTWriterInit)
#define GeoArrowWKTWriterInitVisitor \
_GEOARROW_MAKE_NAME(GEOARROW_NAMESPACE, GeoArrowWKTWriterInitVisitor)
#define GeoArrowWKTWriterAppend \
_GEOARROW_MAKE_NAME(GEOARROW_NAMESPACE, GeoArrowWKTWriterAppend)
#define GeoArrowWKTWriterAppendNull \
_GEOARROW_MAKE_NAME(GEOARROW_NAMESPACE, GeoArrowWKTWriterAppendNull)
#define GeoArrowWKTWriterFinish \
_GEOARROW_MAKE_NAME(GEOARROW_NAMESPACE, GeoArrowWKTWriterFinish)
#define GeoArrowWKTWriterReset \
Expand Down
300 changes: 279 additions & 21 deletions src/geoarrow/wkt_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,72 @@ static inline int WKTWriterWrite(struct WKTWriterPrivate* private, const char* v
return ArrowBufferAppend(&private->values, value, strlen(value));
}

static GeoArrowErrorCode WKTWriterWriteGeometryType(
struct WKTWriterPrivate* private_data, enum GeoArrowGeometryType geometry_type,
enum GeoArrowDimensions dimensions) {
const char* geometry_type_name = GeoArrowGeometryTypeString(geometry_type);
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, geometry_type_name));

switch (dimensions) {
case GEOARROW_DIMENSIONS_XY:
break;
case GEOARROW_DIMENSIONS_XYZ:
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, " Z"));
break;
case GEOARROW_DIMENSIONS_XYM:
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, " M"));
break;
case GEOARROW_DIMENSIONS_XYZM:
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, " ZM"));
break;
default:
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, " <not valid>"));
return EINVAL;
}

GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, " "));

return GEOARROW_OK;
}

static GeoArrowErrorCode WKTWriterReserveCoords(struct WKTWriterPrivate* private_data,
int64_t n_coords, int n_values) {
int64_t max_chars_per_coord_theoretical =
// space + comma after coordinate
(n_coords * 2) +
// spaces between ordinates
(n_coords * (n_values - 1)) +
// GeoArrowPrintDouble might require up to 40 accessible bytes per call
(40 * n_values);

// Use a heuristic to estimate the number of characters we are about to write
// to avoid more then one allocation for this call. This is normally substantially
// less than the theoretical amount.
int64_t max_chars_estimated =
(n_coords * 2) + // space + comma after coordinate
(n_coords * (n_values - 1)) + // spaces between ordinates
// precision + decimal + estimate of normal
// digits to the left of the decimal
((private_data->precision + 1 + 8) * n_coords * n_values) +
// Ensure that the last reserve() call doesn't trigger an allocation
max_chars_per_coord_theoretical;

GEOARROW_RETURN_NOT_OK(ArrowBufferReserve(&private_data->values, max_chars_estimated));
return GEOARROW_OK;
}

static GeoArrowErrorCode WKTWriterCheckElementOutputSize(
struct WKTWriterPrivate* private_data) {
// Check if we've hit our max number of bytes for this feature
if (private_data->max_element_size_bytes >= 0 &&
(private_data->values.size_bytes - private_data->values_feat_start) >=
private_data->max_element_size_bytes) {
return EAGAIN;
} else {
return GEOARROW_OK;
}
}

static inline void WKTWriterWriteDoubleUnsafe(struct WKTWriterPrivate* private,
double value) {
// Always ensure that we have at least 40 writable bytes remaining before calling
Expand Down Expand Up @@ -85,27 +151,8 @@ static int geom_start_wkt(struct GeoArrowVisitor* v,
return EINVAL;
}

const char* geometry_type_name = GeoArrowGeometryTypeString(geometry_type);
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private, geometry_type_name));

switch (dimensions) {
case GEOARROW_DIMENSIONS_XY:
break;
case GEOARROW_DIMENSIONS_XYZ:
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private, " Z"));
break;
case GEOARROW_DIMENSIONS_XYM:
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private, " M"));
break;
case GEOARROW_DIMENSIONS_XYZM:
NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private, " ZM"));
break;
default:
GeoArrowErrorSet(v->error, "WKTWriter::geom_start(): Unexpected `dimensions`");
return EINVAL;
}

NANOARROW_RETURN_NOT_OK(WKTWriterWrite(private, " "));
GEOARROW_RETURN_NOT_OK(
WKTWriterWriteGeometryType(private, geometry_type, dimensions));
}

if (private->level > 0) {
Expand Down Expand Up @@ -146,6 +193,7 @@ static int coords_wkt(struct GeoArrowVisitor* v, const struct GeoArrowCoordView*

struct WKTWriterPrivate* private = (struct WKTWriterPrivate*)v->private_data;
NANOARROW_RETURN_NOT_OK(WKTWriterCheckLevel(private));
GEOARROW_RETURN_NOT_OK(WKTWriterReserveCoords(private, n_coords, n_dims));

int64_t max_chars_per_coord_theoretical =
// space + comma after coordinate
Expand Down Expand Up @@ -270,6 +318,216 @@ static int feat_end_wkt(struct GeoArrowVisitor* v) {
return GEOARROW_OK;
}

static GeoArrowErrorCode GeoArrowWKTWriterAppendValidity(struct GeoArrowWKTWriter* writer,
int is_valid) {
struct WKTWriterPrivate* private_data = (struct WKTWriterPrivate*)writer->private_data;

private_data->length++;

if (!is_valid) {
if (private_data->validity.buffer.data == NULL) {
NANOARROW_RETURN_NOT_OK(
ArrowBitmapReserve(&private_data->validity, private_data->length));
ArrowBitmapAppendUnsafe(&private_data->validity, 1, private_data->length - 1);
}

private_data->null_count++;
NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(&private_data->validity, 0, 1));
} else if (private_data->validity.buffer.data != NULL) {
NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(&private_data->validity, 1, 1));
}

return GEOARROW_OK;
}

static inline void GeoArrowWKTWriterAppendCoordUnsafe(
struct WKTWriterPrivate* private_data, const uint8_t** cursor, const int32_t* strides,
int n_values) {
double value;

memcpy(&value, cursor[0], sizeof(double));
WKTWriterWriteDoubleUnsafe(private_data, value);
cursor[0] += strides[0];

for (int j = 1; j < n_values; j++) {
ArrowBufferAppendUnsafe(&private_data->values, " ", 1);
memcpy(&value, cursor[j], sizeof(double));
WKTWriterWriteDoubleUnsafe(private_data, value);
cursor[j] += strides[j];
}
}

static GeoArrowErrorCode GeoArrowWKTWriterAppendCoordsUnsafe(
struct WKTWriterPrivate* private_data, const struct GeoArrowGeometryNode* node,
int n_values) {
const uint8_t* cursor[4];
memcpy(cursor, node->coords, sizeof(cursor));

int32_t stride[4];
memcpy(stride, node->coord_stride, sizeof(stride));

// Append the first coord
GeoArrowWKTWriterAppendCoordUnsafe(private_data, cursor, stride, n_values);

// Append the rest of the coords
for (uint32_t i = 1; i < node->size; i++) {
GEOARROW_RETURN_NOT_OK(WKTWriterCheckElementOutputSize(private_data));
ArrowBufferAppendUnsafe(&private_data->values, ", ", 2);
GeoArrowWKTWriterAppendCoordUnsafe(private_data, cursor, stride, n_values);
}

return GEOARROW_OK;
}

static GeoArrowErrorCode GeoArrowWKTWriterAppendSequenceOrEmpty(
struct WKTWriterPrivate* private_data, const struct GeoArrowGeometryNode* node) {
if (node->size == 0) {
GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, "EMPTY"));
return GEOARROW_OK;
}

uint32_t n_coords = node->size;
int n_values = _GeoArrowkNumDimensions[node->dimensions];

GEOARROW_RETURN_NOT_OK(WKTWriterReserveCoords(private_data, n_coords, n_values));

GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, "("));
GeoArrowWKTWriterAppendCoordsUnsafe(private_data, node, n_values);
GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, ")"));

return GEOARROW_OK;
}

static GeoArrowErrorCode GeoArrowWKTWriterAppendFlatMultipoint(
struct WKTWriterPrivate* private_data, const struct GeoArrowGeometryNode* node,
const struct GeoArrowGeometryNode** node_iter) {
int n_values;
const uint8_t* cursor[4];
int32_t stride[4] = {0, 0, 0, 0};

GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, "("));

// Append the first child as a coord
const struct GeoArrowGeometryNode* child_node = (*node_iter)++;
n_values = _GeoArrowkNumDimensions[child_node->dimensions];
GEOARROW_RETURN_NOT_OK(
WKTWriterReserveCoords(private_data, child_node->size, n_values));
memcpy(cursor, child_node->coords, sizeof(cursor));
GeoArrowWKTWriterAppendCoordUnsafe(private_data, cursor, stride, n_values);

// Append the rest of the children as coords
for (uint32_t i = 1; i < node->size; i++) {
GEOARROW_RETURN_NOT_OK(WKTWriterCheckElementOutputSize(private_data));

const struct GeoArrowGeometryNode* child_node = (*node_iter)++;
n_values = _GeoArrowkNumDimensions[child_node->dimensions];
GEOARROW_RETURN_NOT_OK(
WKTWriterReserveCoords(private_data, child_node->size, n_values));

memcpy(cursor, child_node->coords, sizeof(cursor));
GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, ", "));
GeoArrowWKTWriterAppendCoordUnsafe(private_data, cursor, stride, n_values);
}

GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, ")"));
return GEOARROW_OK;
}

static GeoArrowErrorCode GeoArrowWKTWriterAppendImpl(
struct WKTWriterPrivate* private_data, const struct GeoArrowGeometryNode** node_iter,
int tag) {
const struct GeoArrowGeometryNode* node = (*node_iter)++;

if (tag) {
enum GeoArrowGeometryType geometry_type =
(enum GeoArrowGeometryType)node->geometry_type;
enum GeoArrowDimensions dimensions = (enum GeoArrowDimensions)node->dimensions;

GEOARROW_RETURN_NOT_OK(
WKTWriterWriteGeometryType(private_data, geometry_type, dimensions));
}

switch (node->geometry_type) {
case GEOARROW_GEOMETRY_TYPE_POINT:
case GEOARROW_GEOMETRY_TYPE_LINESTRING:
GEOARROW_RETURN_NOT_OK(GeoArrowWKTWriterAppendSequenceOrEmpty(private_data, node));
return GEOARROW_OK;

case GEOARROW_GEOMETRY_TYPE_MULTIPOINT:
if (private_data->use_flat_multipoint) {
return GeoArrowWKTWriterAppendFlatMultipoint(private_data, node, node_iter);
}
#if defined(__GNUC__) || defined(__clang__)
__attribute__((fallthrough));
#endif
case GEOARROW_GEOMETRY_TYPE_POLYGON:
case GEOARROW_GEOMETRY_TYPE_MULTILINESTRING:
case GEOARROW_GEOMETRY_TYPE_MULTIPOLYGON:
case GEOARROW_GEOMETRY_TYPE_GEOMETRYCOLLECTION: {
if (node->size == 0) {
GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, "EMPTY"));
return GEOARROW_OK;
}

GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, "("));
GEOARROW_RETURN_NOT_OK(GeoArrowWKTWriterAppendImpl(
private_data, node_iter,
node->geometry_type == GEOARROW_GEOMETRY_TYPE_GEOMETRYCOLLECTION));
for (uint32_t i = 1; i < node->size; i++) {
GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, ", "));
GEOARROW_RETURN_NOT_OK(GeoArrowWKTWriterAppendImpl(
private_data, node_iter,
node->geometry_type == GEOARROW_GEOMETRY_TYPE_GEOMETRYCOLLECTION));
}
GEOARROW_RETURN_NOT_OK(WKTWriterWrite(private_data, ")"));
return GEOARROW_OK;
}
default:
return EINVAL;
}
}

GeoArrowErrorCode GeoArrowWKTWriterAppendNull(struct GeoArrowWKTWriter* writer) {
struct WKTWriterPrivate* private_data = (struct WKTWriterPrivate*)writer->private_data;
GEOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(
&private_data->offsets, (int32_t)private_data->values.size_bytes));
GEOARROW_RETURN_NOT_OK(GeoArrowWKTWriterAppendValidity(writer, 0));
return GEOARROW_OK;
}

GeoArrowErrorCode GeoArrowWKTWriterAppend(struct GeoArrowWKTWriter* writer,
struct GeoArrowGeometryView geom) {
struct WKTWriterPrivate* private_data = (struct WKTWriterPrivate*)writer->private_data;
private_data->use_flat_multipoint = writer->use_flat_multipoint;
private_data->max_element_size_bytes = writer->max_element_size_bytes;
private_data->precision = writer->precision;

private_data->values_feat_start = private_data->values.size_bytes;

GEOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(
&private_data->offsets, (int32_t)private_data->values.size_bytes));

const struct GeoArrowGeometryNode* node_iter = geom.root;
GeoArrowErrorCode result = GeoArrowWKTWriterAppendImpl(private_data, &node_iter, 1);
switch (result) {
case GEOARROW_OK:
break;
case EAGAIN:
if (private_data->max_element_size_bytes >= 0 &&
(private_data->values.size_bytes - private_data->values_feat_start) >
private_data->max_element_size_bytes) {
private_data->values.size_bytes =
private_data->values_feat_start + private_data->max_element_size_bytes;
}
break;
default:
return result;
}

GEOARROW_RETURN_NOT_OK(GeoArrowWKTWriterAppendValidity(writer, 1));
return GEOARROW_OK;
}

GeoArrowErrorCode GeoArrowWKTWriterInit(struct GeoArrowWKTWriter* writer) {
struct WKTWriterPrivate* private =
(struct WKTWriterPrivate*)ArrowMalloc(sizeof(struct WKTWriterPrivate));
Expand Down
3 changes: 3 additions & 0 deletions src/geoarrow/wkx_files_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,19 @@ TEST(WKXFilesTest, WKXFilesTestFiles) {
ASSERT_EQ(GeoArrowGeometryInit(&geom_cloner), GEOARROW_OK);

const GeoArrowGeometry& geom_from_wkt = tester.AsGeometry(line_wkt);
EXPECT_EQ(tester.AsWKTUsingVisitor(geom_from_wkt), line_wkt);
EXPECT_EQ(tester.AsWKT(geom_from_wkt), line_wkt);

ASSERT_EQ(GeoArrowGeometryShallowCopy(GeoArrowGeometryAsView(&geom_from_wkt),
&geom_cloner),
GEOARROW_OK);
EXPECT_EQ(tester.AsWKTUsingVisitor(geom_cloner), line_wkt);
EXPECT_EQ(tester.AsWKT(geom_cloner), line_wkt);

ASSERT_EQ(
GeoArrowGeometryDeepCopy(GeoArrowGeometryAsView(&geom_from_wkt), &geom_cloner),
GEOARROW_OK);
EXPECT_EQ(tester.AsWKTUsingVisitor(geom_cloner), line_wkt);
EXPECT_EQ(tester.AsWKT(geom_cloner), line_wkt);

GeoArrowGeometryReset(&geom_cloner);
Expand Down
11 changes: 10 additions & 1 deletion src/geoarrow/wkx_testing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,20 @@ class WKXTester {
return WKTValue();
}

std::string AsWKT(const GeoArrowGeometry& geometry) {
std::string AsWKTUsingVisitor(const GeoArrowGeometry& geometry) {
ReadGeometry(geometry, WKTVisitor());
return WKTValue();
}

std::string AsWKT(const GeoArrowGeometry& geometry) {
int result = GeoArrowWKTWriterAppend(&wkt_writer_, GeoArrowGeometryAsView(&geometry));
if (result != GEOARROW_OK) {
throw WKXTestException("GeoArrowWKTWriterAppend", result, error_.message);
}

return WKTValue();
}

std::vector<uint8_t> AsWKB(const std::string& str) {
ReadWKT(str, WKBVisitor());
return WKBValue();
Expand Down
Loading