diff --git a/CHANGES.md b/CHANGES.md index 117751cd..920b1828 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,7 +9,9 @@ ### Bug fixes - Fix WKB writing on big-endian systems (#497). -- Fix writing fids to e.g. GPKG file with use_arrow (#511). +- Fix writing fids to e.g. GPKG file with `use_arrow` (#511). +- Fix error in `write_dataframe` when writing an empty or all-None object + column with `use_arrow` (#512). ### Packaging diff --git a/pyogrio/geopandas.py b/pyogrio/geopandas.py index 45fe3b1a..e6919eed 100644 --- a/pyogrio/geopandas.py +++ b/pyogrio/geopandas.py @@ -583,6 +583,15 @@ def write_dataframe( table = pa.Table.from_pandas(df, preserve_index=False) + # Null arrow columns are not supported by GDAL, so convert to string + for field_index, field in enumerate(table.schema): + if field.type == pa.null(): + table = table.set_column( + field_index, + field.with_type(pa.string()), + table[field_index].cast(pa.string()), + ) + if geometry_column is not None: # ensure that the geometry column is binary (for all-null geometries, # this could be a wrong type) diff --git a/pyogrio/tests/test_geopandas_io.py b/pyogrio/tests/test_geopandas_io.py index a65b5baa..73b1990d 100644 --- a/pyogrio/tests/test_geopandas_io.py +++ b/pyogrio/tests/test_geopandas_io.py @@ -1146,16 +1146,29 @@ def test_write_dataframe_index(tmp_path, naturalearth_lowres, use_arrow): @pytest.mark.parametrize("ext", [ext for ext in ALL_EXTS if ext not in ".geojsonl"]) +@pytest.mark.parametrize( + "columns, dtype", + [ + ([], None), + (["col_int"], np.int64), + (["col_float"], np.float64), + (["col_object"], object), + ], +) @pytest.mark.requires_arrow_write_api -def test_write_empty_dataframe(tmp_path, ext, use_arrow): - expected = gp.GeoDataFrame(geometry=[], crs=4326) +def test_write_empty_dataframe(tmp_path, ext, columns, dtype, use_arrow): + """Test writing dataframe with no rows. + With use_arrow, object type columns with no rows are converted to null type columns + by pyarrow, but null columns are not supported by GDAL. Added to test fix for #513. + """ + expected = gp.GeoDataFrame(geometry=[], columns=columns, dtype=dtype, crs=4326) filename = tmp_path / f"test{ext}" write_dataframe(expected, filename, use_arrow=use_arrow) assert filename.exists() - df = read_dataframe(filename) - assert_geodataframe_equal(df, expected) + df = read_dataframe(filename, use_arrow=use_arrow) + assert_geodataframe_equal(df, expected, check_index_type=False) def test_write_empty_geometry(tmp_path): @@ -1175,6 +1188,24 @@ def test_write_empty_geometry(tmp_path): assert_geodataframe_equal(df, expected) +@pytest.mark.requires_arrow_write_api +def test_write_None_string_column(tmp_path, use_arrow): + """Test pandas object columns with all None values. + + With use_arrow, such columns are converted to null type columns by pyarrow, but null + columns are not supported by GDAL. Added to test fix for #513. + """ + gdf = gp.GeoDataFrame({"object_col": [None]}, geometry=[Point(0, 0)], crs=4326) + filename = tmp_path / "test.gpkg" + + write_dataframe(gdf, filename, use_arrow=use_arrow) + assert filename.exists() + + result_gdf = read_dataframe(filename, use_arrow=use_arrow) + assert result_gdf.object_col.dtype == object + assert_geodataframe_equal(result_gdf, gdf) + + @pytest.mark.parametrize("ext", [".geojsonl", ".geojsons"]) @pytest.mark.requires_arrow_write_api def test_write_read_empty_dataframe_unsupported(tmp_path, ext, use_arrow):