Skip to content

Commit 427c8dc

Browse files
committed
Arrow reading: generic code path (as used by GeoJSON): fix mis-handling of timezones
Fixes geopandas/pyogrio#487 (comment) regarding GeoJSON
1 parent 4a92011 commit 427c8dc

File tree

2 files changed

+73
-34
lines changed

2 files changed

+73
-34
lines changed

autotest/ogr/ogr_geojson.py

+72-24
Original file line numberDiff line numberDiff line change
@@ -4605,7 +4605,7 @@ def test_ogr_geojson_arrow_stream_pyarrow_mixed_timezone(tmp_vsimem):
46054605

46064606

46074607
def test_ogr_geojson_arrow_stream_pyarrow_utc_plus_five(tmp_vsimem):
4608-
pytest.importorskip("pyarrow")
4608+
# pytest.importorskip("pyarrow")
46094609

46104610
filename = str(
46114611
tmp_vsimem / "test_ogr_geojson_arrow_stream_pyarrow_utc_plus_five.geojson"
@@ -4621,22 +4621,37 @@ def test_ogr_geojson_arrow_stream_pyarrow_utc_plus_five(tmp_vsimem):
46214621
lyr.CreateFeature(f)
46224622
ds = None
46234623

4624+
try:
4625+
import pyarrow # NOQA
4626+
4627+
has_pyarrow = True
4628+
except ImportError:
4629+
has_pyarrow = False
4630+
if has_pyarrow:
4631+
ds = ogr.Open(filename)
4632+
lyr = ds.GetLayer(0)
4633+
stream = lyr.GetArrowStreamAsPyArrow()
4634+
assert stream.schema.field("datetime").type.tz == "+05:00"
4635+
values = []
4636+
for batch in stream:
4637+
for x in batch.field("datetime"):
4638+
values.append(x.value)
4639+
assert values == [1653982496789, 1653986096789]
4640+
4641+
mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
4642+
mem_lyr = mem_ds.CreateLayer("test", geom_type=ogr.wkbPoint)
46244643
ds = ogr.Open(filename)
46254644
lyr = ds.GetLayer(0)
4626-
stream = lyr.GetArrowStreamAsPyArrow()
4627-
assert stream.schema.field("datetime").type.tz == "+05:00"
4628-
values = []
4629-
for batch in stream:
4630-
for x in batch.field("datetime"):
4631-
values.append(x.value)
4632-
assert values == [1654000496789, 1654004096789]
4645+
mem_lyr.WriteArrow(lyr)
4646+
4647+
f = mem_lyr.GetNextFeature()
4648+
assert f["datetime"] == "2022/05/31 12:34:56.789+05"
46334649

46344650

46354651
###############################################################################
46364652

46374653

46384654
def test_ogr_geojson_arrow_stream_pyarrow_utc_minus_five(tmp_vsimem):
4639-
pytest.importorskip("pyarrow")
46404655

46414656
filename = str(
46424657
tmp_vsimem / "test_ogr_geojson_arrow_stream_pyarrow_utc_minus_five.geojson"
@@ -4652,22 +4667,37 @@ def test_ogr_geojson_arrow_stream_pyarrow_utc_minus_five(tmp_vsimem):
46524667
lyr.CreateFeature(f)
46534668
ds = None
46544669

4670+
try:
4671+
import pyarrow # NOQA
4672+
4673+
has_pyarrow = True
4674+
except ImportError:
4675+
has_pyarrow = False
4676+
if has_pyarrow:
4677+
ds = ogr.Open(filename)
4678+
lyr = ds.GetLayer(0)
4679+
stream = lyr.GetArrowStreamAsPyArrow()
4680+
assert stream.schema.field("datetime").type.tz == "-05:00"
4681+
values = []
4682+
for batch in stream:
4683+
for x in batch.field("datetime"):
4684+
values.append(x.value)
4685+
assert values == [1654018496789, 1654022096789]
4686+
4687+
mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
4688+
mem_lyr = mem_ds.CreateLayer("test", geom_type=ogr.wkbPoint)
46554689
ds = ogr.Open(filename)
46564690
lyr = ds.GetLayer(0)
4657-
stream = lyr.GetArrowStreamAsPyArrow()
4658-
assert stream.schema.field("datetime").type.tz == "-05:00"
4659-
values = []
4660-
for batch in stream:
4661-
for x in batch.field("datetime"):
4662-
values.append(x.value)
4663-
assert values == [1654000496789, 1654004096789]
4691+
mem_lyr.WriteArrow(lyr)
4692+
4693+
f = mem_lyr.GetNextFeature()
4694+
assert f["datetime"] == "2022/05/31 12:34:56.789-05"
46644695

46654696

46664697
###############################################################################
46674698

46684699

46694700
def test_ogr_geojson_arrow_stream_pyarrow_unknown_timezone(tmp_vsimem):
4670-
pytest.importorskip("pyarrow")
46714701

46724702
filename = str(
46734703
tmp_vsimem / "test_ogr_geojson_arrow_stream_pyarrow_unknown_timezone.geojson"
@@ -4683,15 +4713,33 @@ def test_ogr_geojson_arrow_stream_pyarrow_unknown_timezone(tmp_vsimem):
46834713
lyr.CreateFeature(f)
46844714
ds = None
46854715

4716+
try:
4717+
import pyarrow # NOQA
4718+
4719+
has_pyarrow = True
4720+
except ImportError:
4721+
has_pyarrow = False
4722+
if has_pyarrow:
4723+
ds = ogr.Open(filename)
4724+
lyr = ds.GetLayer(0)
4725+
stream = lyr.GetArrowStreamAsPyArrow()
4726+
assert stream.schema.field("datetime").type.tz is None
4727+
values = []
4728+
for batch in stream:
4729+
for x in batch.field("datetime"):
4730+
values.append(x.value)
4731+
assert values == [1654000496789, 1654004096789]
4732+
4733+
mem_ds = ogr.GetDriverByName("Memory").CreateDataSource("")
4734+
mem_lyr = mem_ds.CreateLayer("test", geom_type=ogr.wkbPoint)
46864735
ds = ogr.Open(filename)
46874736
lyr = ds.GetLayer(0)
4688-
stream = lyr.GetArrowStreamAsPyArrow()
4689-
assert stream.schema.field("datetime").type.tz is None
4690-
values = []
4691-
for batch in stream:
4692-
for x in batch.field("datetime"):
4693-
values.append(x.value)
4694-
assert values == [1654000496789, 1654004096789]
4737+
mem_lyr.WriteArrow(lyr)
4738+
4739+
f = mem_lyr.GetNextFeature()
4740+
# We have lost the timezone info here, as there's no way in Arrow to
4741+
# have a mix of values with and without timezone in a single column
4742+
assert f["datetime"] == "2022/05/31 12:34:56.789"
46954743

46964744

46974745
###############################################################################

ogr/ogrsf_frmts/generic/ogrlayerarrow.cpp

+1-10
Original file line numberDiff line numberDiff line change
@@ -1780,17 +1780,8 @@ FillDateTimeArray(struct ArrowArray *psChild,
17801780
auto nVal =
17811781
CPLYMDHMSToUnixTime(&brokenDown) * 1000 +
17821782
(static_cast<int>(psRawField->Date.Second * 1000 + 0.5) % 1000);
1783-
if (nFieldTZFlag > OGR_TZFLAG_MIXED_TZ &&
1783+
if (nFieldTZFlag >= OGR_TZFLAG_MIXED_TZ &&
17841784
psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
1785-
{
1786-
// Convert for psRawField->Date.TZFlag to nFieldTZFlag
1787-
const int TZOffset =
1788-
(psRawField->Date.TZFlag - nFieldTZFlag) * 15;
1789-
const int TZOffsetMS = TZOffset * 60 * 1000;
1790-
nVal -= TZOffsetMS;
1791-
}
1792-
else if (nFieldTZFlag == OGR_TZFLAG_MIXED_TZ &&
1793-
psRawField->Date.TZFlag > OGR_TZFLAG_MIXED_TZ)
17941785
{
17951786
// Convert from psRawField->Date.TZFlag to UTC
17961787
const int TZOffset =

0 commit comments

Comments
 (0)