Skip to content

Commit 30276b2

Browse files
fix: pyarrow backend trimming timestamp to date (#2875)
1 parent aeef0bc commit 30276b2

File tree

3 files changed

+17
-16
lines changed

3 files changed

+17
-16
lines changed

awswrangler/_data_types.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -369,7 +369,7 @@ def athena2pandas(dtype: str, dtype_backend: str | None = None) -> str: # noqa:
369369
if (dtype == "string") or dtype.startswith("char") or dtype.startswith("varchar"):
370370
return "string" if dtype_backend != "pyarrow" else "string[pyarrow]"
371371
if dtype in ("timestamp", "timestamp with time zone"):
372-
return "datetime64" if dtype_backend != "pyarrow" else "date64[pyarrow]"
372+
return "datetime64" if dtype_backend != "pyarrow" else "timestamp[ns][pyarrow]"
373373
if dtype == "date":
374374
return "date" if dtype_backend != "pyarrow" else "date32[pyarrow]"
375375
if dtype.startswith("decimal"):

tests/_utils.py

+15 -14
Original file line number | Diff line number | Diff line change
@@ -334,31 +334,32 @@ def get_df_dtype_backend(dtype_backend: Literal["numpy_nullable", "pyarrow"] = "
334334
"int32_nullable": [1, None, 3],
335335
"int64_nullable": [1, None, 3],
336336
"float_nullable": [0.0, None, 2.2],
337-
# "bool_nullable": [True, None, False],
337+
"bool_nullable": [True, None, False],
338338
"string_nullable": ["Washington", None, "Seattle"],
339-
# "date_nullable": [dt("2020-01-01"), None, dt("2020-01-02")],
340-
# "timestamp_nullable": [ts("2020-01-01 00:00:00.0"), None, ts("2020-01-02 00:00:01.0")],
339+
"date_nullable": [dt("2020-01-01"), None, dt("2020-01-02")],
340+
"timestamp_nullable": [ts("2020-01-01 00:00:00.0"), None, ts("2020-01-02 00:00:01.0")],
341341
}
342342
)
343343
if dtype_backend == "numpy_nullable":
344-
df["int8_nullable"] = df["int8_nullable"].astype("Int8")
345-
df["int16_nullable"] = df["int16_nullable"].astype("Int16")
346-
df["int32_nullable"] = df["int32_nullable"].astype("Int32")
347-
df["int64_nullable"] = df["int64_nullable"].astype("Int64")
348-
df["float_nullable"] = df["float_nullable"].astype("Float64")
349-
# df["bool_nullable"] = df["bool_nullable"].astype("boolean")
350-
# df["date_nullable"] = df["date_nullable"].astype("string[python]")
351-
df["string_nullable"] = df["string_nullable"].astype("string[python]")
344+
df["int8_nullable"] = df["int8_nullable"].astype(pd.Int8Dtype())
345+
df["int16_nullable"] = df["int16_nullable"].astype(pd.Int16Dtype())
346+
df["int32_nullable"] = df["int32_nullable"].astype(pd.Int32Dtype())
347+
df["int64_nullable"] = df["int64_nullable"].astype(pd.Int64Dtype())
348+
df["float_nullable"] = df["float_nullable"].astype(pd.Float64Dtype())
349+
df["bool_nullable"] = df["bool_nullable"].astype(pd.BooleanDtype())
350+
df["string_nullable"] = df["string_nullable"].astype(pd.StringDtype())
351+
df["timestamp_nullable"] = df["timestamp_nullable"].astype(pd.DatetimeTZDtype())
352+
df["date_nullable"] = df["date_nullable"].astype(pd.StringDtype())
352353
elif dtype_backend == "pyarrow":
353354
df["int8_nullable"] = df["int8_nullable"].astype(pd.ArrowDtype(pa.int8()))
354355
df["int16_nullable"] = df["int16_nullable"].astype(pd.ArrowDtype(pa.int16()))
355356
df["int32_nullable"] = df["int32_nullable"].astype(pd.ArrowDtype(pa.int32()))
356357
df["int64_nullable"] = df["int64_nullable"].astype(pd.ArrowDtype(pa.int64()))
357358
df["float_nullable"] = df["float_nullable"].astype(pd.ArrowDtype(pa.float64()))
358-
# df["bool_nullable"] = df["bool_nullable"].astype(pd.ArrowDtype(pa.bool_()))
359-
# df["date_nullable"] = df["date_nullable"].astype(pd.ArrowDtype(pa.string()))
359+
df["bool_nullable"] = df["bool_nullable"].astype(pd.ArrowDtype(pa.bool_()))
360360
df["string_nullable"] = df["string_nullable"].astype(pd.ArrowDtype(pa.string()))
361-
# df["timestamp_nullable"] = df["timestamp_nullable"].astype("date64[ms][pyarrow]")
361+
df["date_nullable"] = df["date_nullable"].astype(pd.ArrowDtype(pa.date32()))
362+
df["timestamp_nullable"] = df["timestamp_nullable"].astype(pd.ArrowDtype(pa.timestamp("ns")))
362363
else:
363364
raise ValueError(f"Unknown dtype_backend: {dtype_backend}")
364365
return df

tests/unit/test_pandas_pyarrow_dtype_backend.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,7 @@ def test_athena_csv_dtype_backend(
105105
df["string_nullable"] = df["string_nullable"].astype("string[pyarrow]")
106106

107107
if ctas_approach or unload_approach:
108-
df2["string_nullable"].replace("", pa.NA, inplace=True)
108+
df2["string_nullable"] = df2["string_nullable"].replace("", pa.NA)
109109

110110
assert_pandas_equals(df, df2)
111111

0 commit comments

Comments
 (0)