Skip to content

Commit 147c3d3

Browse files
Fix explode to preserve datetime unit in Series and DataFrame; update related tests
1 parent 7d542b4 commit 147c3d3

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

pandas/core/frame.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"""
1111

1212
from __future__ import annotations
13-
13+
import pandas as pd
1414
import collections
1515
from collections import abc
1616
from collections.abc import (
@@ -9903,13 +9903,24 @@ def explode(
99039903
df = self.reset_index(drop=True)
99049904
if len(columns) == 1:
99059905
result = df[columns[0]].explode()
9906+
orig_dtype = df[columns[0]].dtype
9907+
if pd.api.types.is_datetime64_dtype(orig_dtype):
9908+
result = result.astype(orig_dtype)
99069909
else:
99079910
mylen = lambda x: len(x) if (is_list_like(x) and len(x) > 0) else 1
99089911
counts0 = self[columns[0]].apply(mylen)
99099912
for c in columns[1:]:
99109913
if not all(counts0 == self[c].apply(mylen)):
99119914
raise ValueError("columns must have matching element counts")
9912-
result = DataFrame({c: df[c].explode() for c in columns})
9915+
result_data = {}
9916+
for c in columns:
9917+
exploded_series = df[c].explode()
9918+
orig_dtype = df[c].dtype
9919+
if pd.api.types.is_datetime64_dtype(orig_dtype):
9920+
exploded_series = exploded_series.astype(orig_dtype)
9921+
result_data[c] = exploded_series
9922+
result = DataFrame(result_data)
9923+
99139924
result = df.drop(columns, axis=1).join(result)
99149925
if ignore_index:
99159926
result.index = default_index(len(result))

pandas/tests/series/methods/test_explode.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,29 @@ def test_explode_pyarrow_non_list_type(ignore_index):
175175
result = ser.explode(ignore_index=ignore_index)
176176
expected = pd.Series([1, 2, 3], dtype="int64[pyarrow]", index=[0, 1, 2])
177177
tm.assert_series_equal(result, expected)
178+
179+
def test_explode_preserves_datetime_unit():
# Check that exploding a Series built from a one-element list holding a
# datetime64[ms] NumPy array gives a result whose dtype equals the array's.
180+
# Build a three-element, hourly-spaced datetime64[ms] array by hand
181+
dt64_ms = np.array(["2020-01-01T00:00:00.000", "2020-01-01T01:00:00.000", "2020-01-01T02:00:00.000"], dtype="datetime64[ms]")
182+
s = pd.Series([dt64_ms])
183+
184+
# Explode the Series (default arguments)
185+
result = s.explode()
186+
187+
# Ensure the dtype (including unit) is preserved.
# NOTE(review): only the dtype is compared; the exploded values and index
# are not checked by this test.
188+
assert result.dtype == dt64_ms.dtype, f"Expected dtype {dt64_ms.dtype}, got {result.dtype}"
189+
190+
def test_single_column_explode_preserves_datetime_unit():
# Check that exploding a Series that wraps a millisecond-unit date_range
# gives a result whose dtype equals the range's dtype.
# NOTE(review): the name says "single_column", but the body explodes a
# pd.Series, not a DataFrame column -- confirm which path is intended.
191+
# Use freq in ms since unit='ms' (3600000 ms is one hour).
# NOTE(review): the trailing "Z" in the start string presumably makes the
# range timezone-aware (UTC) -- confirm that is intended here.
192+
rng = pd.date_range("2020-01-01T00:00:00Z", periods=3, freq="3600000ms", unit="ms")
193+
s = pd.Series([rng])
194+
result = s.explode()
# Only the dtype is compared; values and index are not checked.
195+
assert result.dtype == rng.dtype
196+
197+
def test_multi_column_explode_preserves_datetime_unit():
# Check that exploding two DataFrame columns together keeps each column's
# dtype equal to the dtype of the date_range it was built from.
# NOTE(review): this exercises DataFrame.explode, yet the diff places it in
# pandas/tests/series/methods/test_explode.py -- confirm the intended module.
198+
# Two identically constructed 2-period ranges with unit="ms" (3600000 ms is one hour)
rng1 = pd.date_range("2020-01-01", periods=2, freq="3600000ms", unit="ms")
199+
rng2 = pd.date_range("2020-01-01", periods=2, freq="3600000ms", unit="ms")
200+
# Each column is built from a one-element list containing its range
df = pd.DataFrame({"A": [rng1], "B": [rng2]})
201+
# Passing a list of columns explodes "A" and "B" in the same call
result = df.explode(["A", "B"])
202+
# Only the per-column dtypes are compared; values and index are not checked.
assert result["A"].dtype == rng1.dtype
203+
assert result["B"].dtype == rng2.dtype

0 commit comments

Comments
 (0)