Skip to content

Commit c657d9f

Browse files
noritadaproost
authored and committed
BUG/TST: fix and test for timezone drop in GroupBy.shift/bfill/ffill (pandas-dev#27992)
1 parent bf0c192 commit c657d9f

File tree

3 files changed

+74
-5
lines changed

3 files changed

+74
-5
lines changed

doc/source/whatsnew/v0.25.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Groupby/resample/rolling
7878
^^^^^^^^^^^^^^^^^^^^^^^^
7979

8080
- Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`).
81+
- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`).
8182
-
8283
-
8384
-

pandas/core/groupby/groupby.py

+7 -5
Original file line numberDiff line numberDiff line change
@@ -2263,26 +2263,28 @@ def _get_cythonized_result(
22632263
base_func = getattr(libgroupby, how)
22642264

22652265
for name, obj in self._iterate_slices():
2266+
values = obj._data._values
2267+
22662268
if aggregate:
22672269
result_sz = ngroups
22682270
else:
2269-
result_sz = len(obj.values)
2271+
result_sz = len(values)
22702272

22712273
if not cython_dtype:
2272-
cython_dtype = obj.values.dtype
2274+
cython_dtype = values.dtype
22732275

22742276
result = np.zeros(result_sz, dtype=cython_dtype)
22752277
func = partial(base_func, result, labels)
22762278
inferences = None
22772279

22782280
if needs_values:
2279-
vals = obj.values
2281+
vals = values
22802282
if pre_processing:
22812283
vals, inferences = pre_processing(vals)
22822284
func = partial(func, vals)
22832285

22842286
if needs_mask:
2285-
mask = isna(obj.values).view(np.uint8)
2287+
mask = isna(values).view(np.uint8)
22862288
func = partial(func, mask)
22872289

22882290
if needs_ngroups:
@@ -2291,7 +2293,7 @@ def _get_cythonized_result(
22912293
func(**kwargs) # Call func to modify indexer values in place
22922294

22932295
if result_is_index:
2294-
result = algorithms.take_nd(obj.values, result)
2296+
result = algorithms.take_nd(values, result)
22952297

22962298
if post_processing:
22972299
result = post_processing(result, inferences)

pandas/tests/groupby/test_groupby.py

+66
Original file line numberDiff line numberDiff line change
@@ -1882,3 +1882,69 @@ def test_groupby_axis_1(group_name):
18821882
results = df.groupby(group_name, axis=1).sum()
18831883
expected = df.T.groupby(group_name).sum().T
18841884
assert_frame_equal(results, expected)
1885+
1886+
1887+
@pytest.mark.parametrize(
1888+
"op, expected",
1889+
[
1890+
(
1891+
"shift",
1892+
{
1893+
"time": [
1894+
None,
1895+
None,
1896+
Timestamp("2019-01-01 12:00:00"),
1897+
Timestamp("2019-01-01 12:30:00"),
1898+
None,
1899+
None,
1900+
]
1901+
},
1902+
),
1903+
(
1904+
"bfill",
1905+
{
1906+
"time": [
1907+
Timestamp("2019-01-01 12:00:00"),
1908+
Timestamp("2019-01-01 12:30:00"),
1909+
Timestamp("2019-01-01 14:00:00"),
1910+
Timestamp("2019-01-01 14:30:00"),
1911+
Timestamp("2019-01-01 14:00:00"),
1912+
Timestamp("2019-01-01 14:30:00"),
1913+
]
1914+
},
1915+
),
1916+
(
1917+
"ffill",
1918+
{
1919+
"time": [
1920+
Timestamp("2019-01-01 12:00:00"),
1921+
Timestamp("2019-01-01 12:30:00"),
1922+
Timestamp("2019-01-01 12:00:00"),
1923+
Timestamp("2019-01-01 12:30:00"),
1924+
Timestamp("2019-01-01 14:00:00"),
1925+
Timestamp("2019-01-01 14:30:00"),
1926+
]
1927+
},
1928+
),
1929+
],
1930+
)
1931+
def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected):
1932+
# GH19995, GH27992: Check that timezone does not drop in shift, bfill, and ffill
1933+
tz = tz_naive_fixture
1934+
data = {
1935+
"id": ["A", "B", "A", "B", "A", "B"],
1936+
"time": [
1937+
Timestamp("2019-01-01 12:00:00"),
1938+
Timestamp("2019-01-01 12:30:00"),
1939+
None,
1940+
None,
1941+
Timestamp("2019-01-01 14:00:00"),
1942+
Timestamp("2019-01-01 14:30:00"),
1943+
],
1944+
}
1945+
df = DataFrame(data).assign(time=lambda x: x.time.dt.tz_localize(tz))
1946+
1947+
grouped = df.groupby("id")
1948+
result = getattr(grouped, op)()
1949+
expected = DataFrame(expected).assign(time=lambda x: x.time.dt.tz_localize(tz))
1950+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)