diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst
index 1cdf213d81a74..69f324211e5b2 100644
--- a/doc/source/whatsnew/v0.25.2.rst
+++ b/doc/source/whatsnew/v0.25.2.rst
@@ -78,6 +78,7 @@ Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`).
+- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`).
 -
 -
 -
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 55def024cb1d4..e010e615e176e 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2263,26 +2263,28 @@ def _get_cythonized_result(
         base_func = getattr(libgroupby, how)
 
         for name, obj in self._iterate_slices():
+            values = obj._data._values  # use the block values so tz-aware data keeps its timezone (.values coerces to naive UTC)
+
             if aggregate:
                 result_sz = ngroups
             else:
-                result_sz = len(obj.values)
+                result_sz = len(values)
 
             if not cython_dtype:
-                cython_dtype = obj.values.dtype
+                cython_dtype = values.dtype
 
             result = np.zeros(result_sz, dtype=cython_dtype)
             func = partial(base_func, result, labels)
             inferences = None
 
             if needs_values:
-                vals = obj.values
+                vals = values
                 if pre_processing:
                     vals, inferences = pre_processing(vals)
                 func = partial(func, vals)
 
             if needs_mask:
-                mask = isna(obj.values).view(np.uint8)
+                mask = isna(values).view(np.uint8)
                 func = partial(func, mask)
 
             if needs_ngroups:
@@ -2291,7 +2293,7 @@ def _get_cythonized_result(
             func(**kwargs)  # Call func to modify indexer values in place
 
             if result_is_index:
-                result = algorithms.take_nd(obj.values, result)
+                result = algorithms.take_nd(values, result)
 
             if post_processing:
                 result = post_processing(result, inferences)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 4556b22b57279..bec5cbc5fecb8 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1882,3 +1882,69 @@ def test_groupby_axis_1(group_name):
     results = df.groupby(group_name, axis=1).sum()
     expected = df.T.groupby(group_name).sum().T
     assert_frame_equal(results, expected)
+
+
+@pytest.mark.parametrize(
+    "op, expected",
+    [
+        (
+            "shift",
+            {
+                "time": [
+                    None,
+                    None,
+                    Timestamp("2019-01-01 12:00:00"),
+                    Timestamp("2019-01-01 12:30:00"),
+                    None,
+                    None,
+                ]
+            },
+        ),
+        (
+            "bfill",
+            {
+                "time": [
+                    Timestamp("2019-01-01 12:00:00"),
+                    Timestamp("2019-01-01 12:30:00"),
+                    Timestamp("2019-01-01 14:00:00"),
+                    Timestamp("2019-01-01 14:30:00"),
+                    Timestamp("2019-01-01 14:00:00"),
+                    Timestamp("2019-01-01 14:30:00"),
+                ]
+            },
+        ),
+        (
+            "ffill",
+            {
+                "time": [
+                    Timestamp("2019-01-01 12:00:00"),
+                    Timestamp("2019-01-01 12:30:00"),
+                    Timestamp("2019-01-01 12:00:00"),
+                    Timestamp("2019-01-01 12:30:00"),
+                    Timestamp("2019-01-01 14:00:00"),
+                    Timestamp("2019-01-01 14:30:00"),
+                ]
+            },
+        ),
+    ],
+)
+def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected):
+    # GH19995, GH27992: Check that timezone information is not dropped in shift, bfill, and ffill
+    tz = tz_naive_fixture
+    data = {
+        "id": ["A", "B", "A", "B", "A", "B"],
+        "time": [
+            Timestamp("2019-01-01 12:00:00"),
+            Timestamp("2019-01-01 12:30:00"),
+            None,
+            None,
+            Timestamp("2019-01-01 14:00:00"),
+            Timestamp("2019-01-01 14:30:00"),
+        ],
+    }
+    df = DataFrame(data).assign(time=lambda x: x.time.dt.tz_localize(tz))
+
+    grouped = df.groupby("id")
+    result = getattr(grouped, op)()
+    expected = DataFrame(expected).assign(time=lambda x: x.time.dt.tz_localize(tz))
+    assert_frame_equal(result, expected)
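
As an illustrative sketch only (not part of the patch), the behavior the new test asserts looks roughly like this: with the fix applied, a tz-aware column keeps its datetime64[ns, UTC] dtype through a grouped ffill instead of coming back timezone-naive. The frame, column names, and values below are made up for the example.

    import pandas as pd

    df = pd.DataFrame(
        {
            "id": ["A", "B", "A", "B"],
            "time": pd.to_datetime(
                ["2019-01-01 12:00", "2019-01-01 12:30", None, None]
            ).tz_localize("UTC"),
        }
    )

    result = df.groupby("id").ffill()
    print(result.dtypes)  # with the fix: time    datetime64[ns, UTC]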