Skip to content

Commit 72fd708

Browse files
authored
ENH: Add first and last aggregations to Rolling and Expanding (#60579)
* ENH: Add first and last aggregations to Rolling and Expanding * Update reference doc * Set 'See Also' section in doc * Fix docstring * Retry fixing docstring * Fix missing period in docstring * Another missing period
1 parent 50767f8 commit 72fd708

12 files changed

+483
-1
lines changed

doc/source/reference/window.rst

+4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ Rolling window functions
3030
Rolling.std
3131
Rolling.min
3232
Rolling.max
33+
Rolling.first
34+
Rolling.last
3335
Rolling.corr
3436
Rolling.cov
3537
Rolling.skew
@@ -72,6 +74,8 @@ Expanding window functions
7274
Expanding.std
7375
Expanding.min
7476
Expanding.max
77+
Expanding.first
78+
Expanding.last
7579
Expanding.corr
7680
Expanding.cov
7781
Expanding.skew

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Other enhancements
5757
- :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
5858
- :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing for explicit control over the y-axis label (:issue:`58239`)
5959
- :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
60+
- :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`)
6061
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
6162
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
6263
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)

pandas/_libs/window/aggregations.pyi

+12
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,18 @@ def roll_min(
6060
end: np.ndarray, # np.ndarray[np.int64]
6161
minp: int, # int64_t
6262
) -> np.ndarray: ... # np.ndarray[float]
63+
# Type stub for the compiled ``roll_first`` window aggregation.
# Takes the float64 values, int64 ``start``/``end`` bound arrays and an
# integer ``minp``; returns a float ndarray.
def roll_first(
64+
values: np.ndarray, # np.ndarray[np.float64]
65+
start: np.ndarray, # np.ndarray[np.int64]
66+
end: np.ndarray, # np.ndarray[np.int64]
67+
minp: int, # int64_t
68+
) -> np.ndarray: ... # np.ndarray[float]
69+
# Type stub for the compiled ``roll_last`` window aggregation.
# Same signature as ``roll_first``: float64 values, int64 ``start``/``end``
# bound arrays and an integer ``minp``; returns a float ndarray.
def roll_last(
70+
values: np.ndarray, # np.ndarray[np.float64]
71+
start: np.ndarray, # np.ndarray[np.int64]
72+
end: np.ndarray, # np.ndarray[np.int64]
73+
minp: int, # int64_t
74+
) -> np.ndarray: ... # np.ndarray[float]
6375
def roll_quantile(
6476
values: np.ndarray, # const float64_t[:]
6577
start: np.ndarray, # np.ndarray[np.int64]

pandas/_libs/window/aggregations.pyx

+83
Original file line numberDiff line numberDiff line change
@@ -1133,6 +1133,89 @@ cdef _roll_min_max(ndarray[float64_t] values,
11331133

11341134
return output
11351135

1136+
# ----------------------------------------------------------------------
1137+
# Rolling first, last
1138+
1139+
1140+
# Python-visible entry point for the rolling "first" aggregation.
# Thin wrapper: forwards all arguments to the shared kernel
# ``_roll_first_last`` with ``is_first=1``.
def roll_first(const float64_t[:] values, ndarray[int64_t] start,
1141+
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
1142+
return _roll_first_last(values, start, end, minp, is_first=1)
1143+
1144+
1145+
# Python-visible entry point for the rolling "last" aggregation.
# Thin wrapper: forwards all arguments to the shared kernel
# ``_roll_first_last`` with ``is_first=0``.
def roll_last(const float64_t[:] values, ndarray[int64_t] start,
1146+
ndarray[int64_t] end, int64_t minp) -> np.ndarray:
1147+
return _roll_first_last(values, start, end, minp, is_first=0)
1148+
1149+
1150+
# Shared kernel behind ``roll_first`` / ``roll_last``.
#
# For each window ``i`` (the half-open index range ``[start[i], end[i])`` into
# ``values``) the result is the first (``is_first`` true) or last (``is_first``
# false) non-NaN value inside the window. The result is NaN when the window
# holds fewer than ``minp`` non-NaN observations or no non-NaN value at all.
#
# Parameters
#   values   : float64 data to aggregate.
#   start    : per-window inclusive start indices.
#   end      : per-window exclusive end indices.
#   minp     : minimum number of non-NaN observations required for a result.
#   is_first : selects "first" (true) or "last" (false) behaviour.
#
# Returns a float64 ndarray with one entry per window (``len(start)``).
#
# State carried across windows: ``nobs`` is the non-NaN count of the current
# window and ``fl_idx`` is the index of the selected value (-1 = none found).
cdef _roll_first_last(const float64_t[:] values, ndarray[int64_t] start,
1151+
ndarray[int64_t] end, int64_t minp, bint is_first):
1152+
cdef:
1153+
Py_ssize_t i, j, fl_idx
1154+
bint is_monotonic_increasing_bounds
1155+
int64_t nobs = 0, N = len(start), s, e
1156+
float64_t val, res
1157+
ndarray[float64_t] output
1158+
1159+
is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
1160+
start, end
1161+
)
1162+
1163+
output = np.empty(N, dtype=np.float64)
1164+
1165+
# Largest window length is zero: nothing to aggregate, so every result is NaN.
if (end - start).max() == 0:
1166+
output[:] = NaN
1167+
return output
1168+
1169+
with nogil:
1170+
for i in range(0, N):
1171+
s = start[i]
1172+
e = end[i]
1173+
1174+
# Recompute from scratch for the first window, when the bounds are not
# monotonic, or when this window does not overlap the previous one.
if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
1175+
fl_idx = -1
1176+
nobs = 0
1177+
for j in range(s, e):
1178+
val = values[j]
1179+
# ``val == val`` is false only for NaN, so NaNs are skipped.
if val == val:
1180+
# "last": always overwrite; "first": only take the first hit in the window.
if not is_first or fl_idx < s:
1181+
fl_idx = j
1182+
nobs += 1
1183+
else:
1184+
# handle deletes
# (non-NaN values in [start[i - 1], s) have left the window)
1185+
for j in range(start[i - 1], s):
1186+
val = values[j]
1187+
if val == val:
1188+
nobs -= 1
1189+
1190+
# update fl_idx if out of range, if first
# (rescan the part of the previous window that is still in range)
1191+
if is_first and fl_idx < s:
1192+
fl_idx = -1
1193+
for j in range(s, end[i - 1]):
1194+
val = values[j]
1195+
if val == val:
1196+
fl_idx = j
1197+
break
1198+
1199+
# handle adds
# (values in [end[i - 1], e) have newly entered the window)
1200+
for j in range(end[i - 1], e):
1201+
val = values[j]
1202+
if val == val:
1203+
if not is_first or fl_idx < s:
1204+
fl_idx = j
1205+
nobs += 1
1206+
1207+
# Emit a value only with enough observations and a selected index that
# still lies inside the current window.
if nobs >= minp and fl_idx >= s:
1208+
res = values[fl_idx]
1209+
else:
1210+
res = NaN
1211+
1212+
output[i] = res
1213+
1214+
# With non-monotonic bounds the next window is rebuilt from scratch
# (see the branch condition above), so the running count is cleared here.
if not is_monotonic_increasing_bounds:
1215+
nobs = 0
1216+
1217+
return output
1218+
11361219

11371220
cdef enum InterpolationType:
11381221
LINEAR,

pandas/core/window/expanding.py

+72
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,78 @@ def skew(self, numeric_only: bool = False):
723723
def kurt(self, numeric_only: bool = False):
724724
return super().kurt(numeric_only=numeric_only)
725725

726+
@doc(
727+
template_header,
728+
create_section_header("Parameters"),
729+
kwargs_numeric_only,
730+
create_section_header("Returns"),
731+
template_returns,
732+
create_section_header("See Also"),
733+
dedent(
734+
"""
735+
GroupBy.first : Similar method for GroupBy objects.
736+
Expanding.last : Method to get the last element in each window.\n
737+
"""
738+
).replace("\n", "", 1),
739+
create_section_header("Examples"),
740+
dedent(
741+
"""
742+
The example below will show an expanding calculation with a window size of
743+
three.
744+
745+
>>> s = pd.Series(range(5))
746+
>>> s.expanding(3).first()
747+
0 NaN
748+
1 NaN
749+
2 0.0
750+
3 0.0
751+
4 0.0
752+
dtype: float64
753+
"""
754+
).replace("\n", "", 1),
755+
window_method="expanding",
756+
aggregation_description="First (left-most) element of the window",
757+
agg_method="first",
758+
)
759+
def first(self, numeric_only: bool = False):
760+
return super().first(numeric_only=numeric_only)
761+
762+
@doc(
763+
template_header,
764+
create_section_header("Parameters"),
765+
kwargs_numeric_only,
766+
create_section_header("Returns"),
767+
template_returns,
768+
create_section_header("See Also"),
769+
dedent(
770+
"""
771+
GroupBy.last : Similar method for GroupBy objects.
772+
Expanding.first : Method to get the first element in each window.\n
773+
"""
774+
).replace("\n", "", 1),
775+
create_section_header("Examples"),
776+
dedent(
777+
"""
778+
The example below will show an expanding calculation with a window size of
779+
three.
780+
781+
>>> s = pd.Series(range(5))
782+
>>> s.expanding(3).last()
783+
0 NaN
784+
1 NaN
785+
2 2.0
786+
3 3.0
787+
4 4.0
788+
dtype: float64
789+
"""
790+
).replace("\n", "", 1),
791+
window_method="expanding",
792+
aggregation_description="Last (right-most) element of the window",
793+
agg_method="last",
794+
)
795+
def last(self, numeric_only: bool = False):
796+
return super().last(numeric_only=numeric_only)
797+
726798
@doc(
727799
template_header,
728800
create_section_header("Parameters"),

pandas/core/window/rolling.py

+88
Original file line numberDiff line numberDiff line change
@@ -1740,6 +1740,22 @@ def kurt(self, numeric_only: bool = False):
17401740
numeric_only=numeric_only,
17411741
)
17421742

1743+
# Base implementation of the "first" window aggregation: hands the compiled
# ``window_aggregations.roll_first`` kernel to ``self._apply`` under the name
# "first", passing ``numeric_only`` through unchanged.
def first(self, numeric_only: bool = False):
1744+
window_func = window_aggregations.roll_first
1745+
return self._apply(
1746+
window_func,
1747+
name="first",
1748+
numeric_only=numeric_only,
1749+
)
1750+
1751+
# Base implementation of the "last" window aggregation: hands the compiled
# ``window_aggregations.roll_last`` kernel to ``self._apply`` under the name
# "last", passing ``numeric_only`` through unchanged.
def last(self, numeric_only: bool = False):
1752+
window_func = window_aggregations.roll_last
1753+
return self._apply(
1754+
window_func,
1755+
name="last",
1756+
numeric_only=numeric_only,
1757+
)
1758+
17431759
def quantile(
17441760
self,
17451761
q: float,
@@ -2622,6 +2638,78 @@ def sem(self, ddof: int = 1, numeric_only: bool = False):
26222638
def kurt(self, numeric_only: bool = False):
26232639
return super().kurt(numeric_only=numeric_only)
26242640

2641+
# Public ``Rolling.first``. The user-facing docstring is assembled by ``@doc``
# from the shared templates plus the "See Also" and "Examples" sections below;
# the method body only forwards ``numeric_only`` to the parent implementation.
@doc(
2642+
template_header,
2643+
create_section_header("Parameters"),
2644+
kwargs_numeric_only,
2645+
create_section_header("Returns"),
2646+
template_returns,
2647+
create_section_header("See Also"),
2648+
dedent(
2649+
"""
2650+
GroupBy.first : Similar method for GroupBy objects.
2651+
Rolling.last : Method to get the last element in each window.\n
2652+
"""
2653+
).replace("\n", "", 1),
2654+
create_section_header("Examples"),
2655+
dedent(
2656+
"""
2657+
The example below will show a rolling calculation with a window size of
2658+
three.
2659+
2660+
>>> s = pd.Series(range(5))
2661+
>>> s.rolling(3).first()
2662+
0 NaN
2663+
1 NaN
2664+
2 0.0
2665+
3 1.0
2666+
4 2.0
2667+
dtype: float64
2668+
"""
2669+
).replace("\n", "", 1),
2670+
window_method="rolling",
2671+
aggregation_description="First (left-most) element of the window",
2672+
agg_method="first",
2673+
)
2674+
def first(self, numeric_only: bool = False):
2675+
return super().first(numeric_only=numeric_only)
2676+
2677+
# Public ``Rolling.last``. The user-facing docstring is assembled by ``@doc``
# from the shared templates plus the "See Also" and "Examples" sections below;
# the method body only forwards ``numeric_only`` to the parent implementation.
@doc(
2678+
template_header,
2679+
create_section_header("Parameters"),
2680+
kwargs_numeric_only,
2681+
create_section_header("Returns"),
2682+
template_returns,
2683+
create_section_header("See Also"),
2684+
dedent(
2685+
"""
2686+
GroupBy.last : Similar method for GroupBy objects.
2687+
Rolling.first : Method to get the first element in each window.\n
2688+
"""
2689+
).replace("\n", "", 1),
2690+
create_section_header("Examples"),
2691+
dedent(
2692+
"""
2693+
The example below will show a rolling calculation with a window size of
2694+
three.
2695+
2696+
>>> s = pd.Series(range(5))
2697+
>>> s.rolling(3).last()
2698+
0 NaN
2699+
1 NaN
2700+
2 2.0
2701+
3 3.0
2702+
4 4.0
2703+
dtype: float64
2704+
"""
2705+
).replace("\n", "", 1),
2706+
window_method="rolling",
2707+
aggregation_description="Last (right-most) element of the window",
2708+
agg_method="last",
2709+
)
2710+
def last(self, numeric_only: bool = False):
2711+
return super().last(numeric_only=numeric_only)
2712+
26252713
@doc(
26262714
template_header,
26272715
create_section_header("Parameters"),

pandas/tests/window/test_cython_aggregations.py

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def _get_rolling_aggregations():
3030
("roll_median_c", window_aggregations.roll_median_c),
3131
("roll_max", window_aggregations.roll_max),
3232
("roll_min", window_aggregations.roll_min),
33+
("roll_first", window_aggregations.roll_first),
34+
("roll_last", window_aggregations.roll_last),
3335
]
3436
+ [
3537
(

0 commit comments

Comments
 (0)