Skip to content

Commit

Permalink
ENH: Support pipe() method in Rolling and Expanding (#60697)
Browse files Browse the repository at this point in the history
* ENH: Support pipe() method in Rolling and Expanding

* Fix mypy errors

* Fix docstring errors

* Add pipe method to doc reference
  • Loading branch information
snitish authored Jan 13, 2025
1 parent 55a6d0a commit f787764
Show file tree
Hide file tree
Showing 6 changed files with 236 additions and 2 deletions.
2 changes: 2 additions & 0 deletions doc/source/reference/window.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Rolling window functions
Rolling.skew
Rolling.kurt
Rolling.apply
Rolling.pipe
Rolling.aggregate
Rolling.quantile
Rolling.sem
Expand Down Expand Up @@ -76,6 +77,7 @@ Expanding window functions
Expanding.skew
Expanding.kurt
Expanding.apply
Expanding.pipe
Expanding.aggregate
Expanding.quantile
Expanding.sem
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ Other enhancements
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
- :class:`Rolling` and :class:`Expanding` now support the ``pipe`` method (:issue:`57076`)
- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as an optional argument, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
Expand Down
57 changes: 57 additions & 0 deletions pandas/core/window/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,63 @@ def create_section_header(header: str) -> str:
"""
).replace("\n", "", 1)

# Shared numpydoc body for the ``pipe`` methods of the window classes.
# It carries two %-style placeholders, ``%(klass)s`` (the class name shown in
# the text) and ``%(examples)s`` (the class-specific Examples section), so it
# must not contain any other bare ``%`` character.
template_pipe = """
Apply a ``func`` with arguments to this %(klass)s object and return its result.

Use `.pipe` when you want to improve readability by chaining together
functions that expect Series, DataFrames, GroupBy, Rolling, Expanding or Resampler
objects.
Instead of writing

>>> h = lambda x, arg2, arg3: x + 1 - arg2 * arg3
>>> g = lambda x, arg1: x * 5 / arg1
>>> f = lambda x: x ** 4
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
...                   index=pd.date_range('2012-08-02', periods=4))
>>> h(g(f(df.rolling('2D')), arg1=1), arg2=2, arg3=3)  # doctest: +SKIP

You can write

>>> (df.rolling('2D')
...    .pipe(f)
...    .pipe(g, arg1=1)
...    .pipe(h, arg2=2, arg3=3))  # doctest: +SKIP

which is much more readable.

Parameters
----------
func : callable or tuple of (callable, str)
    Function to apply to this %(klass)s object or, alternatively,
    a `(callable, data_keyword)` tuple where `data_keyword` is a
    string indicating the keyword of `callable` that expects the
    %(klass)s object.
*args : iterable, optional
    Positional arguments passed into `func`.
**kwargs : dict, optional
    A dictionary of keyword arguments passed into `func`.

Returns
-------
object
    The value returned by ``func``; its type is whatever ``func`` produces.

See Also
--------
Series.pipe : Apply a function with arguments to a series.
DataFrame.pipe : Apply a function with arguments to a dataframe.
apply : Apply function to each window instead of to the
    full %(klass)s object.

Notes
-----
See more `here
<https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls>`_

Examples
--------
%(examples)s
"""

numba_notes = (
"See :ref:`window.numba_engine` and :ref:`enhancingperf.numba` for "
"extended documentation and performance considerations for the Numba engine.\n\n"
Expand Down
61 changes: 60 additions & 1 deletion pandas/core/window/expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,15 @@
TYPE_CHECKING,
Any,
Literal,
final,
overload,
)

from pandas.util._decorators import doc
from pandas.util._decorators import (
Appender,
Substitution,
doc,
)

from pandas.core.indexers.objects import (
BaseIndexer,
Expand All @@ -20,6 +26,7 @@
kwargs_numeric_only,
numba_notes,
template_header,
template_pipe,
template_returns,
template_see_also,
window_agg_numba_parameters,
Expand All @@ -34,7 +41,11 @@
from collections.abc import Callable

from pandas._typing import (
Concatenate,
P,
QuantileInterpolation,
Self,
T,
WindowingRankType,
)

Expand Down Expand Up @@ -241,6 +252,54 @@ def apply(
kwargs=kwargs,
)

# Typing-only overload: ``func`` is a plain callable whose first positional
# parameter receives this object (``Self``); the remaining ``*args`` and
# ``**kwargs`` are typed against the callable's other parameters (``P``), and
# the call is typed as returning the callable's return type ``T``.
@overload
def pipe(
self,
func: Callable[Concatenate[Self, P], T],
*args: P.args,
**kwargs: P.kwargs,
) -> T: ...

# Typing-only overload: ``func`` is a ``(callable, str)`` tuple. Extra
# arguments are typed loosely as ``Any`` because the callable's signature is
# not constrained (``Callable[..., T]``).
@overload
def pipe(
self,
func: tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T: ...

# Runtime implementation, marked ``@final``.
# The method has no literal docstring: ``Appender`` is given the shared
# ``template_pipe`` text and ``Substitution`` is given the ``klass`` and
# ``examples`` values matching that template's ``%(klass)s`` and
# ``%(examples)s`` placeholders.
@final
@Substitution(
klass="Expanding",
examples="""
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
... index=pd.date_range('2012-08-02', periods=4))
>>> df
A
2012-08-02 1
2012-08-03 2
2012-08-04 3
2012-08-05 4
To get the difference between each expanding window's maximum and minimum
value in one pass, you can do
>>> df.expanding().pipe(lambda x: x.max() - x.min())
A
2012-08-02 0.0
2012-08-03 1.0
2012-08-04 2.0
2012-08-05 3.0""",
)
@Appender(template_pipe)
def pipe(
self,
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
# Pure delegation: the parent class's ``pipe`` does the work; this override
# only adds the Expanding-specific documentation and typing.
return super().pipe(func, *args, **kwargs)

@doc(
template_header,
create_section_header("Parameters"),
Expand Down
85 changes: 84 additions & 1 deletion pandas/core/window/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
TYPE_CHECKING,
Any,
Literal,
final,
overload,
)

import numpy as np
Expand All @@ -26,7 +28,11 @@
import pandas._libs.window.aggregations as window_aggregations
from pandas.compat._optional import import_optional_dependency
from pandas.errors import DataError
from pandas.util._decorators import doc
from pandas.util._decorators import (
Appender,
Substitution,
doc,
)

from pandas.core.dtypes.common import (
ensure_float64,
Expand Down Expand Up @@ -81,6 +87,7 @@
kwargs_scipy,
numba_notes,
template_header,
template_pipe,
template_returns,
template_see_also,
window_agg_numba_parameters,
Expand All @@ -102,8 +109,12 @@

from pandas._typing import (
ArrayLike,
Concatenate,
NDFrameT,
QuantileInterpolation,
P,
Self,
T,
WindowingRankType,
npt,
)
Expand Down Expand Up @@ -1529,6 +1540,30 @@ def apply_func(values, begin, end, min_periods, raw=raw):

return apply_func

# Typing-only overload: ``func`` is a plain callable whose first positional
# parameter receives this object (``Self``); the remaining ``*args`` and
# ``**kwargs`` are typed against the callable's other parameters (``P``), and
# the call is typed as returning the callable's return type ``T``.
@overload
def pipe(
self,
func: Callable[Concatenate[Self, P], T],
*args: P.args,
**kwargs: P.kwargs,
) -> T: ...

# Typing-only overload: ``func`` is a ``(callable, str)`` tuple. Extra
# arguments are typed loosely as ``Any`` because the callable's signature is
# not constrained (``Callable[..., T]``).
@overload
def pipe(
self,
func: tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T: ...

# Runtime implementation: accepts either form of ``func`` described by the
# overloads above and returns whatever the helper returns.
def pipe(
self,
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
# Hands this object, ``func`` and the extra arguments to ``com.pipe``.
# NOTE(review): ``com`` is imported outside the visible hunk - presumably
# ``pandas.core.common``; confirm.
return com.pipe(self, func, *args, **kwargs)

def sum(
self,
numeric_only: bool = False,
Expand Down Expand Up @@ -2044,6 +2079,54 @@ def apply(
kwargs=kwargs,
)

# Typing-only overload: ``func`` is a plain callable whose first positional
# parameter receives this object (``Self``); the remaining ``*args`` and
# ``**kwargs`` are typed against the callable's other parameters (``P``), and
# the call is typed as returning the callable's return type ``T``.
@overload
def pipe(
self,
func: Callable[Concatenate[Self, P], T],
*args: P.args,
**kwargs: P.kwargs,
) -> T: ...

# Typing-only overload: ``func`` is a ``(callable, str)`` tuple. Extra
# arguments are typed loosely as ``Any`` because the callable's signature is
# not constrained (``Callable[..., T]``).
@overload
def pipe(
self,
func: tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T: ...

# Runtime implementation, marked ``@final``.
# The method has no literal docstring: ``Appender`` is given the shared
# ``template_pipe`` text and ``Substitution`` is given the ``klass`` and
# ``examples`` values matching that template's ``%(klass)s`` and
# ``%(examples)s`` placeholders.
@final
@Substitution(
klass="Rolling",
examples="""
>>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
... index=pd.date_range('2012-08-02', periods=4))
>>> df
A
2012-08-02 1
2012-08-03 2
2012-08-04 3
2012-08-05 4
To get the difference between each rolling 2-day window's maximum and minimum
value in one pass, you can do
>>> df.rolling('2D').pipe(lambda x: x.max() - x.min())
A
2012-08-02 0.0
2012-08-03 1.0
2012-08-04 1.0
2012-08-05 1.0""",
)
@Appender(template_pipe)
def pipe(
self,
func: Callable[Concatenate[Self, P], T] | tuple[Callable[..., T], str],
*args: Any,
**kwargs: Any,
) -> T:
# Pure delegation: the parent class's ``pipe`` does the work; this override
# only adds the Rolling-specific documentation and typing.
return super().pipe(func, *args, **kwargs)

@doc(
template_header,
create_section_header("Parameters"),
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/window/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,38 @@ def test_agg_nested_dicts():
r.agg({"A": {"ra": ["mean", "std"]}, "B": {"rb": ["mean", "std"]}})


@pytest.mark.parametrize(
    "func,window_size",
    [
        ("rolling", 2),
        ("expanding", None),
    ],
)
def test_pipe(func, window_size):
    """Check that ``pipe`` on a window object matches calling ``func`` directly.

    Parameters
    ----------
    func : str
        Name of the DataFrame method that builds the window object
        ("rolling" or "expanding").
    window_size : int or None
        Single positional argument forwarded to that method.
    """
    # GH#57076
    # Draw both columns from one generator so "B" and "C" hold different data;
    # seeding a fresh generator per column would make them identical.
    rng = np.random.default_rng(2)
    df = DataFrame(
        {
            "B": rng.standard_normal(10),
            "C": rng.standard_normal(10),
        }
    )
    r = getattr(df, func)(window_size)

    # Plain callable, no extra arguments.
    expected = r.max() - r.mean()
    result = r.pipe(lambda x: x.max() - x.mean())
    tm.assert_frame_equal(result, expected)

    # Plain callable with an extra keyword argument.
    expected = r.max() - 2 * r.min()
    result = r.pipe(lambda x, k: x.max() - k * x.min(), k=2)
    tm.assert_frame_equal(result, expected)

    # (callable, data_keyword) form: the window object is passed as the
    # keyword named by the tuple's second element, extra args stay positional.
    result = r.pipe((lambda k, win: win.max() - k * win.min(), "win"), 2)
    tm.assert_frame_equal(result, expected)


def test_count_nonnumeric_types(step):
# GH12541
cols = [
Expand Down

0 comments on commit f787764

Please sign in to comment.