From 5e49c744e896f22a8c8daaa5dff7a3b5761dd7fb Mon Sep 17 00:00:00 2001 From: LongBao Date: Sun, 17 Apr 2022 15:44:26 +0900 Subject: [PATCH 1/6] fix resample doc --- pandas/core/resample.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 209433a45f8b2..513dbd991650b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -101,6 +101,21 @@ _shared_docs_kwargs: dict[str, str] = {} +_resample_agg_method_template = """ +Compute {fname} of group values. + +Parameters +---------- +min_count : int, default {mc} + The required number of valid values to perform the operation. If fewer + than ``min_count`` non-NA values are present the result will be NA. + +Returns +------- +Series or DataFrame + Computed {fname} of values within each group. +""" + class Resampler(BaseGroupBy, PandasObject): """ @@ -1027,11 +1042,11 @@ def quantile(self, q=0.5, **kwargs): # downsample methods for method in ["sum", "prod", "min", "max", "first", "last"]: + @doc(_resample_agg_method_template, fname=method, mc=0) def f(self, _method=method, min_count=0, *args, **kwargs): nv.validate_resampler_func(_method, args, kwargs) return self._downsample(_method, min_count=min_count) - f.__doc__ = getattr(GroupBy, method).__doc__ setattr(Resampler, method, f) From a8701f2669fda546132dbbac11a980003b69ecbc Mon Sep 17 00:00:00 2001 From: LongBao Date: Fri, 22 Apr 2022 23:27:30 +0900 Subject: [PATCH 2/6] add numeric_only to resampler --- pandas/core/resample.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 513dbd991650b..e18e572b1e42b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -101,21 +101,6 @@ _shared_docs_kwargs: dict[str, str] = {} -_resample_agg_method_template = """ -Compute {fname} of group values. - -Parameters ----------- -min_count : int, default {mc} - The required number of valid values to perform the operation. If fewer - than ``min_count`` non-NA values are present the result will be NA. - -Returns -------- -Series or DataFrame - Computed {fname} of values within each group. -""" - class Resampler(BaseGroupBy, PandasObject): """ @@ -1042,11 +1027,26 @@ def quantile(self, q=0.5, **kwargs): # downsample methods for method in ["sum", "prod", "min", "max", "first", "last"]: - @doc(_resample_agg_method_template, fname=method, mc=0) - def f(self, _method=method, min_count=0, *args, **kwargs): + def f( + self, + _method: str = method, + numeric_only: bool | lib.NoDefault = lib.no_default, + min_count: int = 0, + *args, + **kwargs + ): + if numeric_only is lib.no_default: + if self.obj.ndim == 1: + numeric_only = None + elif _method in ["first", "min"]: + numeric_only = False + nv.validate_resampler_func(_method, args, kwargs) - return self._downsample(_method, min_count=min_count) + return self._downsample( + _method, numeric_only=numeric_only, min_count=min_count + ) + f.__doc__ = getattr(GroupBy, method).__doc__ setattr(Resampler, method, f) From b42ac36dcd03dbbe7026c5de0f8d55bb0e085f58 Mon Sep 17 00:00:00 2001 From: LongBao Date: Fri, 22 Apr 2022 23:43:10 +0900 Subject: [PATCH 3/6] add numeric_only to resampler --- pandas/core/resample.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 3022dffc385bc..6f778b1aa8af5 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1037,8 +1037,9 @@ def f( ): if numeric_only is lib.no_default: if self.obj.ndim == 1: + # SeriesGroupBy numeric_only = None - elif _method in ["first", "min"]: + elif _method != "sum": numeric_only = False nv.validate_resampler_func(_method, args, kwargs) From 220d50ac24023c30e914849f3483379a3d94017c Mon Sep 17 00:00:00 2001 From: LongBao Date: Sat, 23 Apr 2022 12:00:07 +0900 Subject: [PATCH 4/6] Added an entry in the latest doc/source/whatsnew/vX.X.X.rst file --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/resample.py | 13 +++++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 922ef28b855b9..e4879a6c41515 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -96,6 +96,7 @@ Other enhancements - :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) - Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, and :meth:`DataFrame.cov` (:issue:`46560`) - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) +- Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 6f778b1aa8af5..723c129141569 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1033,19 +1033,16 @@ def f( numeric_only: bool | lib.NoDefault = lib.no_default, min_count: int = 0, *args, - **kwargs + **kwargs, ): if numeric_only is lib.no_default: - if self.obj.ndim == 1: - # SeriesGroupBy - numeric_only = None - elif _method != "sum": + if (self.obj.ndim == 1) or (_method != "sum"): + # For SeriesGroupBy, set the default to be False. + # For DataFrameGroupBy, set it to be False for methods other than `sum`. numeric_only = False nv.validate_resampler_func(_method, args, kwargs) - return self._downsample( - _method, numeric_only=numeric_only, min_count=min_count - ) + return self._downsample(_method, numeric_only=numeric_only, min_count=min_count) f.__doc__ = getattr(GroupBy, method).__doc__ setattr(Resampler, method, f) From 906ac882ff56ddd5082faab09c841d80bcf24362 Mon Sep 17 00:00:00 2001 From: LongBao Date: Sat, 23 Apr 2022 19:29:09 +0900 Subject: [PATCH 5/6] add test cases --- pandas/tests/resample/test_resample_api.py | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 9148600d31bc2..8728435a8e47c 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -771,3 +771,58 @@ def test_end_and_end_day_origin( ) tm.assert_series_equal(res, expected) + + +@pytest.mark.parametrize("numeric_only", [True, False]) +def test_downsample_method(numeric_only): + # test if `numeric_only` behave as expected for Resampler downsample methods. + + index = date_range("2018-01-01", periods=2, freq="D") + expected_index = date_range("2018-12-31", periods=1, freq="Y") + df = DataFrame({"cat": ["cat_1", "cat_2"], "num": [5, 20]}, index=index) + resampled = df.resample("Y") + + # test Resampler.sum + result = resampled.sum(numeric_only=numeric_only) + if numeric_only: + expected = DataFrame({"num": [25]}, index=expected_index) + else: + expected = DataFrame({"cat": ["cat_1cat_2"], "num": [25]}, index=expected_index) + tm.assert_frame_equal(result, expected) + + # test Resampler.prod + result = resampled.prod(numeric_only=numeric_only) + expected = DataFrame({"num": [100]}, index=expected_index) + tm.assert_frame_equal(result, expected) + + # test Resampler.min + result = resampled.min(numeric_only=numeric_only) + if numeric_only: + expected = DataFrame({"num": [5]}, index=expected_index) + else: + expected = DataFrame({"cat": ["cat_1"], "num": [5]}, index=expected_index) + tm.assert_frame_equal(result, expected) + + # test Resampler.max + result = resampled.max(numeric_only=numeric_only) + if numeric_only: + expected = DataFrame({"num": [20]}, index=expected_index) + else: + expected = DataFrame({"cat": ["cat_2"], "num": [20]}, index=expected_index) + tm.assert_frame_equal(result, expected) + + # test Resampler.first + result = resampled.first(numeric_only=numeric_only) + if numeric_only: + expected = DataFrame({"num": [5]}, index=expected_index) + else: + expected = DataFrame({"cat": ["cat_1"], "num": [5]}, index=expected_index) + tm.assert_frame_equal(result, expected) + + # test Resampler.last + result = resampled.last(numeric_only=numeric_only) + if numeric_only: + expected = DataFrame({"num": [20]}, index=expected_index) + else: + expected = DataFrame({"cat": ["cat_2"], "num": [20]}, index=expected_index) + tm.assert_frame_equal(result, expected) From 5f18efa62dd88bec5dc991e019e681bd3ec1aa5c Mon Sep 17 00:00:00 2001 From: LongBao Date: Sun, 24 Apr 2022 01:54:47 +0900 Subject: [PATCH 6/6] add and refactor test cases --- pandas/core/resample.py | 3 +- pandas/tests/resample/test_resample_api.py | 109 +++++++++++++-------- 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 723c129141569..362f61d25ac34 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1036,8 +1036,7 @@ def f( **kwargs, ): if numeric_only is lib.no_default: - if (self.obj.ndim == 1) or (_method != "sum"): - # For SeriesGroupBy, set the default to be False. + if _method != "sum": # For DataFrameGroupBy, set it to be False for methods other than `sum`. numeric_only = False diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 8728435a8e47c..a5834dd237c01 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._libs import lib + import pandas as pd from pandas import ( DataFrame, @@ -773,56 +775,83 @@ def test_end_and_end_day_origin( tm.assert_series_equal(res, expected) -@pytest.mark.parametrize("numeric_only", [True, False]) -def test_downsample_method(numeric_only): - # test if `numeric_only` behave as expected for Resampler downsample methods. +@pytest.mark.parametrize( + "method, numeric_only, expected_data", + [ + ("sum", True, {"num": [25]}), + ("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}), + ("sum", lib.no_default, {"num": [25]}), + ("prod", True, {"num": [100]}), + ("prod", False, {"num": [100]}), + ("prod", lib.no_default, {"num": [100]}), + ("min", True, {"num": [5]}), + ("min", False, {"cat": ["cat_1"], "num": [5]}), + ("min", lib.no_default, {"cat": ["cat_1"], "num": [5]}), + ("max", True, {"num": [20]}), + ("max", False, {"cat": ["cat_2"], "num": [20]}), + ("max", lib.no_default, {"cat": ["cat_2"], "num": [20]}), + ("first", True, {"num": [5]}), + ("first", False, {"cat": ["cat_1"], "num": [5]}), + ("first", lib.no_default, {"cat": ["cat_1"], "num": [5]}), + ("last", True, {"num": [20]}), + ("last", False, {"cat": ["cat_2"], "num": [20]}), + ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}), + ], +) +def test_frame_downsample_method(method, numeric_only, expected_data): + # GH#46442 test if `numeric_only` behave as expected for DataFrameGroupBy index = date_range("2018-01-01", periods=2, freq="D") expected_index = date_range("2018-12-31", periods=1, freq="Y") df = DataFrame({"cat": ["cat_1", "cat_2"], "num": [5, 20]}, index=index) resampled = df.resample("Y") - # test Resampler.sum - result = resampled.sum(numeric_only=numeric_only) - if numeric_only: - expected = DataFrame({"num": [25]}, index=expected_index) - else: - expected = DataFrame({"cat": ["cat_1cat_2"], "num": [25]}, index=expected_index) - tm.assert_frame_equal(result, expected) + func = getattr(resampled, method) + result = func(numeric_only=numeric_only) - # test Resampler.prod - result = resampled.prod(numeric_only=numeric_only) - expected = DataFrame({"num": [100]}, index=expected_index) + expected = DataFrame(expected_data, index=expected_index) tm.assert_frame_equal(result, expected) - # test Resampler.min - result = resampled.min(numeric_only=numeric_only) - if numeric_only: - expected = DataFrame({"num": [5]}, index=expected_index) - else: - expected = DataFrame({"cat": ["cat_1"], "num": [5]}, index=expected_index) - tm.assert_frame_equal(result, expected) - # test Resampler.max - result = resampled.max(numeric_only=numeric_only) - if numeric_only: - expected = DataFrame({"num": [20]}, index=expected_index) - else: - expected = DataFrame({"cat": ["cat_2"], "num": [20]}, index=expected_index) - tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "method, numeric_only, expected_data", + [ + ("sum", True, ()), + ("sum", False, ["cat_1cat_2"]), + ("sum", lib.no_default, ["cat_1cat_2"]), + ("prod", True, ()), + ("prod", False, ()), + ("prod", lib.no_default, ()), + ("min", True, ()), + ("min", False, ["cat_1"]), + ("min", lib.no_default, ["cat_1"]), + ("max", True, ()), + ("max", False, ["cat_2"]), + ("max", lib.no_default, ["cat_2"]), + ("first", True, ()), + ("first", False, ["cat_1"]), + ("first", lib.no_default, ["cat_1"]), + ("last", True, ()), + ("last", False, ["cat_2"]), + ("last", lib.no_default, ["cat_2"]), + ], +) +def test_series_downsample_method(method, numeric_only, expected_data): + # GH#46442 test if `numeric_only` behave as expected for SeriesGroupBy - # test Resampler.first - result = resampled.first(numeric_only=numeric_only) - if numeric_only: - expected = DataFrame({"num": [5]}, index=expected_index) - else: - expected = DataFrame({"cat": ["cat_1"], "num": [5]}, index=expected_index) - tm.assert_frame_equal(result, expected) + index = date_range("2018-01-01", periods=2, freq="D") + expected_index = date_range("2018-12-31", periods=1, freq="Y") + df = Series(["cat_1", "cat_2"], index=index) + resampled = df.resample("Y") - # test Resampler.last - result = resampled.last(numeric_only=numeric_only) - if numeric_only: - expected = DataFrame({"num": [20]}, index=expected_index) + func = getattr(resampled, method) + if numeric_only and numeric_only is not lib.no_default: + with pytest.raises(NotImplementedError, match="not implement numeric_only"): + func(numeric_only=numeric_only) + elif method == "prod": + with pytest.raises(TypeError, match="can't multiply sequence by non-int"): + func(numeric_only=numeric_only) else: - expected = DataFrame({"cat": ["cat_2"], "num": [20]}, index=expected_index) - tm.assert_frame_equal(result, expected) + result = func(numeric_only=numeric_only) + expected = Series(expected_data, index=expected_index) + tm.assert_series_equal(result, expected)