diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index bbdcd183f65e1..28c0094881159 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -792,8 +792,9 @@ Groupby/resample/rolling - Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.Resampler.quantile` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`) - Bug in :meth:`.Rolling.median` and :meth:`.Rolling.quantile` returned wrong values for :class:`.BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`) - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) -- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) +- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`) - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`) +- Bug in :meth:`.DataFrameGroupBy.resample` using ``.agg`` with sum produced different result than just calling ``.sum`` (:issue:`33548`) - Bug in :meth:`.DataFrameGroupBy.apply` dropped values on ``nan`` group when returning the same axes with the original frame (:issue:`38227`) - Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle with arraylike ``q`` when grouping by columns (:issue:`33795`) - Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d814a7cee436e..40ef7199406fe 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -287,6 +287,7 @@ def __init__( self.indexer = None self.binner = None self._grouper = None + self._indexer = None self.dropna = dropna @final @@ -341,15 +342,24 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False): # Keep self.grouper value before overriding if self._grouper is None: self._grouper = self.grouper + self._indexer = self.indexer # the key must be a valid info item if self.key is not None: key = self.key # The 'on' is already defined if getattr(self.grouper, "name", None) == key and isinstance(obj, Series): - # pandas\core\groupby\grouper.py:348: error: Item "None" of - # "Optional[Any]" has no attribute "take" [union-attr] - ax = self._grouper.take(obj.index) # type: ignore[union-attr] + # Sometimes self._grouper will have been resorted while + # obj has not. In this case there is a mismatch when we + # call self._grouper.take(obj.index) so we need to undo the sorting + # before we call _grouper.take. + assert self._grouper is not None + if self._indexer is not None: + reverse_indexer = self._indexer.argsort() + unsorted_ax = self._grouper.take(reverse_indexer) + ax = unsorted_ax.take(obj.index) + else: + ax = self._grouper.take(obj.index) else: if key not in obj._info_axis: raise KeyError(f"The grouper name {key} is not found") diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 15dd49f8bf182..da5bb0eb59f70 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -362,3 +362,39 @@ def test_apply_to_one_column_of_df(): tm.assert_series_equal(result, expected) result = df.resample("H").apply(lambda group: group["col"].sum()) tm.assert_series_equal(result, expected) + + +def test_resample_groupby_agg(): + # GH: 33548 + df = DataFrame( + { + "cat": [ + "cat_1", + "cat_1", + "cat_2", + "cat_1", + "cat_2", + "cat_1", + "cat_2", + "cat_1", + ], + "num": [5, 20, 22, 3, 4, 30, 10, 50], + "date": [ + "2019-2-1", + "2018-02-03", + "2020-3-11", + "2019-2-2", + "2019-2-2", + "2018-12-4", + "2020-3-11", + "2020-12-12", + ], + } + ) + df["date"] = pd.to_datetime(df["date"]) + + resampled = df.groupby("cat").resample("Y", on="date") + expected = resampled.sum() + result = resampled.agg({"num": "sum"}) + + tm.assert_frame_equal(result, expected)