BUG: groupby resample different results with .agg() vs .mean() (pandas-dev#37905)
luckyvs1 · Jan 20, 2021 · 458c320 · 458c320
1 parent 8e502ea
commit 458c320
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 4 deletions.
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -793,8 +793,9 @@ Groupby/resample/rolling
 - Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.Resampler.quantile` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`)
 - Bug in :meth:`.Rolling.median` and :meth:`.Rolling.quantile` returned wrong values for :class:`.BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`)
 - Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
-- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
+- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
 - Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`)
+- Bug in :meth:`.DataFrameGroupBy.resample` where using ``.agg`` with ``sum`` produced a different result than calling ``.sum`` directly (:issue:`33548`)
 - Bug in :meth:`.DataFrameGroupBy.apply` dropped values on ``nan`` group when returning the same axes with the original frame (:issue:`38227`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle with arraylike ``q`` when grouping by columns (:issue:`33795`)
 - Bug in :meth:`DataFrameGroupBy.rank` with ``datetime64tz`` or period dtype incorrectly casting results to those dtypes instead of returning ``float64`` dtype (:issue:`38187`)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -258,6 +258,7 @@ def __init__(
         self.indexer = None
         self.binner = None
         self._grouper = None
+        self._indexer = None
         self.dropna = dropna
 
     @final
@@ -312,15 +313,24 @@ def _set_grouper(self, obj: FrameOrSeries, sort: bool = False):
         # Keep self.grouper value before overriding
         if self._grouper is None:
             self._grouper = self.grouper
+            self._indexer = self.indexer
 
         # the key must be a valid info item
         if self.key is not None:
             key = self.key
             # The 'on' is already defined
             if getattr(self.grouper, "name", None) == key and isinstance(obj, Series):
-                # pandas\core\groupby\grouper.py:348: error: Item "None" of
-                # "Optional[Any]" has no attribute "take"  [union-attr]
-                ax = self._grouper.take(obj.index)  # type: ignore[union-attr]
+                # Sometimes self._grouper will have been resorted while
+                # obj has not. In this case there is a mismatch when we
+                # call self._grouper.take(obj.index) so we need to undo the sorting
+                # before we call _grouper.take.
+                assert self._grouper is not None
+                if self._indexer is not None:
+                    reverse_indexer = self._indexer.argsort()
+                    unsorted_ax = self._grouper.take(reverse_indexer)
+                    ax = unsorted_ax.take(obj.index)
+                else:
+                    ax = self._grouper.take(obj.index)
             else:
                 if key not in obj._info_axis:
                     raise KeyError(f"The grouper name {key} is not found")

diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
@@ -362,3 +362,39 @@ def test_apply_to_one_column_of_df():
     tm.assert_series_equal(result, expected)
     result = df.resample("H").apply(lambda group: group["col"].sum())
     tm.assert_series_equal(result, expected)
+
+
+def test_resample_groupby_agg():
+    # GH: 33548
+    df = DataFrame(
+        {
+            "cat": [
+                "cat_1",
+                "cat_1",
+                "cat_2",
+                "cat_1",
+                "cat_2",
+                "cat_1",
+                "cat_2",
+                "cat_1",
+            ],
+            "num": [5, 20, 22, 3, 4, 30, 10, 50],
+            "date": [
+                "2019-2-1",
+                "2018-02-03",
+                "2020-3-11",
+                "2019-2-2",
+                "2019-2-2",
+                "2018-12-4",
+                "2020-3-11",
+                "2020-12-12",
+            ],
+        }
+    )
+    df["date"] = pd.to_datetime(df["date"])
+
+    resampled = df.groupby("cat").resample("Y", on="date")
+    expected = resampled.sum()
+    result = resampled.agg({"num": "sum"})
+
+    tm.assert_frame_equal(result, expected)