BUG: Groupby.cummin/max DataError on datetimes (#15561) (#15569)

pandas-dev · Mar 5, 2017 · f5b7bcb
1 parent c198e28
commit f5b7bcb
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 4 deletions.
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -635,7 +635,7 @@ Performance Improvements
 - Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`)
 - Improved performance of timeseries plotting with an irregular DatetimeIndex
   (or with ``compat_x=True``) (:issue:`15073`).
-- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`)
+- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`)
 - Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`)
 - When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`)
 - Improved performance of `rank()` for categorical data (:issue:`15498`)

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1442,7 +1442,7 @@ def cummin(self, axis=0, **kwargs):
         if axis != 0:
             return self.apply(lambda x: np.minimum.accumulate(x, axis))
 
-        return self._cython_transform('cummin', **kwargs)
+        return self._cython_transform('cummin', numeric_only=False)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)
@@ -1451,7 +1451,7 @@ def cummax(self, axis=0, **kwargs):
         if axis != 0:
             return self.apply(lambda x: np.maximum.accumulate(x, axis))
 
-        return self._cython_transform('cummax', **kwargs)
+        return self._cython_transform('cummax', numeric_only=False)
 
     @Substitution(name='groupby')
     @Appender(_doc_template)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1954,7 +1954,8 @@ def test_arg_passthru(self):
         for attr in ['cummin', 'cummax']:
             f = getattr(df.groupby('group'), attr)
             result = f()
-            tm.assert_index_equal(result.columns, expected_columns_numeric)
+            # GH 15561: numeric_only=False set by default like min/max
+            tm.assert_index_equal(result.columns, expected_columns)
 
             result = f(numeric_only=False)
             tm.assert_index_equal(result.columns, expected_columns)
@@ -4295,6 +4296,13 @@ def test_cummin_cummax(self):
         result = base_df.groupby('A').B.apply(lambda x: x.cummax()).to_frame()
         tm.assert_frame_equal(expected, result)
 
+        # GH 15561
+        df = pd.DataFrame(dict(a=[1], b=pd.to_datetime(['2001'])))
+        expected = pd.Series(pd.to_datetime('2001'), index=[0], name='b')
+        for method in ['cummax', 'cummin']:
+            result = getattr(df.groupby('a')['b'], method)()
+            tm.assert_series_equal(expected, result)
+
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)