From 9eb429e3b9a57f9b9b55ff5208a605d6a73569c7 Mon Sep 17 00:00:00 2001 From: watercrossing Date: Wed, 8 Nov 2017 10:44:39 +0000 Subject: [PATCH] Fix groupby().count() for datetimelike columns --- doc/source/whatsnew/v0.21.1.txt | 1 + pandas/core/groupby.py | 3 ++- pandas/tests/groupby/test_counting.py | 21 +++++++++++++++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index d4cfb6d5b1a46..2d60bfea5d56c 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -60,6 +60,7 @@ Bug Fixes - Bug in :class:`TimedeltaIndex` subtraction could incorrectly overflow when ``NaT`` is present (:issue:`17791`) - Bug in :class:`DatetimeIndex` subtracting datetimelike from DatetimeIndex could fail to overflow (:issue:`18020`) - Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has floating issue (:issue:`18044`) +- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 967685c4e11bf..1acc8c3ed0bbb 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -4365,7 +4365,8 @@ def count(self): ids, _, ngroups = self.grouper.group_info mask = ids != -1 - val = ((mask & ~isna(blk.get_values())) for blk in data.blocks) + val = ((mask & ~isna(np.atleast_2d(blk.get_values()))) + for blk in data.blocks) loc = (blk.mgr_locs for blk in data.blocks) counter = partial(count_level_2d, labels=ids, max_bin=ngroups, axis=1) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 485241d593d4f..787d99086873e 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -2,9 +2,11 @@ from __future__ import print_function import numpy as np +import pytest -from pandas import (DataFrame, Series, MultiIndex) -from pandas.util.testing import 
assert_series_equal +from pandas import (DataFrame, Series, MultiIndex, Timestamp, Timedelta, + Period) +from pandas.util.testing import (assert_series_equal, assert_frame_equal) from pandas.compat import (range, product as cart_product) @@ -195,3 +197,18 @@ def test_ngroup_respects_groupby_order(self): g.ngroup()) assert_series_equal(Series(df['group_index'].values), g.cumcount()) + + @pytest.mark.parametrize('datetimelike', [ + [Timestamp('2016-05-%02d 20:09:25+00:00' % i) for i in range(1, 4)], + [Timestamp('2016-05-%02d 20:09:25' % i) for i in range(1, 4)], + [Timedelta(x, unit="h") for x in range(1, 4)], + [Period(freq="2W", year=2017, month=x) for x in range(1, 4)]]) + def test_count_with_datetimelike(self, datetimelike): + # test for #13393, where DataFrameGroupBy.count() fails + # when counting a datetimelike column. + + df = DataFrame({'x': ['a', 'a', 'b'], 'y': datetimelike}) + res = df.groupby('x').count() + expected = DataFrame({'y': [2, 1]}, index=['a', 'b']) + expected.index.name = "x" + assert_frame_equal(expected, res)