-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: dropna incorrect with categoricals in pivot_table #21252
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
from datetime import datetime, date, timedelta | ||
|
||
|
@@ -16,6 +17,11 @@ | |
from pandas.api.types import CategoricalDtype as CDT | ||
|
||
|
||
@pytest.fixture(params=[True, False])
def dropna(request):
    # Shared fixture: every test that takes a ``dropna`` argument is run
    # twice, once with True and once with False.
    flag = request.param
    return flag
|
||
|
||
class TestPivotTable(object): | ||
|
||
def setup_method(self, method): | ||
|
@@ -109,7 +115,6 @@ def test_pivot_table_categorical(self): | |
index=exp_index) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('dropna', [True, False]) | ||
def test_pivot_table_dropna_categoricals(self, dropna): | ||
# GH 15193 | ||
categories = ['a', 'b', 'c', 'd'] | ||
|
@@ -137,6 +142,25 @@ def test_pivot_table_dropna_categoricals(self, dropna): | |
|
||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_pivot_with_non_observable_dropna(self, dropna):
    # gh-21133
    # The grouping key is an ordered categorical whose first entry is NaN.
    # With B = 0..4, the rows labelled 'low' hold B = 1, 3 and the rows
    # labelled 'high' hold B = 2, 4, so the expected frame below has
    # 2 for 'low' and 3 for 'high' for both values of ``dropna``.
    levels = ['low', 'high']
    key = pd.Categorical([np.nan, 'low', 'high', 'low', 'high'],
                         categories=levels,
                         ordered=True)
    df = pd.DataFrame({'A': key, 'B': range(5)})

    result = df.pivot_table(index='A', values='B', dropna=dropna)

    # Build the expected index from codes so that it contains exactly the
    # two declared categories, in order, and no NaN entry.
    expected_key = pd.Categorical.from_codes([0, 1],
                                             categories=levels,
                                             ordered=True)
    expected_index = pd.Index(expected_key, name='A')
    expected = pd.DataFrame({'B': [2, 3]}, index=expected_index)

    tm.assert_frame_equal(result, expected)
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you also add tests where the categorical is passed via `columns`? Using the same definition of `df`:
In [3]: pd.__version__
Out[3]: '0.23.0'
In [4]: df.pivot_table(columns='A', values='B')
Out[4]:
A  NaN  low
B  2.0  3.0
Similarly, when the categorical is used for both `index` and `columns`:
In [5]: df['AA'] = df['A']
In [6]: df.pivot_table(index='A', columns='AA', values='B')
Out[6]:
AA   NaN  low
A
NaN  2.0  NaN
low  NaN  3.0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jschendel I tested that those are working correctly with this PR, but given that I wanted to get this in for the release, I already merged. But it is indeed true that it would be good to add those as additional test cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Opened #21370 to keep track of this. |
||
def test_pass_array(self): | ||
result = self.data.pivot_table( | ||
'D', index=self.data.A, columns=self.data.C) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have the feeling this workaround would not be needed if the bug in groupby were fixed (#21151)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jreback ?