-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor index-as-string groupby tests and fix spurious warning (Bug 17383) #17843
Changes from 4 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -253,158 +253,6 @@ def test_grouper_column_and_index(self): | |
expected = df_single.reset_index().groupby(['inner', 'B']).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
def test_grouper_index_level_as_string(self): | ||
# GH 5677, allow strings passed as the `by` parameter to reference | ||
# columns or index levels | ||
|
||
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), | ||
('b', 1), ('b', 2), ('b', 3)]) | ||
idx.names = ['outer', 'inner'] | ||
df_multi = pd.DataFrame({"A": np.arange(6), | ||
'B': ['one', 'one', 'two', | ||
'two', 'one', 'one']}, | ||
index=idx) | ||
|
||
df_single = df_multi.reset_index('outer') | ||
|
||
# Column and Index on MultiIndex | ||
result = df_multi.groupby(['B', 'inner']).mean() | ||
expected = df_multi.groupby(['B', pd.Grouper(level='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Index and Column on MultiIndex | ||
result = df_multi.groupby(['inner', 'B']).mean() | ||
expected = df_multi.groupby([pd.Grouper(level='inner'), 'B']).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Column and Index on single Index | ||
result = df_single.groupby(['B', 'inner']).mean() | ||
expected = df_single.groupby(['B', pd.Grouper(level='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Index and Column on single Index | ||
result = df_single.groupby(['inner', 'B']).mean() | ||
expected = df_single.groupby([pd.Grouper(level='inner'), 'B']).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Single element list of Index on MultiIndex | ||
result = df_multi.groupby(['inner']).mean() | ||
expected = df_multi.groupby(pd.Grouper(level='inner')).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Single element list of Index on single Index | ||
result = df_single.groupby(['inner']).mean() | ||
expected = df_single.groupby(pd.Grouper(level='inner')).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Index on MultiIndex | ||
result = df_multi.groupby('inner').mean() | ||
expected = df_multi.groupby(pd.Grouper(level='inner')).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Index on single Index | ||
result = df_single.groupby('inner').mean() | ||
expected = df_single.groupby(pd.Grouper(level='inner')).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
def test_grouper_column_index_level_precedence(self): | ||
# GH 5677, when a string passed as the `by` parameter | ||
# matches a column and an index level the column takes | ||
# precedence | ||
|
||
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), | ||
('b', 1), ('b', 2), ('b', 3)]) | ||
idx.names = ['outer', 'inner'] | ||
df_multi_both = pd.DataFrame({"A": np.arange(6), | ||
'B': ['one', 'one', 'two', | ||
'two', 'one', 'one'], | ||
'inner': [1, 1, 1, 1, 1, 1]}, | ||
index=idx) | ||
|
||
df_single_both = df_multi_both.reset_index('outer') | ||
|
||
# Group MultiIndex by single key | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_multi_both.groupby('inner').mean() | ||
|
||
expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The |
||
assert_frame_equal(result, expected) | ||
not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group single Index by single key | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_single_both.groupby('inner').mean() | ||
|
||
expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group MultiIndex by single key list | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_multi_both.groupby(['inner']).mean() | ||
|
||
expected = df_multi_both.groupby([pd.Grouper(key='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
not_expected = df_multi_both.groupby(pd.Grouper(level='inner')).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group single Index by single key list | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_single_both.groupby(['inner']).mean() | ||
|
||
expected = df_single_both.groupby([pd.Grouper(key='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
not_expected = df_single_both.groupby(pd.Grouper(level='inner')).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group MultiIndex by two keys (1) | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_multi_both.groupby(['B', 'inner']).mean() | ||
|
||
expected = df_multi_both.groupby(['B', | ||
pd.Grouper(key='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
not_expected = df_multi_both.groupby(['B', | ||
pd.Grouper(level='inner') | ||
]).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group MultiIndex by two keys (2) | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_multi_both.groupby(['inner', 'B']).mean() | ||
|
||
expected = df_multi_both.groupby([pd.Grouper(key='inner'), | ||
'B']).mean() | ||
assert_frame_equal(result, expected) | ||
not_expected = df_multi_both.groupby([pd.Grouper(level='inner'), | ||
'B']).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group single Index by two keys (1) | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_single_both.groupby(['B', 'inner']).mean() | ||
|
||
expected = df_single_both.groupby(['B', | ||
pd.Grouper(key='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
not_expected = df_single_both.groupby(['B', | ||
pd.Grouper(level='inner') | ||
]).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group single Index by two keys (2) | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = df_single_both.groupby(['inner', 'B']).mean() | ||
|
||
expected = df_single_both.groupby([pd.Grouper(key='inner'), | ||
'B']).mean() | ||
assert_frame_equal(result, expected) | ||
not_expected = df_single_both.groupby([pd.Grouper(level='inner'), | ||
'B']).mean() | ||
assert not result.index.equals(not_expected.index) | ||
|
||
def test_grouper_getting_correct_binner(self): | ||
|
||
# GH 10063 | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
import pytest | ||
import pandas as pd | ||
import numpy as np | ||
|
||
from pandas.util.testing import assert_frame_equal, assert_series_equal | ||
import pandas.util.testing as tm | ||
|
||
|
||
def build_df_multi(): | ||
idx = pd.MultiIndex.from_tuples([('a', 1), ('a', 2), ('a', 3), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these should just be fixtures |
||
('b', 1), ('b', 2), ('b', 3)]) | ||
idx.names = ['outer', 'inner'] | ||
df_multi = pd.DataFrame({"A": np.arange(6), | ||
'B': ['one', 'one', 'two', | ||
'two', 'one', 'one']}, | ||
index=idx) | ||
return df_multi | ||
|
||
|
||
def build_df_single(): | ||
df_single = build_df_multi().reset_index('outer') | ||
return df_single | ||
|
||
|
||
def build_test_series(): | ||
series_multi = build_df_multi().set_index('B', append=True)['A'] | ||
return series_multi | ||
|
||
|
||
class TestGroupByIndexAsString(object): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no real need to make this a class, that is really a leftover from nose, just make these functions (of course a class is good for grouping generally) |
||
|
||
@pytest.mark.parametrize('frame', [build_df_multi(), build_df_single()]) | ||
def test_grouper_index_level_as_string(self, frame): | ||
# Column and Index | ||
result = frame.groupby(['B', 'inner']).mean() | ||
expected = frame.groupby(['B', pd.Grouper(level='inner')]).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Index and Column | ||
result = frame.groupby(['inner', 'B']).mean() | ||
expected = frame.groupby([pd.Grouper(level='inner'), 'B']).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Single element list of Index | ||
result = frame.groupby(['inner']).mean() | ||
expected = frame.groupby(pd.Grouper(level='inner')).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
# Index name | ||
result = frame.groupby('inner').mean() | ||
expected = frame.groupby(pd.Grouper(level='inner')).mean() | ||
assert_frame_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('levels', [ | ||
'inner', 'outer', 'B', | ||
['inner'], ['outer'], ['B'], | ||
['inner', 'outer'], ['inner', 'outer', 'B'] | ||
]) | ||
def test_grouper_index_level_as_string_series(self, levels): | ||
s = build_test_series() | ||
|
||
# Compute expected result | ||
if isinstance(levels, list): | ||
groupers = [pd.Grouper(level=lv) for lv in levels] | ||
else: | ||
groupers = pd.Grouper(level=levels) | ||
|
||
expected = s.groupby(groupers).mean() | ||
|
||
# Compute and check result | ||
result = s.groupby(levels).mean() | ||
assert_series_equal(result, expected) | ||
|
||
@pytest.mark.parametrize('frame', [build_df_multi(), build_df_single()]) | ||
def test_grouper_column_index_level_precedence(self, frame): | ||
# GH 5677, when a string passed as the `by` parameter | ||
# matches a column and an index level the column takes | ||
# precedence | ||
|
||
# Add 'inner' column to frame | ||
# (frame already has an 'inner' index) | ||
frame['inner'] = [1, 1, 1, 1, 1, 1] | ||
|
||
# Group by single key | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = frame.groupby('inner').mean() | ||
|
||
with tm.assert_produces_warning(False): | ||
expected = frame.groupby(pd.Grouper(key='inner')).mean() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that the |
||
|
||
assert_frame_equal(result, expected) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. happy to have even another level of parameterization to make these shorter here (if that's possible) |
||
with tm.assert_produces_warning(False): | ||
not_expected = frame.groupby(pd.Grouper(level='inner')).mean() | ||
|
||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group by single key list | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = frame.groupby(['inner']).mean() | ||
|
||
with tm.assert_produces_warning(False): | ||
expected = frame.groupby([pd.Grouper(key='inner')]).mean() | ||
|
||
assert_frame_equal(result, expected) | ||
|
||
with tm.assert_produces_warning(False): | ||
not_expected = frame.groupby(pd.Grouper(level='inner')).mean() | ||
|
||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group by two keys ('B', 'inner') | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = frame.groupby(['B', 'inner']).mean() | ||
|
||
with tm.assert_produces_warning(False): | ||
expected = frame.groupby(['B', | ||
pd.Grouper(key='inner')]).mean() | ||
|
||
assert_frame_equal(result, expected) | ||
|
||
with tm.assert_produces_warning(False): | ||
not_expected = frame.groupby(['B', | ||
pd.Grouper(level='inner') | ||
]).mean() | ||
|
||
assert not result.index.equals(not_expected.index) | ||
|
||
# Group by two keys ('inner', 'B') | ||
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): | ||
result = frame.groupby(['inner', 'B']).mean() | ||
|
||
with tm.assert_produces_warning(False): | ||
expected = frame.groupby([pd.Grouper(key='inner'), | ||
'B']).mean() | ||
|
||
assert_frame_equal(result, expected) | ||
|
||
with tm.assert_produces_warning(False): | ||
not_expected = frame.groupby([pd.Grouper(level='inner'), | ||
'B']).mean() | ||
assert not result.index.equals(not_expected.index) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
note that I had to add this flag to 'fix' this warning issue elsewhere, I don't really like it, but would require more refactoring to make this cleaner.