Skip to content

Commit

Permalink
Refactor index-as-string groupby tests and fix spurious warning (Bug …
Browse files Browse the repository at this point in the history
…17383) (pandas-dev#17843)
  • Loading branch information
jonmmease authored and alanbato committed Nov 10, 2017
1 parent d7ca520 commit e9820fb
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 153 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,7 @@ Groupby/Resample/Rolling
- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`)
- Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`)
- Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`)
- Bug in ``DataFrame.groupby`` where a spurious warning is raised when a ``Grouper`` object is used to override an ambiguous column name (:issue:`17383`)
- Bug in ``TimeGrouper`` where the result differs when it is passed as a list and as a scalar (:issue:`17530`)

Sparse
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2703,7 +2703,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True,

# a passed-in Grouper, directly convert
if isinstance(key, Grouper):
binner, grouper, obj = key._get_grouper(obj)
binner, grouper, obj = key._get_grouper(obj, validate=False)
if key.key is None:
return grouper, [], obj
else:
Expand Down
152 changes: 0 additions & 152 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,158 +253,6 @@ def test_grouper_column_and_index(self):
expected = df_single.reset_index().groupby(['inner', 'B']).mean()
assert_frame_equal(result, expected)

def test_grouper_index_level_as_string(self):
    """Check that index-level names passed as strings group like Groupers.

    GH 5677: strings passed as the `by` parameter may reference columns
    or index levels.  Each case groups once with plain strings and once
    with an explicit ``pd.Grouper(level='inner')`` in the same position,
    and the two means must be equal.
    """
    index = pd.MultiIndex.from_tuples(
        [('a', 1), ('a', 2), ('a', 3),
         ('b', 1), ('b', 2), ('b', 3)],
        names=['outer', 'inner'])
    df_multi = pd.DataFrame(
        {"A": np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one']},
        index=index)

    # Same data with 'outer' moved to a column, leaving a single-level
    # index named 'inner'.
    df_single = df_multi.reset_index('outer')

    # (frame, string keys, equivalent Grouper-based keys).  A fresh
    # Grouper is built for every case so no instance is shared.
    cases = [
        # Column and Index on MultiIndex
        (df_multi, ['B', 'inner'], ['B', pd.Grouper(level='inner')]),
        # Index and Column on MultiIndex
        (df_multi, ['inner', 'B'], [pd.Grouper(level='inner'), 'B']),
        # Column and Index on single Index
        (df_single, ['B', 'inner'], ['B', pd.Grouper(level='inner')]),
        # Index and Column on single Index
        (df_single, ['inner', 'B'], [pd.Grouper(level='inner'), 'B']),
        # Single element list of Index on MultiIndex
        (df_multi, ['inner'], pd.Grouper(level='inner')),
        # Single element list of Index on single Index
        (df_single, ['inner'], pd.Grouper(level='inner')),
        # Index on MultiIndex
        (df_multi, 'inner', pd.Grouper(level='inner')),
        # Index on single Index
        (df_single, 'inner', pd.Grouper(level='inner')),
    ]

    for df, string_keys, grouper_keys in cases:
        result = df.groupby(string_keys).mean()
        expected = df.groupby(grouper_keys).mean()
        assert_frame_equal(result, expected)

def test_grouper_column_index_level_precedence(self):
    """Check that a column wins over an index level of the same name.

    GH 5677: when a string passed as the `by` parameter matches both a
    column and an index level, the column takes precedence.  Each case
    checks that grouping by strings emits a FutureWarning, equals the
    result of grouping with ``pd.Grouper(key='inner')``, and has a
    different index from grouping with ``pd.Grouper(level='inner')``.
    """
    index = pd.MultiIndex.from_tuples(
        [('a', 1), ('a', 2), ('a', 3),
         ('b', 1), ('b', 2), ('b', 3)],
        names=['outer', 'inner'])
    # 'inner' exists both as an index level and as a constant column.
    df_multi_both = pd.DataFrame(
        {"A": np.arange(6),
         'B': ['one', 'one', 'two', 'two', 'one', 'one'],
         'inner': [1, 1, 1, 1, 1, 1]},
        index=index)

    df_single_both = df_multi_both.reset_index('outer')

    # (frame, string keys, column-based keys, level-based keys).  A fresh
    # Grouper is built for every case so no instance is shared.
    cases = [
        # Group MultiIndex by single key
        (df_multi_both, 'inner',
         [pd.Grouper(key='inner')],
         pd.Grouper(level='inner')),
        # Group single Index by single key
        (df_single_both, 'inner',
         [pd.Grouper(key='inner')],
         pd.Grouper(level='inner')),
        # Group MultiIndex by single key list
        (df_multi_both, ['inner'],
         [pd.Grouper(key='inner')],
         pd.Grouper(level='inner')),
        # Group single Index by single key list
        (df_single_both, ['inner'],
         [pd.Grouper(key='inner')],
         pd.Grouper(level='inner')),
        # Group MultiIndex by two keys (1)
        (df_multi_both, ['B', 'inner'],
         ['B', pd.Grouper(key='inner')],
         ['B', pd.Grouper(level='inner')]),
        # Group MultiIndex by two keys (2)
        (df_multi_both, ['inner', 'B'],
         [pd.Grouper(key='inner'), 'B'],
         [pd.Grouper(level='inner'), 'B']),
        # Group single Index by two keys (1)
        (df_single_both, ['B', 'inner'],
         ['B', pd.Grouper(key='inner')],
         ['B', pd.Grouper(level='inner')]),
        # Group single Index by two keys (2)
        (df_single_both, ['inner', 'B'],
         [pd.Grouper(key='inner'), 'B'],
         [pd.Grouper(level='inner'), 'B']),
    ]

    for df, string_keys, column_keys, level_keys in cases:
        # The ambiguous string lookup must warn.
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            result = df.groupby(string_keys).mean()

        # Strings resolve to the column ...
        expected = df.groupby(column_keys).mean()
        assert_frame_equal(result, expected)

        # ... and not to the index level of the same name.
        not_expected = df.groupby(level_keys).mean()
        assert not result.index.equals(not_expected.index)

def test_grouper_getting_correct_binner(self):

# GH 10063
Expand Down
116 changes: 116 additions & 0 deletions pandas/tests/groupby/test_index_as_string.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import pytest
import pandas as pd
import numpy as np

from pandas.util.testing import assert_frame_equal, assert_series_equal
import pandas.util.testing as tm


@pytest.fixture(params=[['inner'], ['inner', 'outer']])
def frame(request):
    """Return the sample frame indexed by the parametrized level names.

    The fixture runs once with ``['inner']`` as the index and once with
    ``['inner', 'outer']``; the remaining columns stay as data.
    """
    data = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'],
                         'inner': [1, 2, 3, 1, 2, 3],
                         'A': np.arange(6),
                         'B': ['one', 'one', 'two', 'two', 'one', 'one']})
    index_levels = request.param
    # An empty parameter would leave the default index in place.
    return data.set_index(index_levels) if index_levels else data


@pytest.fixture()
def series():
    """Return column 'A' as a Series indexed by 'outer', 'inner' and 'B'."""
    data = pd.DataFrame({'outer': ['a', 'a', 'a', 'b', 'b', 'b'],
                         'inner': [1, 2, 3, 1, 2, 3],
                         'A': np.arange(6),
                         'B': ['one', 'one', 'two', 'two', 'one', 'one']})
    indexed = data.set_index(['outer', 'inner', 'B'])
    return indexed['A']


@pytest.mark.parametrize('key_strs,groupers', [
    # Index name
    ('inner', pd.Grouper(level='inner')),
    # List of index name
    (['inner'], [pd.Grouper(level='inner')]),
    # Column and index
    (['B', 'inner'], ['B', pd.Grouper(level='inner')]),
    # Index and column
    (['inner', 'B'], [pd.Grouper(level='inner'), 'B']),
])
def test_grouper_index_level_as_string(frame, key_strs, groupers):
    """Grouping by index-level names as strings matches level Groupers."""
    by_strings = frame.groupby(key_strs).mean()
    by_groupers = frame.groupby(groupers).mean()
    assert_frame_equal(by_strings, by_groupers)


@pytest.mark.parametrize('levels', [
    'inner', 'outer', 'B',
    ['inner'], ['outer'], ['B'],
    ['inner', 'outer'], ['outer', 'inner'],
    ['inner', 'outer', 'B'], ['B', 'outer', 'inner']
])
def test_grouper_index_level_as_string_series(series, levels):
    """Grouping a Series by level names matches explicit level Groupers.

    ``levels`` is either one level name or a list of names; the expected
    result uses a ``pd.Grouper(level=...)`` of the same shape.
    """
    # Build the reference groupers: one per name for a list, otherwise a
    # single Grouper for the scalar name.
    groupers = ([pd.Grouper(level=name) for name in levels]
                if isinstance(levels, list)
                else pd.Grouper(level=levels))
    expected = series.groupby(groupers).mean()

    # Group by the raw strings and compare.
    assert_series_equal(series.groupby(levels).mean(), expected)


@pytest.mark.parametrize('key_strs,key_groupers,level_groupers', [
    # Index name
    ('inner',
     pd.Grouper(key='inner'),
     pd.Grouper(level='inner')),
    # List of index name
    (['inner'],
     [pd.Grouper(key='inner')],
     [pd.Grouper(level='inner')]),
    # Column and index
    (['B', 'inner'],
     ['B', pd.Grouper(key='inner')],
     ['B', pd.Grouper(level='inner')]),
    # Index and column
    (['inner', 'B'],
     [pd.Grouper(key='inner'), 'B'],
     [pd.Grouper(level='inner'), 'B']),
])
def test_grouper_column_index_level_precedence(frame,
                                               key_strs,
                                               key_groupers,
                                               level_groupers):
    """A column takes precedence over an index level of the same name.

    GH 5677: when a string passed as the `by` parameter matches both a
    column and an index level, the column is used and a FutureWarning is
    raised.  Explicit ``pd.Grouper`` objects must not warn.
    """
    # Give the frame an 'inner' column in addition to its existing
    # 'inner' index level, making the string 'inner' ambiguous.
    frame['inner'] = [1, 1, 1, 1, 1, 1]

    # Ambiguous string keys: expect the warning.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        by_strings = frame.groupby(key_strs).mean()

    # Explicit key Grouper: same result as the strings, and no warning.
    with tm.assert_produces_warning(False):
        by_column = frame.groupby(key_groupers).mean()

    assert_frame_equal(by_strings, by_column)

    # Explicit level Grouper: a different result, and still no warning.
    with tm.assert_produces_warning(False):
        by_level = frame.groupby(level_groupers).mean()

    assert not by_strings.index.equals(by_level.index)

0 comments on commit e9820fb

Please sign in to comment.