Skip to content

Commit

Permalink
Will raise in the future
Browse files Browse the repository at this point in the history
  • Loading branch information
jseabold committed Dec 7, 2020
1 parent 57d99a7 commit 3b59b23
Show file tree
Hide file tree
Showing 10 changed files with 64 additions and 53 deletions.
5 changes: 2 additions & 3 deletions doc/source/user_guide/categorical.rst
Original file line number Diff line number Diff line change
Expand Up @@ -635,9 +635,8 @@ even if some categories are not present in the data:
)
df.sum(axis=1, level=1)
Groupby will also show "unused" categories, though this default is deprecated
and will be changed in a future release. It is recommended to use the
``observed`` keyword explicitly as below:
Groupby will also show "unused" categories by default, though this behavior
is deprecated. In a future release, users must specify a value for ``observed``:

.. ipython:: python
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/groupby/grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,13 @@

_observed_msg = textwrap.dedent(
"""\
Using 'observed=False', because grouping on a categorical. A future version
of pandas will change to 'observed=True'.
Grouping by a categorical but 'observed' was not specified.
Using 'observed=False', but in a future version of pandas
not specifying 'observed' will raise an error. Pass
'observed=True' or 'observed=False' to silence this warning.
To silence the warning and switch to the future behavior, pass 'observed=True'.
To keep the current behavior and silence the warning, pass 'observed=False'.
See the `groupby` documentation for more information on the
observed keyword.
"""
)

Expand Down
6 changes: 4 additions & 2 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,10 @@
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.
The current default of ``observed=False`` is deprecated and will
change to ``observed=True`` in a future version of pandas.
The current default of ``observed=False`` is deprecated. In a
future version, ``observed`` will be a required keyword when
grouping by a categorical, and failing to specify a value will
raise an error.
Explicitly pass ``observed=True`` to silence the warning and
show only observed values.
Expand Down
17 changes: 12 additions & 5 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
"""
test cython .agg behavior
"""

import numpy as np
import pytest

import pandas as pd
from pandas import DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range
import pandas._testing as tm
from pandas import (
DataFrame,
Index,
NaT,
Series,
Timedelta,
Timestamp,
_testing as tm,
bdate_range,
)
from pandas.core.groupby.groupby import DataError


Expand Down Expand Up @@ -175,7 +182,7 @@ def test__cython_agg_general(op, targop):
("max", np.max),
],
)
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_cython_agg_empty_buckets(op, targop, observed):
df = DataFrame([11, 12, 13])
grps = range(0, 55, 5)
Expand All @@ -190,7 +197,7 @@ def test_cython_agg_empty_buckets(op, targop, observed):
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_cython_agg_empty_buckets_nanops(observed):
# GH-18869 can't call nanops on empty groups, so hardcode expected
# for these
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""
test all other .agg behavior
"""

import datetime as dt
from functools import partial

Expand All @@ -15,10 +14,10 @@
MultiIndex,
PeriodIndex,
Series,
_testing as tm,
date_range,
period_range,
)
import pandas._testing as tm
from pandas.core.base import SpecificationError

from pandas.io.formats.printing import pprint_thing
Expand Down Expand Up @@ -555,7 +554,7 @@ def test_agg_structs_series(structure, expected):
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_agg_category_nansum(observed):
categories = ["a", "b", "c"]
df = DataFrame(
Expand Down
36 changes: 18 additions & 18 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def f(x):
tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_level_get_group(observed):
# GH15155
df = DataFrame(
Expand Down Expand Up @@ -277,7 +277,7 @@ def test_apply(ordered):
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_observed(observed):
# multiple groupers, don't re-expand the output space
# of the grouper
Expand Down Expand Up @@ -386,7 +386,7 @@ def test_observed(observed):
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_observed_codes_remap(observed):
d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]}
df = DataFrame(d)
Expand Down Expand Up @@ -427,7 +427,7 @@ def test_observed_perf():
assert result.index.levels[2].nunique() == df.other_id.nunique()


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_observed_groups(observed):
# gh-20583
# test that we have the appropriate groups
Expand All @@ -450,7 +450,7 @@ def test_observed_groups(observed):
tm.assert_dict_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_observed_groups_with_nan(observed):
# GH 24740
df = DataFrame(
Expand Down Expand Up @@ -487,7 +487,7 @@ def test_observed_nth():
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_dataframe_categorical_with_nan(observed):
# GH 21151
s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])
Expand All @@ -511,7 +511,7 @@ def test_dataframe_categorical_with_nan(observed):
@pytest.mark.parametrize("ordered", [True, False])
@pytest.mark.parametrize("observed", [True, False])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
# GH 25871: Fix groupby sorting on ordered Categoricals
# GH 25167: Groupby with observed=True doesn't sort
Expand Down Expand Up @@ -1176,7 +1176,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs):

@pytest.mark.parametrize("operation", ["agg", "apply"])
@pytest.mark.parametrize("observed", [False, None])
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
# GH 24880
index, _ = MultiIndex.from_product(
Expand Down Expand Up @@ -1241,7 +1241,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
),
],
)
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
# GH 24880
expected = Series(data=data, index=index, name="C")
Expand All @@ -1259,7 +1259,7 @@ def test_groupby_categorical_series_dataframe_consistent(df_cat):


@pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])])
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_groupby_categorical_axis_1(code):
# GH 13420
df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]})
Expand All @@ -1269,7 +1269,7 @@ def test_groupby_categorical_axis_1(code):
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_groupby_cat_preserves_structure(observed, ordered):
# GH 28787
df = DataFrame(
Expand Down Expand Up @@ -1298,7 +1298,7 @@ def test_get_nonexistent_category():
)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request):
# GH 17605
if reduction_func == "ngroup":
Expand Down Expand Up @@ -1398,7 +1398,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun


@pytest.mark.parametrize("observed", [False, None])
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
reduction_func, observed, request
):
Expand Down Expand Up @@ -1432,7 +1432,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
assert (res.loc[unobserved_cats] == expected).all().all()


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_series_groupby_categorical_aggregation_getitem():
# GH 8870
d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]}
Expand Down Expand Up @@ -1488,7 +1488,7 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func):
tm.assert_series_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_read_only_category_no_sort():
# GH33410
cats = np.array([1, 2])
Expand All @@ -1502,7 +1502,7 @@ def test_read_only_category_no_sort():
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_sorted_missing_category_values():
# GH 28597
df = DataFrame(
Expand Down Expand Up @@ -1650,7 +1650,7 @@ def test_categorical_transform():


@pytest.mark.parametrize("func", ["first", "last"])
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
func: str, observed: bool
):
Expand All @@ -1676,7 +1676,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(


@pytest.mark.parametrize("func", ["first", "last"])
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
func: str, observed: bool
):
Expand Down
16 changes: 12 additions & 4 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,17 @@
from pandas.errors import UnsupportedFunctionCall

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna
import pandas._testing as tm
import pandas.core.nanops as nanops
from pandas import (
DataFrame,
Index,
MultiIndex,
Series,
Timestamp,
_testing as tm,
date_range,
isna,
)
from pandas.core import nanops as nanops
from pandas.util import _test_decorators as td


Expand Down Expand Up @@ -410,7 +418,7 @@ def test_cython_median():
tm.assert_frame_equal(rs, xp)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_median_empty_bins(observed):
df = DataFrame(np.random.randint(0, 44, 500))

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def test_groupby_levels_and_columns(self):
by_columns.columns = by_columns.columns.astype(np.int64)
tm.assert_frame_equal(by_levels, by_columns)

@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_groupby_categorical_index_and_columns(self, observed):
# GH18432, adapted for GH25871
columns = ["A", "B", "A", "B"]
Expand Down Expand Up @@ -778,7 +778,7 @@ def test_get_group(self):
with pytest.raises(ValueError, match=msg):
g.get_group(("foo", "bar", "baz"))

@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_get_group_empty_bins(self, observed):

d = DataFrame([3, 1, 7, 6])
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,12 +472,7 @@ def test_groupby_transform_with_int():

# int case
df = DataFrame(
{
"A": [1, 1, 1, 2, 2, 2],
"B": 1,
"C": [1, 2, 3, 1, 2, 3],
"D": "foo",
}
{"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": [1, 2, 3, 1, 2, 3], "D": "foo",}
)
with np.errstate(all="ignore"):
result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
Expand Down Expand Up @@ -1153,7 +1148,7 @@ def test_transform_lambda_indexing():
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_categorical_and_not_categorical_key(observed):
# Checks that groupby-transform, when grouping by both a categorical
# and a non-categorical key, doesn't try to expand the output to include
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def test_pivot_table(self, observed):
expected = self.data.groupby(index + [columns])["D"].agg(np.mean).unstack()
tm.assert_frame_equal(table, expected)

@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_pivot_table_categorical_observed_equal(self, observed):
# issue #24923
df = DataFrame(
Expand Down Expand Up @@ -1763,7 +1763,7 @@ def test_margins_casted_to_float(self, observed):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_pivot_with_categorical(self, observed, ordered):
# gh-21370
idx = [np.nan, "low", "high", "low", np.nan]
Expand Down Expand Up @@ -1799,7 +1799,7 @@ def test_pivot_with_categorical(self, observed, ordered):

tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_categorical_aggfunc(self, observed):
# GH 9534
df = DataFrame(
Expand All @@ -1820,7 +1820,7 @@ def test_categorical_aggfunc(self, observed):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
def test_categorical_pivot_index_ordering(self, observed):
# GH 8731
df = DataFrame(
Expand Down

0 comments on commit 3b59b23

Please sign in to comment.