Will raise in the future

pandas-dev · Dec 7, 2020 · 3b59b23 · 3b59b23
1 parent 57d99a7
commit 3b59b23
Show file tree

Hide file tree

Showing 10 changed files with 64 additions and 53 deletions.
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
@@ -635,9 +635,8 @@ even if some categories are not present in the data:
     )
     df.sum(axis=1, level=1)
 
-Groupby will also show "unused" categories, though this default is deprecated
-and will be changed in a future release. It is recommended to use the
-``observed`` keyword explicitly as below:
+Groupby will also show "unused" categories by default, though relying on this
+default is deprecated. In a future release, ``observed`` must be passed explicitly:
 
 .. ipython:: python
 

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -34,12 +34,13 @@
 
 _observed_msg = textwrap.dedent(
     """\
-Using 'observed=False', because grouping on a categorical. A future version
-of pandas will change to 'observed=True'.
+Grouping by a categorical but 'observed' was not specified.
+Using 'observed=False', but in a future version of pandas
+not specifying 'observed' will raise an error. Pass
+'observed=True' or 'observed=False' to silence this warning.
 
-To silence the warning and switch to the future behavior, pass 'observed=True'.
-
-To keep the current behavior and silence the warning, pass 'observed=False'.
+See the `groupby` documentation for more information on the
+observed keyword.
 """
 )
 

diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -120,8 +120,10 @@
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
 
-    The current default of ``observed=False`` is deprecated and will
-    change to ``observed=True`` in a future version of pandas.
+    The current default of ``observed=False`` is deprecated. In
+    the future ``observed`` will be a required keyword in the presence
+    of a categorical grouper, and failing to specify a value will
+    result in an error.
 
     Explicitly pass ``observed=True`` to silence the warning and not
     show all observed values.

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
@@ -1,13 +1,20 @@
 """
 test cython .agg behavior
 """
-
 import numpy as np
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range
-import pandas._testing as tm
+from pandas import (
+    DataFrame,
+    Index,
+    NaT,
+    Series,
+    Timedelta,
+    Timestamp,
+    _testing as tm,
+    bdate_range,
+)
 from pandas.core.groupby.groupby import DataError
 
 
@@ -175,7 +182,7 @@ def test__cython_agg_general(op, targop):
         ("max", np.max),
     ],
 )
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_cython_agg_empty_buckets(op, targop, observed):
     df = DataFrame([11, 12, 13])
     grps = range(0, 55, 5)
@@ -190,7 +197,7 @@ def test_cython_agg_empty_buckets(op, targop, observed):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_cython_agg_empty_buckets_nanops(observed):
     # GH-18869 can't call nanops on empty groups, so hardcode expected
     # for these

diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -1,7 +1,6 @@
 """
 test all other .agg behavior
 """
-
 import datetime as dt
 from functools import partial
 
@@ -15,10 +14,10 @@
     MultiIndex,
     PeriodIndex,
     Series,
+    _testing as tm,
     date_range,
     period_range,
 )
-import pandas._testing as tm
 from pandas.core.base import SpecificationError
 
 from pandas.io.formats.printing import pprint_thing
@@ -555,7 +554,7 @@ def test_agg_structs_series(structure, expected):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_agg_category_nansum(observed):
     categories = ["a", "b", "c"]
     df = DataFrame(

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -212,7 +212,7 @@ def f(x):
     tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_level_get_group(observed):
     # GH15155
     df = DataFrame(
@@ -277,7 +277,7 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed(observed):
     # multiple groupers, don't re-expand the output space
     # of the grouper
@@ -386,7 +386,7 @@ def test_observed(observed):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_codes_remap(observed):
     d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]}
     df = DataFrame(d)
@@ -427,7 +427,7 @@ def test_observed_perf():
     assert result.index.levels[2].nunique() == df.other_id.nunique()
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_groups(observed):
     # gh-20583
     # test that we have the appropriate groups
@@ -450,7 +450,7 @@ def test_observed_groups(observed):
     tm.assert_dict_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_groups_with_nan(observed):
     # GH 24740
     df = DataFrame(
@@ -487,7 +487,7 @@ def test_observed_nth():
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_categorical_with_nan(observed):
     # GH 21151
     s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])
@@ -511,7 +511,7 @@ def test_dataframe_categorical_with_nan(observed):
 @pytest.mark.parametrize("ordered", [True, False])
 @pytest.mark.parametrize("observed", [True, False])
 @pytest.mark.parametrize("sort", [True, False])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # GH 25871: Fix groupby sorting on ordered Categoricals
     # GH 25167: Groupby with observed=True doesn't sort
@@ -1176,7 +1176,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs):
 
 @pytest.mark.parametrize("operation", ["agg", "apply"])
 @pytest.mark.parametrize("observed", [False, None])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
     # GH 24880
     index, _ = MultiIndex.from_product(
@@ -1241,7 +1241,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
         ),
     ],
 )
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
     # GH 24880
     expected = Series(data=data, index=index, name="C")
@@ -1259,7 +1259,7 @@ def test_groupby_categorical_series_dataframe_consistent(df_cat):
 
 
 @pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_groupby_categorical_axis_1(code):
     # GH 13420
     df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]})
@@ -1269,7 +1269,7 @@ def test_groupby_categorical_axis_1(code):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_groupby_cat_preserves_structure(observed, ordered):
     # GH 28787
     df = DataFrame(
@@ -1298,7 +1298,7 @@ def test_get_nonexistent_category():
         )
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request):
     # GH 17605
     if reduction_func == "ngroup":
@@ -1398,7 +1398,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun
 
 
 @pytest.mark.parametrize("observed", [False, None])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
     reduction_func, observed, request
 ):
@@ -1432,7 +1432,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
         assert (res.loc[unobserved_cats] == expected).all().all()
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_categorical_aggregation_getitem():
     # GH 8870
     d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]}
@@ -1488,7 +1488,7 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_read_only_category_no_sort():
     # GH33410
     cats = np.array([1, 2])
@@ -1502,7 +1502,7 @@ def test_read_only_category_no_sort():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_sorted_missing_category_values():
     # GH 28597
     df = DataFrame(
@@ -1650,7 +1650,7 @@ def test_categorical_transform():
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     func: str, observed: bool
 ):
@@ -1676,7 +1676,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     func: str, observed: bool
 ):

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -7,9 +7,17 @@
 from pandas.errors import UnsupportedFunctionCall
 
 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna
-import pandas._testing as tm
-import pandas.core.nanops as nanops
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    _testing as tm,
+    date_range,
+    isna,
+)
+from pandas.core import nanops as nanops
 from pandas.util import _test_decorators as td
 
 
@@ -410,7 +418,7 @@ def test_cython_median():
     tm.assert_frame_equal(rs, xp)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_median_empty_bins(observed):
     df = DataFrame(np.random.randint(0, 44, 500))
 

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -310,7 +310,7 @@ def test_groupby_levels_and_columns(self):
         by_columns.columns = by_columns.columns.astype(np.int64)
         tm.assert_frame_equal(by_levels, by_columns)
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_groupby_categorical_index_and_columns(self, observed):
         # GH18432, adapted for GH25871
         columns = ["A", "B", "A", "B"]
@@ -778,7 +778,7 @@ def test_get_group(self):
         with pytest.raises(ValueError, match=msg):
             g.get_group(("foo", "bar", "baz"))
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_get_group_empty_bins(self, observed):
 
         d = DataFrame([3, 1, 7, 6])

diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -472,12 +472,7 @@ def test_groupby_transform_with_int():
 
     # int case
     df = DataFrame(
-        {
-            "A": [1, 1, 1, 2, 2, 2],
-            "B": 1,
-            "C": [1, 2, 3, 1, 2, 3],
-            "D": "foo",
-        }
+        {"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": [1, 2, 3, 1, 2, 3], "D": "foo",}
     )
     with np.errstate(all="ignore"):
         result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
@@ -1153,7 +1148,7 @@ def test_transform_lambda_indexing():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_categorical_and_not_categorical_key(observed):
     # Checks that groupby-transform, when grouping by both a categorical
     # and a non-categorical key, doesn't try to expand the output to include

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -108,7 +108,7 @@ def test_pivot_table(self, observed):
         expected = self.data.groupby(index + [columns])["D"].agg(np.mean).unstack()
         tm.assert_frame_equal(table, expected)
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_pivot_table_categorical_observed_equal(self, observed):
         # issue #24923
         df = DataFrame(
@@ -1763,7 +1763,7 @@ def test_margins_casted_to_float(self, observed):
         )
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_pivot_with_categorical(self, observed, ordered):
         # gh-21370
         idx = [np.nan, "low", "high", "low", np.nan]
@@ -1799,7 +1799,7 @@ def test_pivot_with_categorical(self, observed, ordered):
 
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_categorical_aggfunc(self, observed):
         # GH 9534
         df = DataFrame(
@@ -1820,7 +1820,7 @@ def test_categorical_aggfunc(self, observed):
         )
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_categorical_pivot_index_ordering(self, observed):
         # GH 8731
         df = DataFrame(