diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index 446235d1656dc2..35081c5e8a98ea 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -33,7 +33,7 @@ Bug fixes Other ~~~~~ -- +- Introduced ``FutureWarning`` notifying about behaviour change in :meth:`DataFrame.value_counts`, :meth:`Series.value_counts`, :meth:`DataFrameGroupBy.value_counts`, :meth:`SeriesGroupBy.value_counts` - the resulting series will by default now be named ``'count'`` (or ``'proportion'`` if ``normalize=True``), and the index name (if present) will be taken from the original object's name (:issue:`49497`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index cc5ff2e756cfad..831048ab891064 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -810,6 +810,7 @@ def value_counts( normalize: bool = False, bins=None, dropna: bool = True, + name: Hashable | None = None, ) -> Series: """ Compute a histogram of the counts of non-null values. 
@@ -838,7 +839,8 @@ def value_counts( Series, ) - name = getattr(values, "name", None) + if name is None: + name = getattr(values, "name", None) if bins is not None: from pandas.core.reshape.tile import cut @@ -850,7 +852,7 @@ def value_counts( raise TypeError("bins argument only works with numeric data.") from err # count, remove nulls (from the index), and but the bins - result = ii.value_counts(dropna=dropna) + result = ii.value_counts(dropna=dropna, name=name) result = result[result.index.notna()] result.index = result.index.astype("interval") result = result.sort_index() diff --git a/pandas/core/base.py b/pandas/core/base.py index 46803e1f289753..e9c71cf3abed44 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -17,6 +17,7 @@ final, overload, ) +import warnings import numpy as np @@ -37,6 +38,7 @@ cache_readonly, doc, ) +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -912,6 +914,8 @@ def value_counts( ascending: bool = False, bins=None, dropna: bool = True, + *, + name: lib.NoDefault = lib.no_default, ) -> Series: """ Return a Series containing counts of unique values. @@ -991,6 +995,17 @@ def value_counts( NaN 1 dtype: int64 """ + if name is lib.no_default: + result_name = "proportion" if normalize else "count" + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`), and the index " + "will inherit the original object's name. 
Specify " + f"`name='{result_name}'` to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = None return value_counts( self, sort=sort, @@ -998,6 +1013,7 @@ def value_counts( normalize=normalize, bins=bins, dropna=dropna, + name=name, ) def unique(self): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1627a7add25ed4..5ed7b2907d5cc4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6936,6 +6936,8 @@ def value_counts( sort: bool = True, ascending: bool = False, dropna: bool = True, + *, + name: NoDefault = no_default, ) -> Series: """ Return a Series containing counts of unique rows in the DataFrame. @@ -7037,10 +7039,20 @@ def value_counts( NaN 1 dtype: int64 """ + if name is no_default: + result_name = "proportion" if normalize else "count" + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`). Specify " + f"`name='{result_name}'` to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = None if subset is None: subset = self.columns.tolist() - counts = self.groupby(subset, dropna=dropna).grouper.size() + counts = self.groupby(subset, dropna=dropna).grouper.size().rename(name) if sort: counts = counts.sort_values(ascending=ascending) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ec9c8564ab5494..f522160dda3afc 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -604,7 +604,19 @@ def value_counts( ascending: bool = False, bins=None, dropna: bool = True, + *, + name: lib.NoDefault = lib.no_default, ) -> Series: + if name is lib.no_default: + result_name = "proportion" if normalize else "count" + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`). 
Specify " + f"`name='{result_name}'` to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = self.obj.name from pandas.core.reshape.merge import get_join_indexers from pandas.core.reshape.tile import cut @@ -626,6 +638,7 @@ def value_counts( sort=sort, ascending=ascending, bins=bins, + name=name, ) ser.index.names = names return ser @@ -741,7 +754,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray: if is_integer_dtype(out.dtype): out = ensure_int64(out) - return self.obj._constructor(out, index=mi, name=self.obj.name) + return self.obj._constructor(out, index=mi, name=name) def fillna( self, @@ -1875,6 +1888,8 @@ def value_counts( sort: bool = True, ascending: bool = False, dropna: bool = True, + *, + name: lib.NoDefault = lib.no_default, ) -> DataFrame | Series: """ Return a Series or DataFrame containing counts of unique rows. @@ -1979,6 +1994,19 @@ def value_counts( 3 male low US 0.25 4 male medium FR 0.25 """ + if name is lib.no_default and self.as_index: + result_name = "proportion" if normalize else "count" + warnings.warn( + "In pandas 2.0.0, the name of the resulting Series will be " + "'count' (or 'proportion' if `normalize=True`). 
Specify " + f"`name='{result_name}'` to silence this warning.", + FutureWarning, + stacklevel=find_stack_level(), + ) + name = None + elif name is lib.no_default and not self.as_index: + name = None + if self.axis == 1: raise NotImplementedError( "DataFrameGroupBy.value_counts only handles axis=0" @@ -1991,8 +2019,11 @@ def value_counts( grouping.name for grouping in self.grouper.groupings if grouping.in_axis } if isinstance(self._selected_obj, Series): - name = self._selected_obj.name - keys = [] if name in in_axis_names else [self._selected_obj] + keys = ( + [] + if self._selected_obj.name in in_axis_names + else [self._selected_obj] + ) else: unique_cols = set(self._selected_obj.columns) if subset is not None: @@ -2015,8 +2046,8 @@ def value_counts( keys = [ # Can't use .values because the column label needs to be preserved self._selected_obj.iloc[:, idx] - for idx, name in enumerate(self._selected_obj.columns) - if name not in in_axis_names and name in subsetted + for idx, _name in enumerate(self._selected_obj.columns) + if _name not in in_axis_names and _name in subsetted ] groupings = list(self.grouper.groupings) @@ -2038,7 +2069,7 @@ def value_counts( observed=self.observed, dropna=self.dropna, ) - result_series = cast(Series, gb.size()) + result_series = cast(Series, gb.size()).rename(name) # GH-46357 Include non-observed categories # of non-grouping columns regardless of `observed` @@ -2082,7 +2113,8 @@ def value_counts( result = result_series else: # Convert to frame - name = "proportion" if normalize else "count" + if name is None: + name = "proportion" if normalize else "count" index = result_series.index columns = com.fill_missing_names(index.names) if name in columns: diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index dafbd9fee1b8e8..b83b233c546e44 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -24,7 +24,8 @@ def test_value_counts(index_or_series_obj): 
obj = index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) - result = obj.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = obj.value_counts() counter = collections.Counter(obj) expected = Series(dict(counter.most_common()), dtype=np.int64, name=obj.name) @@ -75,7 +76,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): expected = Series(dict(counter.most_common()), dtype=np.int64) expected.index = expected.index.astype(obj.dtype) - result = obj.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = obj.value_counts() if obj.duplicated().any(): # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) @@ -97,7 +99,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): expected[null_obj] = 3 - result = obj.value_counts(dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = obj.value_counts(dropna=False) if obj.duplicated().any(): # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) @@ -119,7 +122,8 @@ def test_value_counts_inferred(index_or_series): s_values = ["a", "b", "b", "b", "b", "c", "d", "d", "a", "a"] s = klass(s_values) expected = Series([4, 3, 2, 1], index=["b", "a", "d", "c"]) - tm.assert_series_equal(s.value_counts(), expected) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(np.unique(np.array(s_values, dtype=np.object_))) @@ -131,17 +135,20 @@ def test_value_counts_inferred(index_or_series): assert s.nunique() == 4 # don't sort, have to sort after the fact as not sorting is # platform-dep - hist = s.value_counts(sort=False).sort_values() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + hist = 
s.value_counts(sort=False).sort_values() expected = Series([3, 1, 4, 2], index=list("acbd")).sort_values() tm.assert_series_equal(hist, expected) # sort ascending - hist = s.value_counts(ascending=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + hist = s.value_counts(ascending=True) expected = Series([1, 2, 3, 4], index=list("cdab")) tm.assert_series_equal(hist, expected) # relative histogram. - hist = s.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + hist = s.value_counts(normalize=True) expected = Series([0.4, 0.3, 0.2, 0.1], index=["b", "a", "d", "c"]) tm.assert_series_equal(hist, expected) @@ -153,14 +160,17 @@ def test_value_counts_bins(index_or_series): # bins msg = "bins argument only works with numeric data" - with pytest.raises(TypeError, match=msg): - s.value_counts(bins=1) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + with pytest.raises(TypeError, match=msg): + s.value_counts(bins=1) s1 = Series([1, 1, 2, 3]) - res1 = s1.value_counts(bins=1) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res1 = s1.value_counts(bins=1) exp1 = Series({Interval(0.997, 3.0): 4}) tm.assert_series_equal(res1, exp1) - res1n = s1.value_counts(bins=1, normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res1n = s1.value_counts(bins=1, normalize=True) exp1n = Series({Interval(0.997, 3.0): 1.0}) tm.assert_series_equal(res1n, exp1n) @@ -173,17 +183,20 @@ def test_value_counts_bins(index_or_series): assert s1.nunique() == 3 # these return the same - res4 = s1.value_counts(bins=4, dropna=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res4 = s1.value_counts(bins=4, dropna=True) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 
2])) tm.assert_series_equal(res4, exp4) - res4 = s1.value_counts(bins=4, dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res4 = s1.value_counts(bins=4, dropna=False) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4, exp4) - res4n = s1.value_counts(bins=4, normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res4n = s1.value_counts(bins=4, normalize=True) exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4n, exp4n) @@ -191,7 +204,8 @@ def test_value_counts_bins(index_or_series): s_values = ["a", "b", "b", "b", np.nan, np.nan, "d", "d", "a", "a", "b"] s = klass(s_values) expected = Series([4, 3, 2], index=["b", "a", "d"]) - tm.assert_series_equal(s.value_counts(), expected) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + tm.assert_series_equal(s.value_counts(), expected) if isinstance(s, Index): exp = Index(["a", "b", np.nan, "d"]) @@ -203,7 +217,8 @@ def test_value_counts_bins(index_or_series): s = klass({}) if klass is dict else klass({}, dtype=object) expected = Series([], dtype=np.int64) - tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + tm.assert_series_equal(s.value_counts(), expected, check_index_type=False) # returned dtype differs depending on original if isinstance(s, Index): tm.assert_index_equal(s.unique(), Index([]), exact=False) @@ -241,7 +256,8 @@ def test_value_counts_datetime64(index_or_series): ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"] ) expected_s = Series([3, 2, 1], index=idx) - tm.assert_series_equal(s.value_counts(), expected_s) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): 
+ tm.assert_series_equal(s.value_counts(), expected_s) expected = pd.array( np.array( @@ -260,11 +276,13 @@ def test_value_counts_datetime64(index_or_series): s = df["dt"].copy() s = klass(list(s.values) + [pd.NaT] * 4) - result = s.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = s.value_counts() assert result.index.dtype == "datetime64[ns]" tm.assert_series_equal(result, expected_s) - result = s.value_counts(dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = s.value_counts(dropna=False) expected_s = pd.concat([Series([4], index=DatetimeIndex([pd.NaT])), expected_s]) tm.assert_series_equal(result, expected_s) @@ -287,7 +305,8 @@ def test_value_counts_datetime64(index_or_series): td = df.dt - df.dt + timedelta(1) td = klass(td, name="dt") - result = td.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = td.value_counts() expected_s = Series([6], index=[Timedelta("1day")], name="dt") tm.assert_series_equal(result, expected_s) @@ -299,7 +318,8 @@ def test_value_counts_datetime64(index_or_series): td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name="dt") - result2 = td2.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s) @@ -309,9 +329,17 @@ def test_value_counts_with_nan(dropna, index_or_series): klass = index_or_series values = [True, pd.NA, np.nan] obj = klass(values) - res = obj.value_counts(dropna=dropna) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + res = obj.value_counts(dropna=dropna) if dropna is True: expected = Series([1], index=Index([True], dtype=obj.dtype)) else: expected = Series([1, 1, 1], index=[True, pd.NA, np.nan]) tm.assert_series_equal(res, expected) + + +def 
test_value_counts_with_name(index_or_series): + # GH49497 + result = index_or_series(["a", "a", "b"]).value_counts(name="count") + expected = Series([2, 1], index=["a", "b"], name="count") + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index 6e8528845ea6bf..8a099a2d2faa1e 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -10,7 +10,8 @@ def test_data_frame_value_counts_unsorted(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts(sort=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(sort=False) expected = pd.Series( data=[1, 2, 1], index=pd.MultiIndex.from_arrays( @@ -27,7 +28,8 @@ def test_data_frame_value_counts_ascending(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts(ascending=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(ascending=True) expected = pd.Series( data=[1, 1, 2], index=pd.MultiIndex.from_arrays( @@ -44,7 +46,8 @@ def test_data_frame_value_counts_default(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts() expected = pd.Series( data=[2, 1, 1], index=pd.MultiIndex.from_arrays( @@ -61,7 +64,8 @@ def test_data_frame_value_counts_normalize(): index=["falcon", "dog", "cat", "ant"], ) - result = df.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(normalize=True) expected = pd.Series( data=[0.5, 0.25, 0.25], index=pd.MultiIndex.from_arrays( @@ -75,7 +79,8 @@ def test_data_frame_value_counts_normalize(): def test_data_frame_value_counts_single_col_default(): df = 
pd.DataFrame({"num_legs": [2, 4, 4, 6]}) - result = df.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts() expected = pd.Series( data=[2, 1, 1], index=pd.MultiIndex.from_arrays([[4, 2, 6]], names=["num_legs"]), @@ -87,7 +92,8 @@ def test_data_frame_value_counts_single_col_default(): def test_data_frame_value_counts_empty(): df_no_cols = pd.DataFrame() - result = df_no_cols.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df_no_cols.value_counts() expected = pd.Series([], dtype=np.int64) tm.assert_series_equal(result, expected) @@ -96,7 +102,8 @@ def test_data_frame_value_counts_empty(): def test_data_frame_value_counts_empty_normalize(): df_no_cols = pd.DataFrame() - result = df_no_cols.value_counts(normalize=True) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df_no_cols.value_counts(normalize=True) expected = pd.Series([], dtype=np.float64) tm.assert_series_equal(result, expected) @@ -110,7 +117,8 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture): "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"], }, ) - result = df.value_counts() + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts() expected = pd.Series( data=[1, 1], index=pd.MultiIndex.from_arrays( @@ -130,7 +138,8 @@ def test_data_frame_value_counts_dropna_false(nulls_fixture): }, ) - result = df.value_counts(dropna=False) + with tm.assert_produces_warning(FutureWarning, match="In pandas 2.0.0, the name"): + result = df.value_counts(dropna=False) expected = pd.Series( data=[1, 1, 1, 1], index=pd.MultiIndex( @@ -144,3 +153,11 @@ def test_data_frame_value_counts_dropna_false(nulls_fixture): ) tm.assert_series_equal(result, expected) + + +def test_value_counts_name(): + df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = 
df.value_counts(name="counts") + expected_idx = pd.MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) + expected = pd.Series([1, 1, 1], name="counts", index=expected_idx) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_frame_value_counts.py b/pandas/tests/groupby/test_frame_value_counts.py index 8255fbab40dced..14241c95574fdf 100644 --- a/pandas/tests/groupby/test_frame_value_counts.py +++ b/pandas/tests/groupby/test_frame_value_counts.py @@ -10,6 +10,11 @@ ) import pandas._testing as tm +VALUE_COUNTS_NAME_MSG = ( + r"In pandas 2.0.0, the name of the resulting Series will be 'count' " + r"\(or 'proportion' if `normalize=True`\)" +) + @pytest.fixture def education_df(): @@ -24,21 +29,24 @@ def education_df(): def test_axis(education_df): gp = education_df.groupby("country", axis=1) - with pytest.raises(NotImplementedError, match="axis"): - gp.value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + with pytest.raises(NotImplementedError, match="axis"): + gp.value_counts() def test_bad_subset(education_df): gp = education_df.groupby("country") - with pytest.raises(ValueError, match="subset"): - gp.value_counts(subset=["country"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + with pytest.raises(ValueError, match="subset"): + gp.value_counts(subset=["country"]) def test_basic(education_df): # gh43564 - result = education_df.groupby("country")[["gender", "education"]].value_counts( - normalize=True - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = education_df.groupby("country")[["gender", "education"]].value_counts( + normalize=True + ) expected = Series( data=[0.5, 0.25, 0.25, 0.5, 0.5], index=MultiIndex.from_tuples( @@ -69,10 +77,10 @@ def _frame_value_counts(df, keys, normalize, sort, ascending): (True, False), ], ) -@pytest.mark.parametrize("as_index", [True, False]) 
+@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize("frame", [True, False]) def test_against_frame_and_seriesgroupby( - education_df, groupby, normalize, sort, ascending, as_index, frame + education_df, groupby, normalize, sort, ascending, as_index, warning, frame ): # test all parameters: # - Use column, array or function as by= parameter @@ -89,14 +97,16 @@ def test_against_frame_and_seriesgroupby( }[groupby] gp = education_df.groupby(by=by, as_index=as_index) - result = gp[["gender", "education"]].value_counts( - normalize=normalize, sort=sort, ascending=ascending - ) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp[["gender", "education"]].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) if frame: # compare against apply with DataFrame value_counts - expected = gp.apply( - _frame_value_counts, ["gender", "education"], normalize, sort, ascending - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + expected = gp.apply( + _frame_value_counts, ["gender", "education"], normalize, sort, ascending + ) if as_index: tm.assert_series_equal(result, expected) @@ -114,9 +124,10 @@ def test_against_frame_and_seriesgroupby( else: # compare against SeriesGroupBy value_counts education_df["both"] = education_df["gender"] + "-" + education_df["education"] - expected = gp["both"].value_counts( - normalize=normalize, sort=sort, ascending=ascending - ) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + expected = gp["both"].value_counts( + normalize=normalize, sort=sort, ascending=ascending + ) expected.name = None if as_index: index_frame = expected.index.to_frame(index=False) @@ -153,6 +164,7 @@ def test_compound( ): # Multiple groupby keys and as_index=False gp = education_df.groupby(["country", "gender"], as_index=False, sort=False) + # with tm.assert_produces_warning(FutureWarning, 
match=VALUE_COUNTS_NAME_MSG): result = gp["education"].value_counts( normalize=normalize, sort=sort, ascending=ascending ) @@ -189,9 +201,10 @@ def test_data_frame_value_counts( ): # 3-way compare with :meth:`~DataFrame.value_counts` # Tests from frame/methods/test_value_counts.py - result_frame = animals_df.value_counts( - sort=sort, ascending=ascending, normalize=normalize - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result_frame = animals_df.value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) expected = Series( data=expected_data, index=MultiIndex.from_arrays( @@ -200,9 +213,10 @@ def test_data_frame_value_counts( ) tm.assert_series_equal(result_frame, expected) - result_frame_groupby = animals_df.groupby("key").value_counts( - sort=sort, ascending=ascending, normalize=normalize - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result_frame_groupby = animals_df.groupby("key").value_counts( + sort=sort, ascending=ascending, normalize=normalize + ) tm.assert_series_equal(result_frame_groupby, expected) @@ -238,7 +252,8 @@ def test_dropna_combinations( nulls_df, group_dropna, count_dropna, expected_rows, expected_values ): gp = nulls_df.groupby(["A", "B"], dropna=group_dropna) - result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) columns = DataFrame() for column in nulls_df.columns: columns[column] = [nulls_df[column][row] for row in expected_rows] @@ -291,7 +306,10 @@ def test_data_frame_value_counts_dropna( # GH 41334 # 3-way compare with :meth:`~DataFrame.value_counts` # Tests with nulls from frame/methods/test_value_counts.py - result_frame = names_with_nulls_df.value_counts(dropna=dropna, normalize=normalize) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + 
result_frame = names_with_nulls_df.value_counts( + dropna=dropna, normalize=normalize + ) expected = Series( data=expected_data, index=expected_index, @@ -301,14 +319,15 @@ def test_data_frame_value_counts_dropna( tm.assert_series_equal(result_frame, expected) - result_frame_groupby = names_with_nulls_df.groupby("key").value_counts( - dropna=dropna, normalize=normalize - ) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result_frame_groupby = names_with_nulls_df.groupby("key").value_counts( + dropna=dropna, normalize=normalize + ) tm.assert_series_equal(result_frame_groupby, expected) -@pytest.mark.parametrize("as_index", [False, True]) +@pytest.mark.parametrize("as_index, warning", ([(False, None), (True, FutureWarning)])) @pytest.mark.parametrize("observed", [False, True]) @pytest.mark.parametrize( "normalize, expected_data", @@ -321,7 +340,7 @@ def test_data_frame_value_counts_dropna( ], ) def test_categorical_single_grouper_with_only_observed_categories( - education_df, as_index, observed, normalize, expected_data + education_df, as_index, warning, observed, normalize, expected_data ): # Test single categorical grouper with only observed grouping categories @@ -330,7 +349,8 @@ def test_categorical_single_grouper_with_only_observed_categories( gp = education_df.astype("category").groupby( "country", as_index=as_index, observed=observed ) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_index = MultiIndex.from_tuples( [ @@ -369,7 +389,7 @@ def test_categorical_single_grouper_with_only_observed_categories( def assert_categorical_single_grouper( - education_df, as_index, observed, expected_index, normalize, expected_data + education_df, as_index, warning, observed, expected_index, normalize, expected_data ): # Test single categorical grouper when non-groupers are also categorical education_df = 
education_df.copy().astype("category") @@ -378,7 +398,8 @@ def assert_categorical_single_grouper( education_df["country"] = education_df["country"].cat.add_categories(["ASIA"]) gp = education_df.groupby("country", as_index=as_index, observed=observed) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_series = Series( data=expected_data, @@ -404,7 +425,7 @@ def assert_categorical_single_grouper( tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize( "normalize, expected_data", [ @@ -416,7 +437,7 @@ def assert_categorical_single_grouper( ], ) def test_categorical_single_grouper_observed_true( - education_df, as_index, normalize, expected_data + education_df, as_index, warning, normalize, expected_data ): # GH#46357 @@ -442,10 +463,11 @@ def test_categorical_single_grouper_observed_true( expected_index=expected_index, normalize=normalize, expected_data=expected_data, + warning=warning, ) -@pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize( "normalize, expected_data", [ @@ -483,7 +505,7 @@ def test_categorical_single_grouper_observed_true( ], ) def test_categorical_single_grouper_observed_false( - education_df, as_index, normalize, expected_data + education_df, as_index, warning, normalize, expected_data ): # GH#46357 @@ -511,6 +533,7 @@ def test_categorical_single_grouper_observed_false( assert_categorical_single_grouper( education_df=education_df, as_index=as_index, + warning=warning, observed=False, expected_index=expected_index, normalize=normalize, @@ -518,7 +541,7 @@ def test_categorical_single_grouper_observed_false( ) -@pytest.mark.parametrize("as_index", [True, False]) 
+@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize( "observed, expected_index", [ @@ -563,7 +586,7 @@ def test_categorical_single_grouper_observed_false( ], ) def test_categorical_multiple_groupers( - education_df, as_index, observed, expected_index, normalize, expected_data + education_df, as_index, warning, observed, expected_index, normalize, expected_data ): # GH#46357 @@ -575,7 +598,8 @@ def test_categorical_multiple_groupers( gp = education_df.groupby( ["country", "education"], as_index=as_index, observed=observed ) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_series = Series( data=expected_data[expected_data > 0.0] if observed else expected_data, @@ -598,7 +622,7 @@ def test_categorical_multiple_groupers( tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("as_index", [False, True]) +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) @pytest.mark.parametrize("observed", [False, True]) @pytest.mark.parametrize( "normalize, expected_data", @@ -612,7 +636,7 @@ def test_categorical_multiple_groupers( ], ) def test_categorical_non_groupers( - education_df, as_index, observed, normalize, expected_data + education_df, as_index, warning, observed, normalize, expected_data ): # GH#46357 Test non-observed categories are included in the result, # regardless of `observed` @@ -621,7 +645,8 @@ def test_categorical_non_groupers( education_df["education"] = education_df["education"].astype("category") gp = education_df.groupby("country", as_index=as_index, observed=observed) - result = gp.value_counts(normalize=normalize) + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = gp.value_counts(normalize=normalize) expected_index = [ ("FR", "male", "low"), @@ -689,13 +714,14 @@ def 
test_mixed_groupings(normalize, expected_label, expected_values): ("level", list("abcd") + ["level_1"], ["a", None, "d", "b", "c", "level_1"]), ], ) -@pytest.mark.parametrize("as_index", [False, True]) -def test_column_label_duplicates(test, columns, expected_names, as_index): +@pytest.mark.parametrize("as_index, warning", ([True, FutureWarning], [False, None])) +def test_column_label_duplicates(test, columns, expected_names, as_index, warning): # GH 44992 # Test for duplicate input column labels and generated duplicate labels df = DataFrame([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]], columns=columns) expected_data = [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)] - result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() + with tm.assert_produces_warning(warning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() if as_index: expected = Series( data=(1, 1), @@ -735,7 +761,8 @@ def test_ambiguous_grouping(): # Test that groupby is not confused by groupings length equal to row count df = DataFrame({"a": [1, 1]}) gb = df.groupby([1, 1]) - result = gb.value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = gb.value_counts() expected = Series([2], index=MultiIndex.from_tuples([[1, 1]], names=[None, "a"])) tm.assert_series_equal(result, expected) @@ -744,22 +771,25 @@ def test_subset_overlaps_gb_key_raises(): # GH 46383 df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) msg = "Keys {'c1'} in subset cannot be in the groupby column keys." 
- with pytest.raises(ValueError, match=msg): - df.groupby("c1").value_counts(subset=["c1"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + with pytest.raises(ValueError, match=msg): + df.groupby("c1").value_counts(subset=["c1"]) def test_subset_doesnt_exist_in_frame(): # GH 46383 df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) msg = "Keys {'c3'} in subset do not exist in the DataFrame." - with pytest.raises(ValueError, match=msg): - df.groupby("c1").value_counts(subset=["c3"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + with pytest.raises(ValueError, match=msg): + df.groupby("c1").value_counts(subset=["c3"]) def test_subset(): # GH 46383 df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1]) - result = df.groupby(level=0).value_counts(subset=["c2"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby(level=0).value_counts(subset=["c2"]) expected = Series( [1, 2], index=MultiIndex.from_arrays([[0, 1], ["x", "y"]], names=[None, "c2"]) ) @@ -773,7 +803,8 @@ def test_subset_duplicate_columns(): index=[0, 1, 1], columns=["c1", "c2", "c2"], ) - result = df.groupby(level=0).value_counts(subset=["c2"]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby(level=0).value_counts(subset=["c2"]) expected = Series( [1, 2], index=MultiIndex.from_arrays( @@ -781,3 +812,12 @@ def test_subset_duplicate_columns(): ), ) tm.assert_series_equal(result, expected) + + +def test_name(): + # https://github.com/pandas-dev/pandas/issues/49497 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df.groupby("a").value_counts(subset=["b"], name="count") + expected_idx = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]], names=["a", "b"]) + expected = Series([1, 1, 1], index=expected_idx, name="count") + tm.assert_series_equal(result, expected) diff --git 
a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 577a72d3f50908..da6dae92192cb5 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -14,6 +14,8 @@ CategoricalIndex, DataFrame, Grouper, + Index, + Interval, MultiIndex, Series, date_range, @@ -21,6 +23,11 @@ ) import pandas._testing as tm +VALUE_COUNTS_NAME_MSG = ( + r"In pandas 2.0.0, the name of the resulting Series will be 'count' " + r"\(or 'proportion' if `normalize=True`\)" +) + def tests_value_counts_index_names_category_column(): # GH44324 Missing name of index category column @@ -31,7 +38,8 @@ def tests_value_counts_index_names_category_column(): } ) df["gender"] = df["gender"].astype("category") - result = df.groupby("country")["gender"].value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = df.groupby("country")["gender"].value_counts() # Construct expected, very specific multiindex df_mi_expected = DataFrame([["US", "female"]], columns=["country", "gender"]) @@ -103,10 +111,12 @@ def rebuild_index(df): } gr = df.groupby(keys, sort=isort) - left = gr["3rd"].value_counts(**kwargs) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + left = gr["3rd"].value_counts(**kwargs) gr = df.groupby(keys, sort=isort) - right = gr["3rd"].apply(Series.value_counts, **kwargs) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + right = gr["3rd"].apply(Series.value_counts, **kwargs) right.index.names = right.index.names[:-1] + ["3rd"] # have to sort on index because of unstable sort on values @@ -135,8 +145,9 @@ def test_series_groupby_value_counts_with_grouper(): dfg = df.groupby(Grouper(freq="1D", key="Datetime")) # have to sort on index because of unstable sort on values xref GH9212 - result = dfg["Food"].value_counts().sort_index() - expected = dfg["Food"].apply(Series.value_counts).sort_index() + with 
tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = dfg["Food"].value_counts().sort_index() + expected = dfg["Food"].apply(Series.value_counts).sort_index() expected.index.names = result.index.names tm.assert_series_equal(result, expected) @@ -148,7 +159,8 @@ def test_series_groupby_value_counts_empty(columns): df = DataFrame(columns=columns) dfg = df.groupby(columns[:-1]) - result = dfg[columns[-1]].value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = dfg[columns[-1]].value_counts() expected = Series([], name=columns[-1], dtype=result.dtype) expected.index = MultiIndex.from_arrays([[]] * len(columns), names=columns) @@ -161,8 +173,9 @@ def test_series_groupby_value_counts_one_row(columns): df = DataFrame(data=[range(len(columns))], columns=columns) dfg = df.groupby(columns[:-1]) - result = dfg[columns[-1]].value_counts() - expected = df.value_counts().rename(columns[-1]) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = dfg[columns[-1]].value_counts() + expected = df.value_counts().rename(columns[-1]) tm.assert_series_equal(result, expected) @@ -171,7 +184,8 @@ def test_series_groupby_value_counts_on_categorical(): # GH38672 s = Series(Categorical(["a"], categories=["a", "b"])) - result = s.groupby([0]).value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = s.groupby([0]).value_counts() expected = Series( data=[1, 0], @@ -191,3 +205,26 @@ def test_series_groupby_value_counts_on_categorical(): # dtype: int64 tm.assert_series_equal(result, expected) + + +def test_groupby_value_counts_name(): + # https://github.com/pandas-dev/pandas/issues/49497 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + result = df.groupby("a")["b"].value_counts(name="count") + expected_idx = MultiIndex.from_arrays(([1, 2, 3], [4, 5, 6]), names=["a", "b"]) + expected = Series([1, 1, 1], name="count", 
index=expected_idx) + tm.assert_series_equal(result, expected) + + result = df.groupby("a").value_counts(name="count") + tm.assert_series_equal(result, expected) + + result = df.groupby("a")["b"].value_counts(name="count", bins=[2, 7]) + expected_idx = MultiIndex.from_arrays( + [Index([1, 2, 3], name="a"), Index([Interval(1.999, 7)] * 3, name="b")] + ) + expected = Series([1, 1, 1], index=expected_idx, name="count") + tm.assert_series_equal(result, expected) + + result = df.groupby("a", as_index=False)["b"].value_counts(name="count") + expected = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "count": [1, 1, 1]}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index a7d57eee7e5a1a..9ec2b317f16bda 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -9,6 +9,11 @@ ) import pandas._testing as tm +VALUE_COUNTS_NAME_MSG = ( + r"In pandas 2.0.0, the name of the resulting Series will be 'count' " + r"\(or 'proportion' if `normalize=True`\)" +) + class TestSeriesValueCounts: def test_value_counts_datetime(self): @@ -28,15 +33,18 @@ def test_value_counts_datetime(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check DatetimeIndex outputs the same result idx = pd.DatetimeIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with 
tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_datetime_tz(self): values = [ @@ -55,13 +63,16 @@ def test_value_counts_datetime_tz(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) idx = pd.DatetimeIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_period(self): values = [ @@ -77,15 +88,18 @@ def test_value_counts_period(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check DatetimeIndex outputs the same result idx = pd.PeriodIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - 
tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_categorical_ordered(self): # most dtypes are tested in tests/base @@ -95,15 +109,18 @@ def test_value_counts_categorical_ordered(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check CategoricalIndex outputs the same result idx = CategoricalIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_categorical_not_ordered(self): values = Categorical([1, 2, 3, 1, 1, 3], ordered=False) @@ -112,27 +129,32 @@ def test_value_counts_categorical_not_ordered(self): exp = Series([3, 2, 1], index=exp_idx, name="xxx") ser = Series(values, name="xxx") - tm.assert_series_equal(ser.value_counts(), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(), exp) # check CategoricalIndex outputs the same result idx = CategoricalIndex(values, name="xxx") - tm.assert_series_equal(idx.value_counts(), exp) + with 
tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(idx.value_counts(), exp) # normalize exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="xxx") - tm.assert_series_equal(ser.value_counts(normalize=True), exp) - tm.assert_series_equal(idx.value_counts(normalize=True), exp) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + tm.assert_series_equal(ser.value_counts(normalize=True), exp) + tm.assert_series_equal(idx.value_counts(normalize=True), exp) def test_value_counts_categorical(self): # GH#12835 cats = Categorical(list("abcccb"), categories=list("cabd")) ser = Series(cats, name="xxx") - res = ser.value_counts(sort=False) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts(sort=False) exp_index = CategoricalIndex(list("cabd"), categories=cats.categories) exp = Series([3, 1, 2, 0], name="xxx", index=exp_index) tm.assert_series_equal(res, exp) - res = ser.value_counts(sort=True) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts(sort=True) exp_index = CategoricalIndex(list("cbad"), categories=cats.categories) exp = Series([3, 2, 1, 0], name="xxx", index=exp_index) @@ -141,7 +163,8 @@ def test_value_counts_categorical(self): # check object dtype handles the Series.name as the same # (tested in tests/base) ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx") - res = ser.value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts() exp = Series([3, 2, 1], name="xxx", index=["c", "b", "a"]) tm.assert_series_equal(res, exp) @@ -152,10 +175,12 @@ def test_value_counts_categorical_with_nan(self): ser = Series(["a", "b", "a"], dtype="category") exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) - res = ser.value_counts(dropna=True) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res 
= ser.value_counts(dropna=True) tm.assert_series_equal(res, exp) - res = ser.value_counts(dropna=True) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts(dropna=True) tm.assert_series_equal(res, exp) # same Series via two different constructions --> same behaviour @@ -169,18 +194,21 @@ def test_value_counts_categorical_with_nan(self): for ser in series: # None is a NaN value, so we exclude its count here exp = Series([2, 1], index=CategoricalIndex(["a", "b"])) - res = ser.value_counts(dropna=True) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts(dropna=True) tm.assert_series_equal(res, exp) # we don't exclude the count of None and sort by counts exp = Series([3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"])) - res = ser.value_counts(dropna=False) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts(dropna=False) tm.assert_series_equal(res, exp) # When we aren't sorting by counts, and np.nan isn't a # category, it should be last. 
exp = Series([2, 1, 3], index=CategoricalIndex(["a", "b", np.nan])) - res = ser.value_counts(dropna=False, sort=False) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + res = ser.value_counts(dropna=False, sort=False) tm.assert_series_equal(res, exp) @pytest.mark.parametrize( @@ -205,7 +233,8 @@ def test_value_counts_categorical_with_nan(self): ) def test_value_counts_bool_with_nan(self, ser, dropna, exp): # GH32146 - out = ser.value_counts(dropna=dropna) + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + out = ser.value_counts(dropna=dropna) tm.assert_series_equal(out, exp) @pytest.mark.parametrize( @@ -223,5 +252,13 @@ def test_value_counts_bool_with_nan(self, ser, dropna, exp): ) def test_value_counts_complex_numbers(self, input_array, expected): # GH 17927 - result = Series(input_array).value_counts() + with tm.assert_produces_warning(FutureWarning, match=VALUE_COUNTS_NAME_MSG): + result = Series(input_array).value_counts() + tm.assert_series_equal(result, expected) + + def test_value_counts_name(self): + # https://github.com/pandas-dev/pandas/issues/49497 + ser = Series([1, 2, 3], name="foo") + result = ser.value_counts(name="count") + expected = Series([1, 1, 1], index=[1, 2, 3], name="count") tm.assert_series_equal(result, expected)