Commit: wip
MarcoGorelli committed Nov 11, 2022
1 parent e41b6d7 commit a2d9096
Showing 10 changed files with 356 additions and 135 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.2.rst
@@ -33,7 +33,7 @@ Bug fixes

Other
~~~~~
-
- Introduced ``FutureWarning`` notifying about a behaviour change in :meth:`DataFrame.value_counts`, :meth:`Series.value_counts`, :meth:`DataFrameGroupBy.value_counts`, :meth:`SeriesGroupBy.value_counts` - the resulting Series will by default be named ``'count'`` (or ``'proportion'`` if ``normalize=True``), and the index name (if present) will be taken from the original object's name (:issue:`49497`)
-

.. ---------------------------------------------------------------------------
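A minimal user-facing sketch of the deprecation the whatsnew entry above describes, assuming the keyword-only name argument added elsewhere in this commit (example data is illustrative, not from the pandas test suite):

import pandas as pd

s = pd.Series([1, 1, 2], name="a")

s.value_counts()                                   # emits the FutureWarning; behaviour unchanged for now
s.value_counts(name="count")                       # explicit opt-in; warning is silenced
s.value_counts(normalize=True, name="proportion")  # normalized counts, named "proportion"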
6 changes: 4 additions & 2 deletions pandas/core/algorithms.py
Expand Up @@ -810,6 +810,7 @@ def value_counts(
normalize: bool = False,
bins=None,
dropna: bool = True,
name: Hashable | None = None,
) -> Series:
"""
Compute a histogram of the counts of non-null values.
@@ -838,7 +839,8 @@ def value_counts(
Series,
)

name = getattr(values, "name", None)
if name is None:
    name = getattr(values, "name", None)

if bins is not None:
    from pandas.core.reshape.tile import cut
@@ -850,7 +852,7 @@
raise TypeError("bins argument only works with numeric data.") from err

# count, remove nulls (from the index), and use the interval bins as the index
result = ii.value_counts(dropna=dropna)
result = ii.value_counts(dropna=dropna, name=name)
result = result[result.index.notna()]
result.index = result.index.astype("interval")
result = result.sort_index()
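The new name parameter in pandas.core.algorithms.value_counts only takes effect when the caller supplies one; otherwise the helper keeps the existing fallback to the input's own name. A standalone sketch of that resolution logic (the helper name below is hypothetical, not pandas API):

from __future__ import annotations

from typing import Hashable

def _resolve_result_name(values, name: Hashable | None = None) -> Hashable | None:
    # An explicit caller-supplied name wins; otherwise mirror the existing
    # fallback to the input object's own name (None if it has no name).
    if name is None:
        name = getattr(values, "name", None)
    return name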
16 changes: 16 additions & 0 deletions pandas/core/base.py
@@ -17,6 +17,7 @@
final,
overload,
)
import warnings

import numpy as np

@@ -37,6 +38,7 @@
cache_readonly,
doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
is_categorical_dtype,
@@ -912,6 +914,8 @@ def value_counts(
ascending: bool = False,
bins=None,
dropna: bool = True,
*,
name: lib.NoDefault = lib.no_default,
) -> Series:
"""
Return a Series containing counts of unique values.
@@ -991,13 +995,25 @@
NaN 1
dtype: int64
"""
if name is lib.no_default:
    result_name = "proportion" if normalize else "count"
    warnings.warn(
        "In pandas 2.0.0, the name of the resulting Series will be "
        "'count' (or 'proportion' if `normalize=True`), and the index "
        "will inherit the original object's name. Specify "
        f"`name='{result_name}'` to silence this warning.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    name = None
return value_counts(
self,
sort=sort,
ascending=ascending,
normalize=normalize,
bins=bins,
dropna=dropna,
name=name,
)

def unique(self):
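The lib.no_default sentinel used above lets the method tell an omitted name apart from an explicit name=None, so only callers relying on the old default are warned. A self-contained sketch of that pattern under stand-in names (not pandas API):

import warnings

_NO_DEFAULT = object()  # stand-in for pandas' lib.no_default sentinel

def value_counts_like(*, normalize: bool = False, name=_NO_DEFAULT):
    if name is _NO_DEFAULT:
        suggested = "proportion" if normalize else "count"
        warnings.warn(
            f"Pass name={suggested!r} to adopt the future default and silence this warning.",
            FutureWarning,
            stacklevel=2,
        )
        name = None  # keep the current behaviour for now
    return name

value_counts_like()              # warns; falls back to the old default
value_counts_like(name="count")  # no warning; explicit opt-in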
14 changes: 13 additions & 1 deletion pandas/core/frame.py
@@ -6936,6 +6936,8 @@ def value_counts(
sort: bool = True,
ascending: bool = False,
dropna: bool = True,
*,
name: NoDefault = no_default,
) -> Series:
"""
Return a Series containing counts of unique rows in the DataFrame.
@@ -7037,10 +7039,20 @@
NaN 1
dtype: int64
"""
if name is no_default:
    result_name = "proportion" if normalize else "count"
    warnings.warn(
        "In pandas 2.0.0, the name of the resulting Series will be "
        "'count' (or 'proportion' if `normalize=True`). Specify "
        f"`name='{result_name}'` to silence this warning.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    name = None
if subset is None:
    subset = self.columns.tolist()

counts = self.groupby(subset, dropna=dropna).grouper.size()
counts = self.groupby(subset, dropna=dropna).grouper.size().rename(name)

if sort:
counts = counts.sort_values(ascending=ascending)
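With the frame-level change above, the counts produced by grouper.size() are renamed to the caller-supplied name. A short usage sketch under this work-in-progress behaviour (example data is illustrative):

import pandas as pd

df = pd.DataFrame({"x": ["a", "a", "b"], "y": [1, 1, 2]})

df.value_counts()                                   # emits the FutureWarning; behaviour unchanged for now
df.value_counts(name="count")                       # counts Series explicitly named "count"
df.value_counts(normalize=True, name="proportion")  # proportions named "proportion"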
46 changes: 39 additions & 7 deletions pandas/core/groupby/generic.py
@@ -604,7 +604,19 @@ def value_counts(
ascending: bool = False,
bins=None,
dropna: bool = True,
*,
name: lib.NoDefault = lib.no_default,
) -> Series:
if name is lib.no_default:
    result_name = "proportion" if normalize else "count"
    warnings.warn(
        "In pandas 2.0.0, the name of the resulting Series will be "
        "'count' (or 'proportion' if `normalize=True`). Specify "
        f"`name='{result_name}'` to silence this warning.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    name = self.obj.name

from pandas.core.reshape.merge import get_join_indexers
from pandas.core.reshape.tile import cut
@@ -626,6 +638,7 @@
sort=sort,
ascending=ascending,
bins=bins,
name=name,
)
ser.index.names = names
return ser
@@ -741,7 +754,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray:

if is_integer_dtype(out.dtype):
out = ensure_int64(out)
return self.obj._constructor(out, index=mi, name=self.obj.name)
return self.obj._constructor(out, index=mi, name=name)

def fillna(
self,
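When name is omitted, SeriesGroupBy.value_counts warns and falls back to self.obj.name, so the result keeps its pre-2.0 name; an explicit name overrides it. A usage sketch under this work-in-progress behaviour (example data is illustrative):

import pandas as pd

df = pd.DataFrame({"g": ["x", "x", "y"], "v": [1, 1, 2]})

df.groupby("g")["v"].value_counts()              # warns; result is still named "v"
df.groupby("g")["v"].value_counts(name="count")  # silenced; result is named "count"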
@@ -1875,6 +1888,8 @@ def value_counts(
sort: bool = True,
ascending: bool = False,
dropna: bool = True,
*,
name: lib.NoDefault = lib.no_default,
) -> DataFrame | Series:
"""
Return a Series or DataFrame containing counts of unique rows.
@@ -1979,6 +1994,19 @@
3 male low US 0.25
4 male medium FR 0.25
"""
if name is lib.no_default and self.as_index:
    result_name = "proportion" if normalize else "count"
    warnings.warn(
        "In pandas 2.0.0, the name of the resulting Series will be "
        "'count' (or 'proportion' if `normalize=True`). Specify "
        f"`name='{result_name}'` to silence this warning.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    name = None
elif name is lib.no_default and not self.as_index:
    name = None

if self.axis == 1:
raise NotImplementedError(
"DataFrameGroupBy.value_counts only handles axis=0"
@@ -1991,8 +2019,11 @@
grouping.name for grouping in self.grouper.groupings if grouping.in_axis
}
if isinstance(self._selected_obj, Series):
name = self._selected_obj.name
keys = [] if name in in_axis_names else [self._selected_obj]
keys = (
[]
if self._selected_obj.name in in_axis_names
else [self._selected_obj]
)
else:
unique_cols = set(self._selected_obj.columns)
if subset is not None:
@@ -2015,8 +2046,8 @@
keys = [
# Can't use .values because the column label needs to be preserved
self._selected_obj.iloc[:, idx]
for idx, name in enumerate(self._selected_obj.columns)
if name not in in_axis_names and name in subsetted
for idx, _name in enumerate(self._selected_obj.columns)
if _name not in in_axis_names and _name in subsetted
]

groupings = list(self.grouper.groupings)
@@ -2038,7 +2069,7 @@
observed=self.observed,
dropna=self.dropna,
)
result_series = cast(Series, gb.size())
result_series = cast(Series, gb.size()).rename(name)

# GH-46357 Include non-observed categories
# of non-grouping columns regardless of `observed`
@@ -2082,7 +2113,8 @@
result = result_series
else:
# Convert to frame
name = "proportion" if normalize else "count"
if name is None:
    name = "proportion" if normalize else "count"
index = result_series.index
columns = com.fill_missing_names(index.names)
if name in columns:
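For DataFrameGroupBy.value_counts the warning is only raised when as_index=True, since with as_index=False the result is a DataFrame whose counts column already falls back to 'count' or 'proportion'. A usage sketch under this work-in-progress behaviour (example data is illustrative):

import pandas as pd

df = pd.DataFrame({"g": ["x", "x", "y"], "v": [1, 1, 2]})

df.groupby("g").value_counts()                  # warns; counts Series, behaviour unchanged for now
df.groupby("g").value_counts(name="count")      # silenced; counts Series named "count"
df.groupby("g", as_index=False).value_counts()  # no warning; DataFrame with a "count" column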