Skip to content

Commit

Permalink
DEPR: Change numeric_only default to False in remaining groupby methods (#49951)
Browse files Browse the repository at this point in the history

* DEPR: Change numeric_only default to False in remaining groupby methods

* More cleanup of ignore_failures
  • Loading branch information
rhshadrach authored Nov 29, 2022
1 parent 32b4222 commit 48e7b6e
Show file tree
Hide file tree
Showing 15 changed files with 158 additions and 419 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ Removal of prior version deprecations/changes
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
- Changed default of ``numeric_only`` to ``False`` in various :class:`.DataFrameGroupBy` methods (:issue:`46072`)
- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
-

Expand Down
31 changes: 9 additions & 22 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@
_agg_template,
_apply_docs,
_transform_template,
warn_dropping_nuisance_columns_deprecated,
)
from pandas.core.groupby.grouper import get_grouper
from pandas.core.indexes.api import (
Expand Down Expand Up @@ -438,7 +437,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
)

def _cython_transform(
self, how: str, numeric_only: bool = True, axis: AxisInt = 0, **kwargs
self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs
):
assert axis == 0 # handled by caller

Expand Down Expand Up @@ -1333,22 +1332,20 @@ def _wrap_applied_output_series(
def _cython_transform(
self,
how: str,
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
axis: AxisInt = 0,
**kwargs,
) -> DataFrame:
assert axis == 0 # handled by caller
# TODO: no tests with self.ndim == 1 for DataFrameGroupBy
numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis)

# With self.axis == 0, we have multi-block tests
# e.g. test_rank_min_int, test_cython_transform_frame
# test_transform_numeric_ret
# With self.axis == 1, _get_data_to_aggregate does a transpose
# so we always have a single block.
mgr: Manager2D = self._get_data_to_aggregate()
orig_mgr_len = len(mgr)
if numeric_only_bool:
if numeric_only:
mgr = mgr.get_numeric_data(copy=False)

def arr_func(bvalues: ArrayLike) -> ArrayLike:
Expand All @@ -1358,12 +1355,9 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:

# We could use `mgr.apply` here and not have to set_axis, but
# we would have to do shape gymnastics for ArrayManager compat
res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=False)
res_mgr = mgr.grouped_reduce(arr_func)
res_mgr.set_axis(1, mgr.axes[1])

if len(res_mgr) < orig_mgr_len:
warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only)

res_df = self.obj._constructor(res_mgr)
if self.axis == 1:
res_df = res_df.T
Expand Down Expand Up @@ -1493,15 +1487,8 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
output = {}
inds = []
for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)):
try:
output[i] = sgb.transform(wrapper)
except TypeError:
# e.g. trying to call nanmean with string values
warn_dropping_nuisance_columns_deprecated(
type(self), "transform", numeric_only=False
)
else:
inds.append(i)
output[i] = sgb.transform(wrapper)
inds.append(i)

if not output:
raise TypeError("Transform function invalid for data types")
Expand Down Expand Up @@ -2243,7 +2230,7 @@ def corr(
self,
method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson",
min_periods: int = 1,
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
) -> DataFrame:
result = self._op_via_apply(
"corr", method=method, min_periods=min_periods, numeric_only=numeric_only
Expand All @@ -2255,7 +2242,7 @@ def cov(
self,
min_periods: int | None = None,
ddof: int | None = 1,
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
) -> DataFrame:
result = self._op_via_apply(
"cov", min_periods=min_periods, ddof=ddof, numeric_only=numeric_only
Expand Down Expand Up @@ -2316,7 +2303,7 @@ def corrwith(
axis: Axis = 0,
drop: bool = False,
method: CorrelationMethod = "pearson",
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
) -> DataFrame:
result = self._op_via_apply(
"corrwith",
Expand Down
Loading

0 comments on commit 48e7b6e

Please sign in to comment.