diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index b69cf415ac21e..b5a6cf50fcb6a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -776,6 +776,7 @@ Other Deprecations - Deprecated argument ``errors`` for :meth:`Series.mask`, :meth:`Series.where`, :meth:`DataFrame.mask`, and :meth:`DataFrame.where` as ``errors`` had no effect on this methods (:issue:`47728`) - Deprecated arguments ``*args`` and ``**kwargs`` in :class:`Rolling`, :class:`Expanding`, and :class:`ExponentialMovingWindow` ops. (:issue:`47836`) - Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`) +- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; a tuple of length one will be returned instead (:issue:`42795`) .. --------------------------------------------------------------------------- .. 
_whatsnew_150.performance: diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9e26598d85e74..631f70f390319 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -465,7 +465,9 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: klass = type(self.obj) results = [] - for name, group in self: + for name, group in self.grouper.get_iterator( + self._selected_obj, axis=self.axis + ): # this setattr is needed for test_transform_lambda_with_datetimetz object.__setattr__(group, "name", name) res = func(group, *args, **kwargs) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 28e1b2b388035..8e0ed959fabc3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -645,6 +645,7 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): axis: int grouper: ops.BaseGrouper + keys: _KeysArgType | None = None group_keys: bool | lib.NoDefault @final @@ -821,6 +822,19 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: Generator yielding sequence of (name, subsetted object) for each group """ + keys = self.keys + if isinstance(keys, list) and len(keys) == 1: + warnings.warn( + ( + "In a future version of pandas, a length 1 " + "tuple will be returned when iterating over a " + "groupby with a grouper equal to a list of " + "length 1. Don't supply a list with a single grouper " + "to avoid this warning." 
+ ), + FutureWarning, + stacklevel=find_stack_level(), + ) return self.grouper.get_iterator(self._selected_obj, axis=self.axis) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 6ce5ffac9de52..e06a288c1eb38 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -150,7 +150,7 @@ def _groupby_and_merge(by, left: DataFrame, right: DataFrame, merge_pieces): if all(item in right.columns for item in by): rby = right.groupby(by, sort=False) - for key, lhs in lby: + for key, lhs in lby.grouper.get_iterator(lby._selected_obj, axis=lby.axis): if rby is None: rhs = right diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py index 4f1cd3f38343a..17a214292608b 100644 --- a/pandas/plotting/_matplotlib/groupby.py +++ b/pandas/plotting/_matplotlib/groupby.py @@ -16,6 +16,8 @@ concat, ) +from pandas.plotting._matplotlib.misc import unpack_single_str_list + def create_iter_data_given_by( data: DataFrame, kind: str = "hist" @@ -108,7 +110,8 @@ def reconstruct_data_with_by( 1 3.0 4.0 NaN NaN 2 NaN NaN 5.0 6.0 """ - grouped = data.groupby(by) + by_modified = unpack_single_str_list(by) + grouped = data.groupby(by_modified) data_list = [] for key, group in grouped: diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 3b151d67c70be..62242a4a2ddab 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -33,6 +33,7 @@ create_iter_data_given_by, reformat_hist_y_given_by, ) +from pandas.plotting._matplotlib.misc import unpack_single_str_list from pandas.plotting._matplotlib.tools import ( create_subplots, flatten_axes, @@ -67,7 +68,8 @@ def _args_adjust(self): # where subplots are created based on by argument if is_integer(self.bins): if self.by is not None: - grouped = self.data.groupby(self.by)[self.columns] + by_modified = unpack_single_str_list(self.by) + grouped = self.data.groupby(by_modified)[self.columns] self.bins = 
[self._calculate_bins(group) for key, group in grouped] else: self.bins = self._calculate_bins(self.data) diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index e2a0d50544f22..4b74b067053a6 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -475,3 +475,11 @@ def r(h): ax.legend() ax.grid() return ax + + +def unpack_single_str_list(keys): + # GH 42795 + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 920b869ef799b..73aeb17d8c274 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2795,3 +2795,19 @@ def test_groupby_none_column_name(): result = df.groupby(by=[None]).sum() expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None)) tm.assert_frame_equal(result, expected) + + +def test_single_element_list_grouping(): + # GH 42795 + df = DataFrame( + {"a": [np.nan, 1], "b": [np.nan, 5], "c": [np.nan, 2]}, index=["x", "y"] + ) + msg = ( + "In a future version of pandas, a length 1 " + "tuple will be returned when iterating over a " + "groupby with a grouper equal to a list of " + "length 1. Don't supply a list with a single grouper " + "to avoid this warning." + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + values, _ = next(iter(df.groupby(["a"])))