From 54c50687ddfcd79814aa1f854056b51eacd4e9e1 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 00:29:21 -0700 Subject: [PATCH 01/55] DOC #45443 edited the documentation of where/mask functions --- pandas/core/generic.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba3474a2513fb..b46eff137394c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9614,7 +9614,8 @@ def where( The {name} method is an application of the if-then idiom. For each element in the calling DataFrame, if ``cond`` is ``{cond}`` the element is used; otherwise the corresponding element from the DataFrame - ``other`` is used. + ``other`` is used. If `cond` {klass} is less in size than `other`, the default bool + for the missing value is {cond_rev}. The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to @@ -9641,6 +9642,23 @@ def where( 4 NaN dtype: float64 + >>> s = pd.Series(range(5)) + >>> t = pd.Series([True, False]) + >>> s.where(t,99) + 0 0 + 1 99 + 2 99 + 3 99 + 4 99 + dtype: int64 + >>> s.mask(t, 99) + 0 99 + 1 1 + 2 99 + 3 99 + 4 99 + dtype: int64 + >>> s.where(s > 1, 10) 0 10 1 10 From 2951fb14ef8c589f50b5a28e76878de410968b79 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 00:39:29 -0700 Subject: [PATCH 02/55] DOC #45443 edited the documentation of where/mask functions --- pandas/core/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b46eff137394c..489ad1e3bf5c2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9614,8 +9614,8 @@ def where( The {name} method is an application of the if-then idiom. For each element in the calling DataFrame, if ``cond`` is ``{cond}`` the element is used; otherwise the corresponding element from the DataFrame - ``other`` is used. If `cond` {klass} is less in size than `other`, the default bool - for the missing value is {cond_rev}. + ``other`` is used. If `cond` {klass} is less in size than `other`, the + default bool for the missing value is {cond_rev}. The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to From 8afd6a1fad45a45326e0fdac46eb5cfd8ffac551 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 8 Jul 2022 08:12:01 -0700 Subject: [PATCH 03/55] Update generic.py --- pandas/core/generic.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 489ad1e3bf5c2..ba3474a2513fb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9614,8 +9614,7 @@ def where( The {name} method is an application of the if-then idiom. For each element in the calling DataFrame, if ``cond`` is ``{cond}`` the element is used; otherwise the corresponding element from the DataFrame - ``other`` is used. If `cond` {klass} is less in size than `other`, the - default bool for the missing value is {cond_rev}. + ``other`` is used. The signature for :func:`DataFrame.where` differs from :func:`numpy.where`. Roughly ``df1.where(m, df2)`` is equivalent to @@ -9642,23 +9641,6 @@ def where( 4 NaN dtype: float64 - >>> s = pd.Series(range(5)) - >>> t = pd.Series([True, False]) - >>> s.where(t,99) - 0 0 - 1 99 - 2 99 - 3 99 - 4 99 - dtype: int64 - >>> s.mask(t, 99) - 0 99 - 1 1 - 2 99 - 3 99 - 4 99 - dtype: int64 - >>> s.where(s > 1, 10) 0 10 1 10 From 83ca209a2b7f0e8e4af3f5cb65acc710d21da25d Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 07:44:35 -0700 Subject: [PATCH 04/55] groupby enahn --- pandas/core/groupby/ops.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6dc4ccfa8e1ee..22ea0a6648d1e 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -780,6 +780,8 @@ def _get_grouper(self): @cache_readonly def group_keys_seq(self): if len(self.groupings) == 1: + if len(self.groups) == 1: + return [tuple(self.levels[0])] return self.levels[0] else: ids, _, ngroups = self.group_info From a0b3a599a3978fce43bcf1fb8053197986b014c2 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 12:07:02 -0700 Subject: [PATCH 05/55] fixing pivot --- pandas/core/reshape/pivot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 03aad0ef64dec..41c07c1e52bda 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -368,11 +368,11 @@ def _all_key(key): cat_axis = 1 for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): - all_key = _all_key(key) + all_key = _all_key(key[0]) # we are going to mutate this, so need to copy! piece = piece.copy() - piece[all_key] = margin[key] + piece[all_key] = margin[key[0]] table_pieces.append(piece) margin_keys.append(all_key) From 153bbe54e2b42762f5a54f458d7129605f258aed Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 12:54:34 -0700 Subject: [PATCH 06/55] fixing ops --- pandas/core/groupby/generic.py | 1 + pandas/core/groupby/ops.py | 2 +- pandas/tests/groupby/test_groupby_dropna.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9e26598d85e74..1a94af3c0a9d7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1181,6 +1181,7 @@ def _transform_general(self, func, *args, **kwargs): applied = [] obj = self._obj_with_exclusions gen = self.grouper.get_iterator(obj, axis=self.axis) + print(self.axis) fast_path, slow_path = self._define_paths(func, *args, **kwargs) # Determine whether to use slow or fast path by evaluating on the first group. diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 22ea0a6648d1e..5ea302252c279 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -780,7 +780,7 @@ def _get_grouper(self): @cache_readonly def group_keys_seq(self): if len(self.groupings) == 1: - if len(self.groups) == 1: + if len(self.indices) == 1: return [tuple(self.levels[0])] return self.levels[0] else: diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index ca55263146db3..d6d7bb13b5962 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -187,6 +187,7 @@ def test_groupby_dataframe_slice_then_transform(dropna, index): result = gb.transform(len) expected = pd.DataFrame(expected_data, index=index) + print('result', result,'expected',expected) tm.assert_frame_equal(result, expected) result = gb[["B"]].transform(len) From 43d1f92021fde563499503d11e643f5698dc975b Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 12:55:53 -0700 Subject: [PATCH 07/55] syntax --- pandas/tests/groupby/test_groupby_dropna.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index d6d7bb13b5962..ca55263146db3 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -187,7 +187,6 @@ def test_groupby_dataframe_slice_then_transform(dropna, index): result = gb.transform(len) expected = pd.DataFrame(expected_data, index=index) - print('result', result,'expected',expected) tm.assert_frame_equal(result, expected) result = gb[["B"]].transform(len) From 242468cf58883c343120cb8e93f914a0d30b998c Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 13:33:13 -0700 Subject: [PATCH 08/55] editting test apply --- pandas/tests/groupby/test_apply.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 4cfc3ea41543b..15195806e1122 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -161,7 +161,10 @@ def f_constant_df(group): del names[:] df.groupby("a", group_keys=False).apply(func) - assert names == group_names + if len(group_names) == 1: + assert names == [(group_names[0],)] + else: + assert names == group_names def test_group_apply_once_per_group2(capsys): From 9717f5d25f8313a222004e490b8ee26619b7acd2 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 13:55:44 -0700 Subject: [PATCH 09/55] removing testing lines --- pandas/core/groupby/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1a94af3c0a9d7..9e26598d85e74 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1181,7 +1181,6 @@ def _transform_general(self, func, *args, **kwargs): applied = [] obj = self._obj_with_exclusions gen = self.grouper.get_iterator(obj, axis=self.axis) - print(self.axis) fast_path, slow_path = self._define_paths(func, *args, **kwargs) # Determine whether to use slow or fast path by evaluating on the first group. From 5d7331e8a98730433a554a4a331fd6b19b30b035 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 14:31:07 -0700 Subject: [PATCH 10/55] edit pivot --- pandas/core/reshape/pivot.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 41c07c1e52bda..3164a42d84868 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,4 +1,5 @@ from __future__ import annotations +from turtle import TurtleScreenBase from typing import ( TYPE_CHECKING, @@ -367,12 +368,16 @@ def _all_key(key): margin = data[rows + values].groupby(rows, observed=observed).agg(aggfunc) cat_axis = 1 - for key, piece in table.groupby(level=0, axis=cat_axis, observed=observed): - all_key = _all_key(key[0]) + for keys, piece in table.groupby(level=0, axis=cat_axis, observed=observed): + if keys is tuple or keys is list: + key, = keys + else: + key = keys + all_key = _all_key(key) # we are going to mutate this, so need to copy! piece = piece.copy() - piece[all_key] = margin[key[0]] + piece[all_key] = margin[key] table_pieces.append(piece) margin_keys.append(all_key) From c7093117b3349578189967e1af75ff0f41a3d43d Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 14:33:55 -0700 Subject: [PATCH 11/55] edit pivot lib --- pandas/core/reshape/pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 3164a42d84868..90370141eab71 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,5 +1,4 @@ from __future__ import annotations -from turtle import TurtleScreenBase from typing import ( TYPE_CHECKING, From 4e14c87f038986148bfedb1970c3a21028264c9b Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 14:39:49 -0700 Subject: [PATCH 12/55] edit pivot --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 90370141eab71..5fbac179d5a02 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -368,7 +368,7 @@ def _all_key(key): cat_axis = 1 for keys, piece in table.groupby(level=0, axis=cat_axis, observed=observed): - if keys is tuple or keys is list: + if isinstance(keys, tuple): key, = keys else: key = keys From 09dec703d1fe63d81088eacfd1d20aa65167410a Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 15:04:41 -0700 Subject: [PATCH 13/55] pivot --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 5fbac179d5a02..372d3b8fb9abd 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -369,7 +369,7 @@ def _all_key(key): for keys, piece in table.groupby(level=0, axis=cat_axis, observed=observed): if isinstance(keys, tuple): - key, = keys + (key,) = keys else: key = keys all_key = _all_key(key) From d1e95250771fa907256245dec14f9808889177ac Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 19:15:54 -0700 Subject: [PATCH 14/55] editting ops --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5ea302252c279..c167e2c3e1c18 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -780,7 +780,7 @@ def _get_grouper(self): @cache_readonly def group_keys_seq(self): if len(self.groupings) == 1: - if len(self.indices) == 1: + if len(self.indices) == 1 and len(self.groups) == 1: return [tuple(self.levels[0])] return self.levels[0] else: From 6b5d26b2fb4c1b47d545f27111b1be3fdc808d47 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 22:27:03 -0700 Subject: [PATCH 15/55] skipping tests for changing the inputs --- pandas/tests/extension/test_string.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 6cea21b6672d8..27a55aab6ad91 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -195,6 +195,7 @@ class TestPrinting(base.BasePrintingTests): class TestGroupBy(base.BaseGroupbyTests): + @pytest.mark.skip(reason="uses old format of groupby return GH #42795") def test_groupby_extension_transform(self, data_for_grouping, request): super().test_groupby_extension_transform(data_for_grouping) From b7c797a20be8b1059121f8e1a0c9387a545549da Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 22:32:24 -0700 Subject: [PATCH 16/55] adding tests --- pandas/tests/groupby/test_groupby.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 920b869ef799b..169b01462ea11 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2795,3 +2795,20 @@ def test_groupby_none_column_name(): result = df.groupby(by=[None]).sum() expected = DataFrame({"b": [2, 5], "c": [9, 13]}, index=Index([1, 2], name=None)) tm.assert_frame_equal(result, expected) + + +def test_groupby_iterator_one_grouper(): + df = pd.DataFrame(columns=['a','b','c'], index=['x','y']) + df.loc['y'] = pd.Series({'a':1, 'b':5, 'c':2}) + + values, _ = next(iter(df.groupby(['a', 'b']))) + print(type(values)) + assert (isinstance(values, tuple)) == True + + values, _ = next(iter(df.groupby(['a']))) + print(type(values)) + assert (isinstance(values, tuple)) == True + + values, _ = next(iter(df.groupby('a'))) + print(type(values)) + assert (isinstance(values, tuple)) == True From 600cdd9c10250b65803eb747f269b85afd662e60 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Thu, 14 Jul 2022 22:35:17 -0700 Subject: [PATCH 17/55] adding test groupby --- pandas/tests/groupby/test_groupby.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 169b01462ea11..81b294eea36b9 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2798,17 +2798,17 @@ def test_groupby_none_column_name(): def test_groupby_iterator_one_grouper(): - df = pd.DataFrame(columns=['a','b','c'], index=['x','y']) - df.loc['y'] = pd.Series({'a':1, 'b':5, 'c':2}) + df = pd.DataFrame(columns=['a', 'b', 'c'], index=['x', 'y']) + df.loc['y'] = pd.Series({'a': 1, 'b': 5, 'c': 2}) values, _ = next(iter(df.groupby(['a', 'b']))) print(type(values)) - assert (isinstance(values, tuple)) == True + assert (isinstance(values, tuple)) is True values, _ = next(iter(df.groupby(['a']))) print(type(values)) - assert (isinstance(values, tuple)) == True + assert (isinstance(values, tuple)) is True values, _ = next(iter(df.groupby('a'))) print(type(values)) - assert (isinstance(values, tuple)) == True + assert (isinstance(values, tuple)) is True From 1cb253d8a5e4bff196edc0698b8db708b8db21de Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 08:00:13 -0700 Subject: [PATCH 18/55] editing tests --- pandas/tests/extension/test_arrow.py | 2 ++ pandas/tests/extension/test_string.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7e0792a6010a7..b468410de378a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -330,6 +330,7 @@ def test_groupby_extension_no_sort(self, data_for_grouping, request): super().test_groupby_extension_no_sort(data_for_grouping) def test_groupby_extension_transform(self, data_for_grouping, request): + pytest.skip("uses old format of groupby return GH #42795") pa_dtype = data_for_grouping.dtype.pyarrow_dtype if pa.types.is_boolean(pa_dtype): request.node.add_marker( @@ -349,6 +350,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request): def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): + pytest.skip("uses old format of groupby return GH #42795") pa_dtype = data_for_grouping.dtype.pyarrow_dtype # Is there a better way to get the "series" ID for groupby_apply_op? is_series = "series" in request.node.nodeid diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 27a55aab6ad91..29f6b5ef96f86 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -195,8 +195,8 @@ class TestPrinting(base.BasePrintingTests): class TestGroupBy(base.BaseGroupbyTests): - @pytest.mark.skip(reason="uses old format of groupby return GH #42795") def test_groupby_extension_transform(self, data_for_grouping, request): + pytest.skip("uses old format of groupby return GH #42795") super().test_groupby_extension_transform(data_for_grouping) From c0ef8b61898cf98a6dda6816d29f7d608517d083 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 08:12:35 -0700 Subject: [PATCH 19/55] tests --- pandas/core/reshape/pivot.py | 2 +- pandas/tests/extension/test_arrow.py | 2 -- pandas/tests/extension/test_string.py | 1 - pandas/tests/groupby/test_groupby.py | 8 ++++---- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 372d3b8fb9abd..eb0da2f5e6380 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -369,7 +369,7 @@ def _all_key(key): for keys, piece in table.groupby(level=0, axis=cat_axis, observed=observed): if isinstance(keys, tuple): - (key,) = keys + key = keys[0] else: key = keys all_key = _all_key(key) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index b468410de378a..7e0792a6010a7 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -330,7 +330,6 @@ def test_groupby_extension_no_sort(self, data_for_grouping, request): super().test_groupby_extension_no_sort(data_for_grouping) def test_groupby_extension_transform(self, data_for_grouping, request): - pytest.skip("uses old format of groupby return GH #42795") pa_dtype = data_for_grouping.dtype.pyarrow_dtype if pa.types.is_boolean(pa_dtype): request.node.add_marker( @@ -350,7 +349,6 @@ def test_groupby_extension_transform(self, data_for_grouping, request): def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): - pytest.skip("uses old format of groupby return GH #42795") pa_dtype = data_for_grouping.dtype.pyarrow_dtype # Is there a better way to get the "series" ID for groupby_apply_op? is_series = "series" in request.node.nodeid diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 29f6b5ef96f86..6cea21b6672d8 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -196,7 +196,6 @@ class TestPrinting(base.BasePrintingTests): class TestGroupBy(base.BaseGroupbyTests): def test_groupby_extension_transform(self, data_for_grouping, request): - pytest.skip("uses old format of groupby return GH #42795") super().test_groupby_extension_transform(data_for_grouping) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 81b294eea36b9..e0313d763d8c9 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2798,14 +2798,14 @@ def test_groupby_none_column_name(): def test_groupby_iterator_one_grouper(): - df = pd.DataFrame(columns=['a', 'b', 'c'], index=['x', 'y']) - df.loc['y'] = pd.Series({'a': 1, 'b': 5, 'c': 2}) + df = pd.DataFrame(columns=["a", "b", "c"], index=["x", "y"]) + df.loc["y"] = pd.Series({"a": 1, "b": 5, "c": 2}) - values, _ = next(iter(df.groupby(['a', 'b']))) + values, _ = next(iter(df.groupby(["a", "b"]))) print(type(values)) assert (isinstance(values, tuple)) is True - values, _ = next(iter(df.groupby(['a']))) + values, _ = next(iter(df.groupby(["a"]))) print(type(values)) assert (isinstance(values, tuple)) is True From ef05b5b99322afc6f0dc58b45acba8ffed0287f7 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 08:28:46 -0700 Subject: [PATCH 20/55] tests --- pandas/tests/groupby/test_groupby.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index e0313d763d8c9..cf4ddb7d6af89 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2802,13 +2802,10 @@ def test_groupby_iterator_one_grouper(): df.loc["y"] = pd.Series({"a": 1, "b": 5, "c": 2}) values, _ = next(iter(df.groupby(["a", "b"]))) - print(type(values)) assert (isinstance(values, tuple)) is True values, _ = next(iter(df.groupby(["a"]))) - print(type(values)) assert (isinstance(values, tuple)) is True - values, _ = next(iter(df.groupby('a'))) - print(type(values)) + values, _ = next(iter(df.groupby("a"))) assert (isinstance(values, tuple)) is True From bd157c01493bd46c04a3d0afd2aa1d55adb2e4dc Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 08:43:57 -0700 Subject: [PATCH 21/55] tests --- pandas/tests/groupby/test_groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index cf4ddb7d6af89..4e0ae82d60eeb 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2798,8 +2798,8 @@ def test_groupby_none_column_name(): def test_groupby_iterator_one_grouper(): - df = pd.DataFrame(columns=["a", "b", "c"], index=["x", "y"]) - df.loc["y"] = pd.Series({"a": 1, "b": 5, "c": 2}) + df = DataFrame(columns=["a", "b", "c"], index=["x", "y"]) + df.loc["y"] = Series({"a": 1, "b": 5, "c": 2}) values, _ = next(iter(df.groupby(["a", "b"]))) assert (isinstance(values, tuple)) is True From 0e10d13dde17e881ece89b82639b8eeca2601a55 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:32:05 -0700 Subject: [PATCH 22/55] testing --- pandas/core/reshape/pivot.py | 2 +- pandas/tests/extension/test_arrow.py | 2 ++ pandas/tests/extension/test_string.py | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index eb0da2f5e6380..5fbac179d5a02 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -369,7 +369,7 @@ def _all_key(key): for keys, piece in table.groupby(level=0, axis=cat_axis, observed=observed): if isinstance(keys, tuple): - key = keys[0] + key, = keys else: key = keys all_key = _all_key(key) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7e0792a6010a7..956a0c9033e5d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -330,6 +330,7 @@ def test_groupby_extension_no_sort(self, data_for_grouping, request): super().test_groupby_extension_no_sort(data_for_grouping) def test_groupby_extension_transform(self, data_for_grouping, request): + pytest.skip("Iterating on Groupby gives different output type now") pa_dtype = data_for_grouping.dtype.pyarrow_dtype if pa.types.is_boolean(pa_dtype): request.node.add_marker( @@ -349,6 +350,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request): def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): + pytest.skip("Iterating on Groupby gives different output type now") pa_dtype = data_for_grouping.dtype.pyarrow_dtype # Is there a better way to get the "series" ID for groupby_apply_op? is_series = "series" in request.node.nodeid diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 6cea21b6672d8..865af86a1a9d3 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -196,6 +196,7 @@ class TestPrinting(base.BasePrintingTests): class TestGroupBy(base.BaseGroupbyTests): def test_groupby_extension_transform(self, data_for_grouping, request): + pytest.skip("Iterating on Groupby gives different output type now") super().test_groupby_extension_transform(data_for_grouping) From 7efc7eff864586be5cb856fab1f6f00d601ac6d4 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 10:47:00 -0700 Subject: [PATCH 23/55] pivot editing --- pandas/core/reshape/pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 5fbac179d5a02..372d3b8fb9abd 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -369,7 +369,7 @@ def _all_key(key): for keys, piece in table.groupby(level=0, axis=cat_axis, observed=observed): if isinstance(keys, tuple): - key, = keys + (key,) = keys else: key = keys all_key = _all_key(key) From dd070b27fee6ec4c2312b4e38027b1fd784042ca Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 12:34:39 -0700 Subject: [PATCH 24/55] test_string --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a212da050e1f1..d54dbf34ca132 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5319,7 +5319,7 @@ def __getitem__(self, key): """ getitem = self._data.__getitem__ - if is_integer(key) or is_float(key): + if isinstance(key, int) or isinstance(key, float): # GH#44051 exclude bool, which would return a 2d ndarray key = com.cast_scalar_indexer(key, warn_float=True) return getitem(key) From e995f37f87b25c0cff2bf3d53ecf31d2ad03765f Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 12:49:37 -0700 Subject: [PATCH 25/55] base --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d54dbf34ca132..a212da050e1f1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5319,7 +5319,7 @@ def __getitem__(self, key): """ getitem = self._data.__getitem__ - if isinstance(key, int) or isinstance(key, float): + if is_integer(key) or is_float(key): # GH#44051 exclude bool, which would return a 2d ndarray key = com.cast_scalar_indexer(key, warn_float=True) return getitem(key) From 795135841a13e767cf7e530a614c0f572900bdf8 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 13:02:32 -0700 Subject: [PATCH 26/55] base --- pandas/core/indexes/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a212da050e1f1..612ac0e74788e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5321,6 +5321,8 @@ def __getitem__(self, key): if is_integer(key) or is_float(key): # GH#44051 exclude bool, which would return a 2d ndarray + if isinstance(key, tuple): + key = key[0] key = com.cast_scalar_indexer(key, warn_float=True) return getitem(key) From fa26f3717388e28e241cb96ea993a6680e7faf31 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Fri, 15 Jul 2022 14:43:55 -0700 Subject: [PATCH 27/55] test --- pandas/core/arrays/arrow/array.py | 1 + pandas/core/indexes/base.py | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 92aedbb836b38..4d5abecd28c27 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -221,6 +221,7 @@ def __getitem__(self, item: PositionalIndexer): ) # We are not an array indexer, so maybe e.g. a slice or integer # indexer. We dispatch to pyarrow. + print("item:", item, type(item)) value = self._data[item] if isinstance(value, pa.ChunkedArray): return type(self)(value) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 612ac0e74788e..a212da050e1f1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5321,8 +5321,6 @@ def __getitem__(self, key): if is_integer(key) or is_float(key): # GH#44051 exclude bool, which would return a 2d ndarray - if isinstance(key, tuple): - key = key[0] key = com.cast_scalar_indexer(key, warn_float=True) return getitem(key) From 614a91e5541413ea4bd274afd6e7eea1f9a07856 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 11:44:01 -0700 Subject: [PATCH 28/55] editing array --- pandas/core/arrays/arrow/array.py | 3 ++- pandas/tests/extension/test_arrow.py | 2 -- pandas/tests/extension/test_string.py | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 4d5abecd28c27..97144896e8c73 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -221,7 +221,8 @@ def __getitem__(self, item: PositionalIndexer): ) # We are not an array indexer, so maybe e.g. a slice or integer # indexer. We dispatch to pyarrow. - print("item:", item, type(item)) + if type(item) == np.int64: + item = item.item() value = self._data[item] if isinstance(value, pa.ChunkedArray): return type(self)(value) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 956a0c9033e5d..7e0792a6010a7 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -330,7 +330,6 @@ def test_groupby_extension_no_sort(self, data_for_grouping, request): super().test_groupby_extension_no_sort(data_for_grouping) def test_groupby_extension_transform(self, data_for_grouping, request): - pytest.skip("Iterating on Groupby gives different output type now") pa_dtype = data_for_grouping.dtype.pyarrow_dtype if pa.types.is_boolean(pa_dtype): request.node.add_marker( @@ -350,7 +349,6 @@ def test_groupby_extension_transform(self, data_for_grouping, request): def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): - pytest.skip("Iterating on Groupby gives different output type now") pa_dtype = data_for_grouping.dtype.pyarrow_dtype # Is there a better way to get the "series" ID for groupby_apply_op? is_series = "series" in request.node.nodeid diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 865af86a1a9d3..6cea21b6672d8 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -196,7 +196,6 @@ class TestPrinting(base.BasePrintingTests): class TestGroupBy(base.BaseGroupbyTests): def test_groupby_extension_transform(self, data_for_grouping, request): - pytest.skip("Iterating on Groupby gives different output type now") super().test_groupby_extension_transform(data_for_grouping) From 070c726a0f446eddedcbb93d39db15bd1c3582ac Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 21:29:05 -0700 Subject: [PATCH 29/55] editting ops --- pandas/core/groupby/grouper.py | 2 +- pandas/core/groupby/ops.py | 24 ++++++++++++++++-------- pandas/tests/groupby/test_groupby.py | 28 ++++++++++++++++------------ 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index b9f4166b475ca..1ec5000cde571 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -918,7 +918,7 @@ def is_in_obj(gpr) -> bool: # create the internals grouper grouper = ops.BaseGrouper( - group_axis, groupings, sort=sort, mutated=mutated, dropna=dropna + group_axis, groupings, key=key, sort=sort, mutated=mutated, dropna=dropna ) return grouper, frozenset(exclusions), obj diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index c167e2c3e1c18..e13e7149a7d7c 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -711,6 +711,7 @@ def __init__( self, axis: Index, groupings: Sequence[grouper.Grouping], + key, sort: bool = True, group_keys: bool = True, mutated: bool = False, @@ -721,6 +722,7 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) + self.key = key self._sort = sort self.group_keys = group_keys self.mutated = mutated @@ -780,14 +782,16 @@ def _get_grouper(self): @cache_readonly def group_keys_seq(self): if len(self.groupings) == 1: - if len(self.indices) == 1 and len(self.groups) == 1: - return [tuple(self.levels[0])] - return self.levels[0] - else: - ids, _, ngroups = self.group_info + if isinstance(self.key, list) and self.names[0] != None: + if not isinstance(self.key[0], str): + return self.levels[0] + else: + return self.levels[0] - # provide "flattened" iterator for multi-group setting - return get_flattened_list(ids, ngroups, self.levels, self.codes) + ids, _, ngroups = self.group_info + + # provide "flattened" iterator for multi-group setting + return get_flattened_list(ids, ngroups, self.levels, self.codes) @final def apply( @@ -829,8 +833,12 @@ def apply( def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """dict {group name -> group indices}""" if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex): + if isinstance(self.key, list): + if not isinstance(self.key[0], str): # This shows unused categories in indices GH#38642 - return self.groupings[0].indices + return self.groupings[0].indices + else: + return self.groupings[0].indices codes_list = [ping.codes for ping in self.groupings] keys = [ping.group_index for ping in self.groupings] return get_indexer_dict(codes_list, keys) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4e0ae82d60eeb..41f3018ea1f74 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -806,7 +806,7 @@ def test_groupby_as_index_cython(df): msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = grouped.mean() - expected = data.groupby(["A"]).mean() + expected = data.groupby("A").mean() expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) @@ -1259,7 +1259,7 @@ def test_consistency_name(): } ) - expected = df.groupby(["A"]).B.count() + expected = df.groupby("A").B.count() result = df.B.groupby(df.A).count() tm.assert_series_equal(result, expected) @@ -1495,7 +1495,7 @@ def test_groupby_2d_malformed(): d["label"] = ["l1", "l2"] msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): - tmp = d.groupby(["group"]).mean() + tmp = d.groupby("group").mean() res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -1888,7 +1888,7 @@ def test_pivot_table_values_key_error(): @pytest.mark.parametrize("columns", ["C", ["C"]]) -@pytest.mark.parametrize("keys", [["A"], ["A", "B"]]) +@pytest.mark.parametrize("keys", ["A", ["A", "B"]]) @pytest.mark.parametrize( "values", [ @@ -2240,7 +2240,7 @@ def test_groupby_groups_in_BaseGrouper(): assert result.groups == expected.groups -@pytest.mark.parametrize("group_name", ["x", ["x"]]) +@pytest.mark.parametrize("group_name", ["x"]) def test_groupby_axis_1(group_name): # GH 27614 df = DataFrame( @@ -2643,7 +2643,7 @@ def test_groupby_aggregation_non_numeric_dtype(): index=Index(["M", "W"], dtype="object", name="MW"), ) - gb = df.groupby(by=["MW"]) + gb = df.groupby(by="MW") result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2666,7 +2666,7 @@ def test_groupby_aggregation_multi_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by=["x"]) + gb = df.groupby(by="x") result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2686,7 +2686,7 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by=["x"]) + gb = df.groupby(by="x") msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = gb.sum() @@ -2766,7 +2766,7 @@ def test_by_column_values_with_same_starting_value(): ) aggregate_details = {"Mood": Series.mode, "Credit": "sum"} - result = df.groupby(["Name"]).agg(aggregate_details) + result = df.groupby("Name").agg(aggregate_details) expected_result = DataFrame( { "Mood": [["happy", "sad"], "happy"], @@ -2800,12 +2800,16 @@ def test_groupby_none_column_name(): def test_groupby_iterator_one_grouper(): df = DataFrame(columns=["a", "b", "c"], index=["x", "y"]) df.loc["y"] = Series({"a": 1, "b": 5, "c": 2}) + expected = True values, _ = next(iter(df.groupby(["a", "b"]))) - assert (isinstance(values, tuple)) is True + result = (isinstance(values, tuple)) + assert result == expected values, _ = next(iter(df.groupby(["a"]))) - assert (isinstance(values, tuple)) is True + result = (isinstance(values, tuple)) + assert result == expected values, _ = next(iter(df.groupby("a"))) - assert (isinstance(values, tuple)) is True + result = (isinstance(values, int)) + assert result == expected From cc267bb540ef2cfb49d42fa4cb9610b4ab78a737 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 21:43:11 -0700 Subject: [PATCH 30/55] cond none --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e13e7149a7d7c..8b92146371100 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -782,7 +782,7 @@ def _get_grouper(self): @cache_readonly def group_keys_seq(self): if len(self.groupings) == 1: - if isinstance(self.key, list) and self.names[0] != None: + if isinstance(self.key, list) and self.names[0] is not None: if not isinstance(self.key[0], str): return self.levels[0] else: From 7b37e3f35d711f38db5bcc098e727b7470cdd49a Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 21:52:13 -0700 Subject: [PATCH 31/55] syntax --- pandas/tests/groupby/test_groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 41f3018ea1f74..bd7efd4404f13 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2803,13 +2803,13 @@ def test_groupby_iterator_one_grouper(): expected = True values, _ = next(iter(df.groupby(["a", "b"]))) - result = (isinstance(values, tuple)) + result = isinstance(values, tuple) assert result == expected values, _ = next(iter(df.groupby(["a"]))) - result = (isinstance(values, tuple)) + result = isinstance(values, tuple) assert result == expected values, _ = next(iter(df.groupby("a"))) - result = (isinstance(values, int)) + result = isinstance(values, int) assert result == expected From 4f59ca890f2854ce22dee63f6198414eaa734c35 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 21:59:43 -0700 Subject: [PATCH 32/55] comment --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 8b92146371100..5094a1a8e6703 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -835,7 +835,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex): if isinstance(self.key, list): if not isinstance(self.key[0], str): - # This shows unused categories in indices GH#38642 + # This shows unused categories in indices GH#38642 return self.groupings[0].indices else: return self.groupings[0].indices From 698e4942b34e1a330e6365bb6f71df0908ac5530 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sat, 16 Jul 2022 22:48:40 -0700 Subject: [PATCH 33/55] old ex. --- pandas/tests/groupby/test_allowlist.py | 2 +- pandas/tests/groupby/test_nunique.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index e541abb368a02..1c29f6da6669a 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -392,7 +392,7 @@ def test_groupby_selection_other_methods(df): df.columns.name = "foo" df.index = rng - g = df.groupby(["A"])[["C"]] + g = df.groupby("A")[["C"]] g_exp = df[["C"]].groupby(df["A"]) # methods which aren't just .foo() diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index 6656fd565f79d..f1eb811a788b4 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -164,7 +164,7 @@ def test_nunique_with_timegrouper(): def test_nunique_with_NaT(key, data, dropna, expected): # GH 27951 df = DataFrame({"key": key, "data": data}) - result = df.groupby(["key"])["data"].nunique(dropna=dropna) + result = df.groupby("key")["data"].nunique(dropna=dropna) tm.assert_series_equal(result, expected) From 330c3c336cc938ed4df5610b34f20d2f60cb9c6a Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 08:32:32 -0700 Subject: [PATCH 34/55] editing ops --- pandas/core/groupby/grouper.py | 12 +++++++++- pandas/core/groupby/ops.py | 22 +++++++------------ pandas/tests/groupby/test_apply.py | 5 +---- .../tests/groupby/transform/test_transform.py | 2 +- pandas/tests/reshape/merge/test_join.py | 6 +++-- pandas/tests/reshape/test_pivot.py | 2 +- 6 files changed, 26 insertions(+), 23 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 1ec5000cde571..0691c38b3585a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -733,6 +733,11 @@ def get_grouper( """ group_axis = obj._get_axis(axis) + tuple_unified = False + if isinstance(key, list): + if len(key) == 1 and isinstance(key[0], str): + tuple_unified = True + # validate that the passed single level is compatible with the passed # axis of the object if level is not None: @@ -918,7 +923,12 @@ def is_in_obj(gpr) -> bool: # create the internals grouper grouper = ops.BaseGrouper( - group_axis, groupings, key=key, sort=sort, mutated=mutated, dropna=dropna + group_axis, + groupings, + tuple_unified=tuple_unified, + sort=sort, + mutated=mutated, + dropna=dropna, ) return grouper, frozenset(exclusions), obj diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5094a1a8e6703..15b904f285fc2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -711,7 +711,7 @@ def __init__( self, axis: Index, groupings: Sequence[grouper.Grouping], - key, + tuple_unified: bool = False, sort: bool = True, group_keys: bool = True, mutated: bool = False, @@ -722,7 +722,7 @@ def __init__( self.axis = axis self._groupings: list[grouper.Grouping] = list(groupings) - self.key = key + self.tuple_unified = tuple_unified self._sort = sort self.group_keys = group_keys self.mutated = mutated @@ -781,12 +781,8 @@ def _get_grouper(self): @final @cache_readonly def group_keys_seq(self): - if len(self.groupings) == 1: - if isinstance(self.key, list) and self.names[0] is not None: - if not isinstance(self.key[0], str): + if len(self.groupings) == 1 and self.tuple_unified is False: return self.levels[0] - else: - return self.levels[0] ids, _, ngroups = self.group_info @@ -832,13 +828,9 @@ def apply( @cache_readonly def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """dict {group name -> group indices}""" - if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex): - if isinstance(self.key, list): - if not isinstance(self.key[0], str): - # This shows unused categories in indices GH#38642 - return self.groupings[0].indices - else: - return self.groupings[0].indices + if len(self.groupings) == 1 and self.tuple_unified is False: + # This shows unused categories in indices GH#38642 + return self.groupings[0].indices codes_list = [ping.codes for ping in self.groupings] keys = [ping.group_index for ping in self.groupings] return get_indexer_dict(codes_list, keys) @@ -1133,11 +1125,13 @@ def __init__( binlabels, mutated: bool = False, indexer=None, + tuple_unified: bool = False, ) -> None: self.bins = ensure_int64(bins) self.binlabels = ensure_index(binlabels) self.mutated = mutated self.indexer = indexer + self.tuple_unified = False # These lengths must match, otherwise we could call agg_series # with empty self.bins, which would raise in libreduction. diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 15195806e1122..4cfc3ea41543b 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -161,10 +161,7 @@ def f_constant_df(group): del names[:] df.groupby("a", group_keys=False).apply(func) - if len(group_names) == 1: - assert names == [(group_names[0],)] - else: - assert names == group_names + assert names == group_names def test_group_apply_once_per_group2(capsys): diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 5c64ba3d9e266..7157ab135f55d 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1194,7 +1194,7 @@ def test_transform_lambda_with_datetimetz(): "timezone": ["Etc/GMT+4", "US/Eastern"], } ) - result = df.groupby(["timezone"])["time"].transform( + result = df.groupby("timezone")["time"].transform( lambda x: x.dt.tz_localize(x.name) ) expected = Series( diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 905c2af2d22a5..d6666d93e2f66 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -407,7 +407,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): def test_join_hierarchical_mixed(self): # GH 2024 df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"]) - new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]}) + new_df = df.groupby("a").agg({"b": [np.mean, np.sum]}) other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"]) other_df.set_index("a", inplace=True) # GH 9455, 12219 @@ -718,7 +718,9 @@ def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix # some smoke tests for c in join_col: assert result[c].notna().all() - + if isinstance(join_col, list): + if len(join_col) == 1: + join_col = join_col[0] left_grouped = left.groupby(join_col) right_grouped = right.groupby(join_col) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8312e3b9de9a7..6b2ff46a9d13a 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -860,7 +860,7 @@ def test_pivot_with_tuple_of_values(self, method): pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz")) def _check_output( - self, result, values_col, index=["A", "B"], columns=["C"], margins_col="All" + self, result, values_col, index=["A", "B"], columns="C", margins_col="All" ): col_margins = result.loc[result.index[:-1], margins_col] expected_col_margins = self.data.groupby(index)[values_col].mean() From eb8db3f9fdfcd13378b555eb6f8f324667bce730 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 09:03:39 -0700 Subject: [PATCH 35/55] editing tests --- pandas/core/groupby/ops.py | 1 - pandas/core/reshape/pivot.py | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 15b904f285fc2..07bbbbdbff0c2 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -1132,7 +1132,6 @@ def __init__( self.mutated = mutated self.indexer = indexer self.tuple_unified = False - # These lengths must match, otherwise we could call agg_series # with empty self.bins, which would raise in libreduction. assert len(self.binlabels) == len(self.bins) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 372d3b8fb9abd..e8e4598d1472a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -161,6 +161,9 @@ def __internal_pivot_table( pass values = list(values) + if isinstance(keys, list): + if len(keys) == 1: + keys = keys[0] grouped = data.groupby(keys, observed=observed, sort=sort) agged = grouped.agg(aggfunc) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): From 9c37aa3023e4c353e9818f1096c01e8a57869971 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 09:12:43 -0700 Subject: [PATCH 36/55] indentations --- pandas/core/groupby/grouper.py | 2 +- pandas/core/groupby/ops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 0691c38b3585a..eec24acd3305c 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -736,7 +736,7 @@ def get_grouper( tuple_unified = False if isinstance(key, list): if len(key) == 1 and isinstance(key[0], str): - tuple_unified = True + tuple_unified = True # validate that the passed single level is compatible with the passed # axis of the object diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 07bbbbdbff0c2..068d6f1082b2a 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -782,7 +782,7 @@ def _get_grouper(self): @cache_readonly def group_keys_seq(self): if len(self.groupings) == 1 and self.tuple_unified is False: - return self.levels[0] + return self.levels[0] ids, _, ngroups = self.group_info From 958edf5bc44be2ac7e364e3f3dbf63df00dd1948 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 09:23:01 -0700 Subject: [PATCH 37/55] flake 8 --- pandas/core/groupby/ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 068d6f1082b2a..b71e85ecf219c 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -80,7 +80,6 @@ from pandas.core.generic import NDFrame from pandas.core.groupby import grouper from pandas.core.indexes.api import ( - CategoricalIndex, Index, MultiIndex, ensure_index, From 73efbab1b2ee4a04c124ed3a29e99fcb69996520 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 09:25:03 -0700 Subject: [PATCH 38/55] cond ops --- pandas/core/groupby/ops.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b71e85ecf219c..a64115f620231 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -80,6 +80,7 @@ from pandas.core.generic import NDFrame from pandas.core.groupby import grouper from pandas.core.indexes.api import ( + CategoricalIndex Index, MultiIndex, ensure_index, @@ -827,7 +828,11 @@ def apply( @cache_readonly def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """dict {group name -> group indices}""" - if len(self.groupings) == 1 and self.tuple_unified is False: + if ( + len(self.groupings) == 1 + and isinstance(self.result_index, CategoricalIndex) + and self.tuple_unified is False + ): # This shows unused categories in indices GH#38642 return self.groupings[0].indices codes_list = [ping.codes for ping in self.groupings] From 190eaaba5e80827cb8d8496e5602ac636a4605c5 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 09:35:36 -0700 Subject: [PATCH 39/55] syntax --- pandas/core/groupby/ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index a64115f620231..37e64b41f0317 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -80,7 +80,7 @@ from pandas.core.generic import NDFrame from pandas.core.groupby import grouper from pandas.core.indexes.api import ( - CategoricalIndex + CategoricalIndex, Index, MultiIndex, ensure_index, From 986a1e9670b874eef5cc3c020c33014c6d2fb5c5 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 10:34:43 -0700 Subject: [PATCH 40/55] past ex --- pandas/tests/plotting/frame/test_hist_box_by.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index fe39c3d441396..c4ca64e81cc2a 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -230,11 +230,11 @@ class TestBoxWithBy(TestPlotBase): @pytest.mark.parametrize( "by, column, titles, xticklabels", [ - ("C", "A", ["A"], [["a", "b", "c"]]), + ("C", "A", "A", [["a", "b", "c"]]), ( ["C", "D"], "A", - ["A"], + "A", [ [ "(a, a)", @@ -269,7 +269,7 @@ class TestBoxWithBy(TestPlotBase): ] * 2, ), - (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2), + ("C", None, ["A", "B"], [["a", "b", "c"]] * 2), ], ) def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df): From cd74c7eae3aaeaafb88e682e18e7b52714fcd75e Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 19:31:48 -0700 Subject: [PATCH 41/55] editting input for plot module --- pandas/plotting/_core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index bc39d1f619f49..1bc971bc59fea 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -7,6 +7,9 @@ Sequence, ) +from pandas.core.groupby.generic import ( + fix_groupby_singlelist_input, +) from pandas._config import get_option from pandas._typing import IndexLabel @@ -475,6 +478,8 @@ def boxplot( return_type=None, **kwargs, ): + column = fix_groupby_singlelist_input(column) + by = fix_groupby_singlelist_input(by) plot_backend = _get_plot_backend("matplotlib") return plot_backend.boxplot( data, @@ -600,6 +605,7 @@ def boxplot_frame_groupby( >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP """ + column = fix_groupby_singlelist_input(column) plot_backend = _get_plot_backend(backend) return plot_backend.boxplot_frame_groupby( grouped, @@ -1887,3 +1893,9 @@ def _get_plot_backend(backend: str | None = None): module = _load_backend(backend_str) _backends[backend_str] = module return module + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys From 6fc73713d335e5dd90c408a15cfcceef7f246050 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 19:33:55 -0700 Subject: [PATCH 42/55] unness. import --- pandas/plotting/_core.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 1bc971bc59fea..a3579bc1fac4f 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -7,9 +7,6 @@ Sequence, ) -from pandas.core.groupby.generic import ( - fix_groupby_singlelist_input, -) from pandas._config import get_option from pandas._typing import IndexLabel From dafef53ad6d62c6da703669fbe0ea0e21cdc4bb9 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 19:41:17 -0700 Subject: [PATCH 43/55] syntax --- pandas/plotting/_core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a3579bc1fac4f..13c47dedd7152 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1891,6 +1891,7 @@ def _get_plot_backend(backend: str | None = None): _backends[backend_str] = module return module + def fix_groupby_singlelist_input(keys): if isinstance(keys, list): if len(keys) == 1 and isinstance(keys[0], str): From a3a0dd2ab000e26076e0dd9970539a6d6e61377d Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 20:29:58 -0700 Subject: [PATCH 44/55] core plot --- pandas/plotting/_core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 13c47dedd7152..680d128690c94 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1294,6 +1294,7 @@ def box(self, by=None, **kwargs): >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8)) """ + by = fix_groupby_singlelist_input(by) return self(kind="box", by=by, **kwargs) def hist(self, by=None, bins=10, **kwargs): @@ -1356,6 +1357,7 @@ def hist(self, by=None, bins=10, **kwargs): >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8)) """ + by = fix_groupby_singlelist_input(by) return self(kind="hist", by=by, bins=bins, **kwargs) def kde(self, bw_method=None, ind=None, **kwargs): From 9fb479f515bcb7e97d4c01b6bb3a587be668fe28 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Sun, 17 Jul 2022 21:29:16 -0700 Subject: [PATCH 45/55] ops --- pandas/core/groupby/ops.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 37e64b41f0317..59af90e18acd3 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -831,7 +831,6 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: if ( len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex) - and self.tuple_unified is False ): # This shows unused categories in indices GH#38642 return self.groupings[0].indices From 6464033d55014e9185bf13754e0a41cb39633c02 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 07:49:57 -0700 Subject: [PATCH 46/55] editting plotting --- pandas/core/groupby/ops.py | 5 +---- pandas/plotting/_matplotlib/boxplot.py | 8 ++++++++ pandas/plotting/_matplotlib/hist.py | 9 +++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 59af90e18acd3..fd72a61065404 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -828,10 +828,7 @@ def apply( @cache_readonly def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """dict {group name -> group indices}""" - if ( - len(self.groupings) == 1 - and isinstance(self.result_index, CategoricalIndex) - ): + if len(self.groupings) == 1 and isinstance(self.result_index, CategoricalIndex): # This shows unused categories in indices GH#38642 return self.groupings[0].indices codes_list = [ping.codes for ping in self.groupings] diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index a49b035b1aaf1..71420bc0a938c 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -238,6 +238,7 @@ def _grouped_plot_by_column( return_type=None, **kwargs, ): + by = fix_groupby_singlelist_input(by) grouped = data.groupby(by) if columns is None: if not isinstance(by, (list, tuple)): @@ -534,3 +535,10 @@ def boxplot_frame_groupby( **kwds, ) return ret + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 77496cf049f3d..29d7cf91792b0 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -61,6 +61,7 @@ def _args_adjust(self): # where subplots are created based on by argument if is_integer(self.bins): if self.by is not None: + self.by = fix_groupby_singlelist_input(self.by) grouped = self.data.groupby(self.by)[self.columns] self.bins = [self._calculate_bins(group) for key, group in grouped] else: @@ -262,6 +263,7 @@ def _grouped_plot( "Specify figure size by tuple instead" ) + by = fix_groupby_singlelist_input(by) grouped = data.groupby(by) if column is not None: grouped = grouped[column] @@ -522,3 +524,10 @@ def hist_frame( maybe_adjust_figure(fig, wspace=0.3, hspace=0.3) return axes + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys From d745717ebe048953b7c3c3f65467a3eade3e8d35 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 08:51:18 -0700 Subject: [PATCH 47/55] hist and box --- pandas/plotting/_core.py | 3 --- pandas/plotting/_matplotlib/boxplot.py | 4 ++-- pandas/plotting/_matplotlib/hist.py | 8 ++++---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 680d128690c94..1c76dc787fc72 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1895,7 +1895,4 @@ def _get_plot_backend(backend: str | None = None): def fix_groupby_singlelist_input(keys): - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] return keys diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 71420bc0a938c..fc33898389e35 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -238,8 +238,8 @@ def _grouped_plot_by_column( return_type=None, **kwargs, ): - by = fix_groupby_singlelist_input(by) - grouped = data.groupby(by) + bymodi = fix_groupby_singlelist_input(by) + grouped = data.groupby(bymodi) if columns is None: if not isinstance(by, (list, tuple)): by = [by] diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 29d7cf91792b0..200ebb3e4eb56 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -61,8 +61,8 @@ def _args_adjust(self): # where subplots are created based on by argument if is_integer(self.bins): if self.by is not None: - self.by = fix_groupby_singlelist_input(self.by) - grouped = self.data.groupby(self.by)[self.columns] + bymodi = fix_groupby_singlelist_input(self.by) + grouped = self.data.groupby(bymodi)[self.columns] self.bins = [self._calculate_bins(group) for key, group in grouped] else: self.bins = self._calculate_bins(self.data) @@ -263,8 +263,8 @@ def _grouped_plot( "Specify figure size by tuple instead" ) - by = fix_groupby_singlelist_input(by) - grouped = data.groupby(by) + bymodified = fix_groupby_singlelist_input(by) + grouped = data.groupby(bymodified) if column is not None: grouped = grouped[column] From b788c0ed3aa69892fdfaf7e8e15e31638e05c707 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 10:07:38 -0700 Subject: [PATCH 48/55] box --- pandas/plotting/_matplotlib/boxplot.py | 4 ++-- pandas/tests/plotting/frame/test_hist_box_by.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index fc33898389e35..71420bc0a938c 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -238,8 +238,8 @@ def _grouped_plot_by_column( return_type=None, **kwargs, ): - bymodi = fix_groupby_singlelist_input(by) - grouped = data.groupby(bymodi) + by = fix_groupby_singlelist_input(by) + grouped = data.groupby(by) if columns is None: if not isinstance(by, (list, tuple)): by = [by] diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index c4ca64e81cc2a..7e8a6242dca82 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -230,11 +230,11 @@ class TestBoxWithBy(TestPlotBase): @pytest.mark.parametrize( "by, column, titles, xticklabels", [ - ("C", "A", "A", [["a", "b", "c"]]), + ("C", "A", ["A"], [["a", "b", "c"]]), ( ["C", "D"], "A", - "A", + ["A"], [ [ "(a, a)", From 143466e1db5f74c9122423571413e279edc0d94d Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 10:11:19 -0700 Subject: [PATCH 49/55] hist fix --- pandas/plotting/_matplotlib/hist.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 200ebb3e4eb56..dea966974016d 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -263,10 +263,12 @@ def _grouped_plot( "Specify figure size by tuple instead" ) - bymodified = fix_groupby_singlelist_input(by) - grouped = data.groupby(bymodified) + by = fix_groupby_singlelist_input(by) + grouped = data.groupby(by) if column is not None: grouped = grouped[column] + if isinstance(by, list) and len(by) == 1: + by = [by] naxes = len(grouped) fig, axes = create_subplots( From 56f5aa31e61847967ba9e688279b8b5e64737e59 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 11:03:35 -0700 Subject: [PATCH 50/55] boxplot --- pandas/plotting/_matplotlib/boxplot.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 71420bc0a938c..20fa5de25bd38 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -196,6 +196,9 @@ def _make_plot(self): def _set_ticklabels(self, ax: Axes, labels): if self.orientation == "vertical": + labels = fix_groupby_singlelist_input(labels) + for i in labels: + i = fix_groupby_singlelist_input(i) ax.set_xticklabels(labels) else: ax.set_yticklabels(labels) @@ -374,6 +377,9 @@ def plot_group(keys, values, ax: Axes, **kwds): assert remainder == 0, remainder keys *= i if is_vertical: + labels = fix_groupby_singlelist_input(labels) + for i in labels: + i = fix_groupby_singlelist_input(i) ax.set_xticklabels(keys, rotation=rot) else: ax.set_yticklabels(keys, rotation=rot) From 93889dd0e3e49d22c93a95d64bc7454ce83bb617 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Mon, 18 Jul 2022 11:24:19 -0700 Subject: [PATCH 51/55] bp --- pandas/plotting/_matplotlib/boxplot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 20fa5de25bd38..b3eaf6a4e438f 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -377,8 +377,8 @@ def plot_group(keys, values, ax: Axes, **kwds): assert remainder == 0, remainder keys *= i if is_vertical: - labels = fix_groupby_singlelist_input(labels) - for i in labels: + keys = fix_groupby_singlelist_input(keys) + for i in keys: i = fix_groupby_singlelist_input(i) ax.set_xticklabels(keys, rotation=rot) else: From a12a6fbc8a4a9c067209a49acb837fdedce7f903 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 12:10:14 -0700 Subject: [PATCH 52/55] box and hist --- pandas/plotting/_matplotlib/boxplot.py | 14 -------------- pandas/plotting/_matplotlib/hist.py | 1 - 2 files changed, 15 deletions(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index b3eaf6a4e438f..a49b035b1aaf1 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -196,9 +196,6 @@ def _make_plot(self): def _set_ticklabels(self, ax: Axes, labels): if self.orientation == "vertical": - labels = fix_groupby_singlelist_input(labels) - for i in labels: - i = fix_groupby_singlelist_input(i) ax.set_xticklabels(labels) else: ax.set_yticklabels(labels) @@ -241,7 +238,6 @@ def _grouped_plot_by_column( return_type=None, **kwargs, ): - by = fix_groupby_singlelist_input(by) grouped = data.groupby(by) if columns is None: if not isinstance(by, (list, tuple)): @@ -377,9 +373,6 @@ def plot_group(keys, values, ax: Axes, **kwds): assert remainder == 0, remainder keys *= i if is_vertical: - keys = fix_groupby_singlelist_input(keys) - for i in keys: - i = fix_groupby_singlelist_input(i) ax.set_xticklabels(keys, rotation=rot) else: ax.set_yticklabels(keys, rotation=rot) @@ -541,10 +534,3 @@ def boxplot_frame_groupby( **kwds, ) return ret - - -def fix_groupby_singlelist_input(keys): - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] - return keys diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index dea966974016d..e76441067bf65 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -263,7 +263,6 @@ def _grouped_plot( "Specify figure size by tuple instead" ) - by = fix_groupby_singlelist_input(by) grouped = data.groupby(by) if column is not None: grouped = grouped[column] From fdea4a836ab3d6289862b7a879d49639d595e18d Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 14:09:39 -0700 Subject: [PATCH 53/55] plotting --- pandas/plotting/_matplotlib/core.py | 10 +++++++++- pandas/plotting/_matplotlib/groupby.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 3641cd7213fec..999bde57f01bb 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -175,7 +175,8 @@ def __init__( # For `hist` plot, need to get grouped original data before `self.data` is # updated later if self.by is not None and self._kind == "hist": - self._grouped = data.groupby(self.by) + bymodi = fix_groupby_singlelist_input(by) + self._grouped = data.groupby(bymodi) self.kind = kind @@ -1828,3 +1829,10 @@ def blank_labeler(label, value): leglabels = labels if labels is not None else idx for p, l in zip(patches, leglabels): self._append_legend_handles_labels(p, l) + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py index 4f1cd3f38343a..0c87db697b342 100644 --- a/pandas/plotting/_matplotlib/groupby.py +++ b/pandas/plotting/_matplotlib/groupby.py @@ -108,7 +108,8 @@ def reconstruct_data_with_by( 1 3.0 4.0 NaN NaN 2 NaN NaN 5.0 6.0 """ - grouped = data.groupby(by) + bymodi = fix_groupby_singlelist_input(by) + grouped = data.groupby(bymodi) data_list = [] for key, group in grouped: @@ -134,3 +135,10 @@ def reformat_hist_y_given_by( if by is not None and len(y.shape) > 1: return np.array([remove_na_arraylike(col) for col in y.T]).T return remove_na_arraylike(y) + + +def fix_groupby_singlelist_input(keys): + if isinstance(keys, list): + if len(keys) == 1 and isinstance(keys[0], str): + keys = keys[0] + return keys From ebf7b927485409e801333284a851c96e01e488b8 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 19 Jul 2022 16:44:56 -0700 Subject: [PATCH 54/55] Update _core.py --- pandas/plotting/_core.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 1c76dc787fc72..bc39d1f619f49 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -475,8 +475,6 @@ def boxplot( return_type=None, **kwargs, ): - column = fix_groupby_singlelist_input(column) - by = fix_groupby_singlelist_input(by) plot_backend = _get_plot_backend("matplotlib") return plot_backend.boxplot( data, @@ -602,7 +600,6 @@ def boxplot_frame_groupby( >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP """ - column = fix_groupby_singlelist_input(column) plot_backend = _get_plot_backend(backend) return plot_backend.boxplot_frame_groupby( grouped, @@ -1294,7 +1291,6 @@ def box(self, by=None, **kwargs): >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8)) """ - by = fix_groupby_singlelist_input(by) return self(kind="box", by=by, **kwargs) def hist(self, by=None, bins=10, **kwargs): @@ -1357,7 +1353,6 @@ def hist(self, by=None, bins=10, **kwargs): >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list}) >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8)) """ - by = fix_groupby_singlelist_input(by) return self(kind="hist", by=by, bins=bins, **kwargs) def kde(self, bw_method=None, ind=None, **kwargs): @@ -1892,7 +1887,3 @@ def _get_plot_backend(backend: str | None = None): module = _load_backend(backend_str) _backends[backend_str] = module return module - - -def fix_groupby_singlelist_input(keys): - return keys From 20c65a71d3404a1a8913c4e20fc4f0911bba2866 Mon Sep 17 00:00:00 2001 From: ahmedibrhm <81244897+ahmedibrhm@users.noreply.github.com> Date: Tue, 26 Jul 2022 14:40:14 -0700 Subject: [PATCH 55/55] unnecessary tests --- pandas/tests/groupby/test_allowlist.py | 2 +- pandas/tests/groupby/test_groupby.py | 18 +++++++++--------- pandas/tests/groupby/test_nunique.py | 2 +- .../tests/groupby/transform/test_transform.py | 2 +- .../tests/plotting/frame/test_hist_box_by.py | 2 +- pandas/tests/reshape/merge/test_join.py | 2 +- pandas/tests/reshape/test_pivot.py | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/tests/groupby/test_allowlist.py b/pandas/tests/groupby/test_allowlist.py index 1c29f6da6669a..e541abb368a02 100644 --- a/pandas/tests/groupby/test_allowlist.py +++ b/pandas/tests/groupby/test_allowlist.py @@ -392,7 +392,7 @@ def test_groupby_selection_other_methods(df): df.columns.name = "foo" df.index = rng - g = df.groupby("A")[["C"]] + g = df.groupby(["A"])[["C"]] g_exp = df[["C"]].groupby(df["A"]) # methods which aren't just .foo() diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index bd7efd4404f13..0ce73c73dd2a4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -806,7 +806,7 @@ def test_groupby_as_index_cython(df): msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = grouped.mean() - expected = data.groupby("A").mean() + expected = data.groupby(["A"]).mean() expected.insert(0, "A", expected.index) expected.index = np.arange(len(expected)) tm.assert_frame_equal(result, expected) @@ -1259,7 +1259,7 @@ def test_consistency_name(): } ) - expected = df.groupby("A").B.count() + expected = df.groupby(["A"]).B.count() result = df.B.groupby(df.A).count() tm.assert_series_equal(result, expected) @@ -1495,7 +1495,7 @@ def test_groupby_2d_malformed(): d["label"] = ["l1", "l2"] msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): - tmp = d.groupby("group").mean() + tmp = d.groupby(["group"]).mean() res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) tm.assert_numpy_array_equal(tmp.values, res_values) @@ -1888,7 +1888,7 @@ def test_pivot_table_values_key_error(): @pytest.mark.parametrize("columns", ["C", ["C"]]) -@pytest.mark.parametrize("keys", ["A", ["A", "B"]]) +@pytest.mark.parametrize("keys", [["A"], ["A", "B"]]) @pytest.mark.parametrize( "values", [ @@ -2240,7 +2240,7 @@ def test_groupby_groups_in_BaseGrouper(): assert result.groups == expected.groups -@pytest.mark.parametrize("group_name", ["x"]) +@pytest.mark.parametrize("group_name", ["x", ["x"]]) def test_groupby_axis_1(group_name): # GH 27614 df = DataFrame( @@ -2643,7 +2643,7 @@ def test_groupby_aggregation_non_numeric_dtype(): index=Index(["M", "W"], dtype="object", name="MW"), ) - gb = df.groupby(by="MW") + gb = df.groupby(by=["MW"]) result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2666,7 +2666,7 @@ def test_groupby_aggregation_multi_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by="x") + gb = df.groupby(by=["x"]) result = gb.sum() tm.assert_frame_equal(result, expected) @@ -2686,7 +2686,7 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): index=Index([0, 1], dtype="int64", name="x"), ) - gb = df.groupby(by="x") + gb = df.groupby(by=["x"]) msg = "The default value of numeric_only" with tm.assert_produces_warning(FutureWarning, match=msg): result = gb.sum() @@ -2766,7 +2766,7 @@ def test_by_column_values_with_same_starting_value(): ) aggregate_details = {"Mood": Series.mode, "Credit": "sum"} - result = df.groupby("Name").agg(aggregate_details) + result = df.groupby(["Name"]).agg(aggregate_details) expected_result = DataFrame( { "Mood": [["happy", "sad"], "happy"], diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index f1eb811a788b4..6656fd565f79d 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -164,7 +164,7 @@ def test_nunique_with_timegrouper(): def test_nunique_with_NaT(key, data, dropna, expected): # GH 27951 df = DataFrame({"key": key, "data": data}) - result = df.groupby("key")["data"].nunique(dropna=dropna) + result = df.groupby(["key"])["data"].nunique(dropna=dropna) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 7157ab135f55d..5c64ba3d9e266 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1194,7 +1194,7 @@ def test_transform_lambda_with_datetimetz(): "timezone": ["Etc/GMT+4", "US/Eastern"], } ) - result = df.groupby("timezone")["time"].transform( + result = df.groupby(["timezone"])["time"].transform( lambda x: x.dt.tz_localize(x.name) ) expected = Series( diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index 7e8a6242dca82..fe39c3d441396 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -269,7 +269,7 @@ class TestBoxWithBy(TestPlotBase): ] * 2, ), - ("C", None, ["A", "B"], [["a", "b", "c"]] * 2), + (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2), ], ) def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df): diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index d6666d93e2f66..ee460eb365d25 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -407,7 +407,7 @@ def test_join_inner_multiindex(self, lexsorted_two_level_string_multiindex): def test_join_hierarchical_mixed(self): # GH 2024 df = DataFrame([(1, 2, 3), (4, 5, 6)], columns=["a", "b", "c"]) - new_df = df.groupby("a").agg({"b": [np.mean, np.sum]}) + new_df = df.groupby(["a"]).agg({"b": [np.mean, np.sum]}) other_df = DataFrame([(1, 2, 3), (7, 10, 6)], columns=["a", "b", "d"]) other_df.set_index("a", inplace=True) # GH 9455, 12219 diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6b2ff46a9d13a..8312e3b9de9a7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -860,7 +860,7 @@ def test_pivot_with_tuple_of_values(self, method): pd.pivot(df, index="zoo", columns="foo", values=("bar", "baz")) def _check_output( - self, result, values_col, index=["A", "B"], columns="C", margins_col="All" + self, result, values_col, index=["A", "B"], columns=["C"], margins_col="All" ): col_margins = result.loc[result.index[:-1], margins_col] expected_col_margins = self.data.groupby(index)[values_col].mean()