From fc66d101c86ce07287a9ddc046312995d5cb7ba7 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 17 Jul 2019 07:41:33 -0400 Subject: [PATCH] multi-index column support --- pandas/core/frame.py | 8 ++++---- pandas/tests/frame/test_explode.py | 21 ++++++++++++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1078394708d4be..f2d3934243c100 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,7 +15,7 @@ import itertools import sys from textwrap import dedent -from typing import FrozenSet, List, Optional, Set, Type, Union +from typing import FrozenSet, List, Optional, Set, Tuple, Type, Union import warnings import numpy as np @@ -6233,7 +6233,7 @@ def stack(self, level=-1, dropna=True): else: return stack(self, level, dropna=dropna) - def explode(self, column: str) -> "DataFrame": + def explode(self, column: Union[str, Tuple]) -> "DataFrame": """ Transform each element of a list-like to a row, replicating the index values. @@ -6242,7 +6242,7 @@ def explode(self, column: str) -> "DataFrame": Parameters ---------- - column : str + column : str or tuple Returns ------- @@ -6290,7 +6290,7 @@ def explode(self, column: str) -> "DataFrame": 3 4 1 """ - if not is_scalar(column): + if not (is_scalar(column) or isinstance(column, tuple)): raise ValueError("column must be a scalar") if not self.columns.is_unique: raise ValueError("columns must be unique") diff --git a/pandas/tests/frame/test_explode.py b/pandas/tests/frame/test_explode.py index 4a970879942fe0..b4330aadbfba3d 100644 --- a/pandas/tests/frame/test_explode.py +++ b/pandas/tests/frame/test_explode.py @@ -33,7 +33,7 @@ def test_basic(): tm.assert_frame_equal(result, expected) -def test_multi_index(): +def test_multi_index_rows(): df = pd.DataFrame( {"A": np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), "B": 1}, index=pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1), ("b", 2)]), @@ -63,6 +63,25 @@ def test_multi_index(): tm.assert_frame_equal(result, expected) +def test_multi_index_columns(): + df = pd.DataFrame( + {("A", 1): np.array([[0, 1, 2], np.nan, [], (3, 4)], dtype=object), ("A", 2): 1} + ) + + result = df.explode(("A", 1)) + expected = pd.DataFrame( + { + ("A", 1): pd.Series( + [0, 1, 2, np.nan, np.nan, 3, 4], + index=pd.Index([0, 0, 0, 1, 2, 3, 3]), + dtype=object, + ), + ("A", 2): 1, + } + ) + tm.assert_frame_equal(result, expected) + + def test_usecase(): # explode a single column # gh-10511