From 0c9aadf786c13cc76d867275d70aa10459c538d8 Mon Sep 17 00:00:00 2001
From: Matt Richards <45483497+m-richards@users.noreply.github.com>
Date: Tue, 25 Apr 2023 19:27:05 +0200
Subject: [PATCH] BUG: Fix getitem dtype preservation with multiindexes (#51895)

* BUG/TST fix dtype preservation with multindex

* lint

* Update pandas/tests/indexing/multiindex/test_multiindex.py

Co-authored-by: Joris Van den Bossche

* cleanups

* switch to iloc, reindex fails in some cases

* suggestions from code review

* address code review comments

Co-Authored-By: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>

---------

Co-authored-by: Joris Van den Bossche
Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
(cherry picked from commit 194b6bb006a48b913b73e176b1210ece54f226a8)
---
 doc/source/whatsnew/v2.1.0.rst                |  0
 pandas/core/frame.py                          | 14 ++-----------
 .../indexing/multiindex/test_multiindex.py    | 20 +++++++++++++++++++
 3 files changed, 22 insertions(+), 12 deletions(-)
 create mode 100644 doc/source/whatsnew/v2.1.0.rst

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8be8ed188cce2..7e1d8711aee86 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3816,18 +3816,8 @@ def _getitem_multilevel(self, key):
         if isinstance(loc, (slice, np.ndarray)):
             new_columns = self.columns[loc]
             result_columns = maybe_droplevels(new_columns, key)
-            if self._is_mixed_type:
-                result = self.reindex(columns=new_columns)
-                result.columns = result_columns
-            else:
-                new_values = self._values[:, loc]
-                result = self._constructor(
-                    new_values, index=self.index, columns=result_columns, copy=False
-                )
-                if using_copy_on_write() and isinstance(loc, slice):
-                    result._mgr.add_references(self._mgr)  # type: ignore[arg-type]
-
-            result = result.__finalize__(self)
+            result = self.iloc[:, loc]
+            result.columns = result_columns
 
             # If there is only one column being returned, and its name is
             # either an empty string, or a tuple with an empty string as its
diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
index 8e507212976ec..22a6f62f53392 100644
--- a/pandas/tests/indexing/multiindex/test_multiindex.py
+++ b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -6,12 +6,14 @@
 
 import pandas as pd
 from pandas import (
+    CategoricalDtype,
     DataFrame,
     Index,
     MultiIndex,
     Series,
 )
 import pandas._testing as tm
+from pandas.core.arrays.boolean import BooleanDtype
 
 
 class TestMultiIndexBasic:
@@ -206,3 +208,21 @@ def test_multiindex_with_na_missing_key(self):
         )
         with pytest.raises(KeyError, match="missing_key"):
             df[[("missing_key",)]]
+
+    def test_multiindex_dtype_preservation(self):
+        # GH51261
+        columns = MultiIndex.from_tuples([("A", "B")], names=["lvl1", "lvl2"])
+        df = DataFrame(["value"], columns=columns).astype("category")
+        df_no_multiindex = df["A"]
+        assert isinstance(df_no_multiindex["B"].dtype, CategoricalDtype)
+
+        # geopandas 1763 analogue
+        df = DataFrame(
+            [[1, 0], [0, 1]],
+            columns=[
+                ["foo", "foo"],
+                ["location", "location"],
+                ["x", "y"],
+            ],
+        ).assign(bools=Series([True, False], dtype="boolean"))
+        assert isinstance(df["bools"].dtype, BooleanDtype)