diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index f4fbbd3596b57c..b06075f2dd13c1 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -1017,6 +1017,7 @@ Categorical
 - Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
 - Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
 - Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
+- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`)
 
 .. _whatsnew_0210.pypy:
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index a1e9b24afe5fc8..f6c5ecbca81ef3 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -5596,7 +5596,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
                     # preserve these for validation in _concat_compat
                     return self.block.values
 
-            if self.block.is_bool:
+            if self.block.is_bool and not self.block.is_categorical:
                 # External code requested filling/upcasting, bool values must
                 # be upcasted to object to avoid being upcasted to numeric.
                 values = self.block.astype(np.object_).values
diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py
index ed99814afd20a9..81956c0bd5b281 100644
--- a/pandas/tests/reshape/test_merge.py
+++ b/pandas/tests/reshape/test_merge.py
@@ -1546,6 +1546,30 @@ def test_dtype_on_categorical_dates(self):
         result_inner = pd.merge(df, df2, how='inner', on=['date'])
         assert_frame_equal(result_inner, expected_inner)
 
+    @pytest.mark.parametrize('category_column,categories,expected_categories',
+                             [([False, True, True, False], [True, False],
+                               [True, False]),
+                              ([2, 1, 1, 2], [1, 2], [1, 2]),
+                              (['False', 'True', 'True', 'False'],
+                               ['True', 'False'], ['True', 'False'])])
+    def test_merging_with_bool_or_int_cateorical_column(self, category_column,
+                                                        categories,
+                                                        expected_categories):
+        # GH 17187
+        # merging with a boolean/int categorical column
+        df1 = pd.DataFrame({'id': [1, 2, 3, 4],
+                            'cat': category_column})
+        df1['cat'] = df1['cat'].astype('category',
+                                       categories=categories, ordered=True)
+        df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})
+        result = df1.merge(df2)
+        expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories,
+                                 'num': [1, 9]})
+        expected['cat'] = expected['cat'].astype('category',
+                                                 categories=categories,
+                                                 ordered=True)
+        assert_frame_equal(expected, result)
+
 
 @pytest.fixture
 def left_df():