Skip to content

Commit

Permalink
BUG: merging with a boolean/int categorical column (pandas-dev#17841)
Browse files: browse the repository at this point in the history
* BUG: merging with a boolean/int categorical column pandas-dev#17187
  • Loading branch information
jdrudolph authored and alanbato committed Nov 10, 2017
1 parent c382785 commit d7ca520
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,7 @@ Categorical
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
- Bug in :func:`DataFrame.merge` failing for categorical columns with boolean/int data types (:issue:`17187`)

.. _whatsnew_0210.pypy:

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -5596,7 +5596,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na):
# preserve these for validation in _concat_compat
return self.block.values

if self.block.is_bool:
if self.block.is_bool and not self.block.is_categorical:
# External code requested filling/upcasting, bool values must
# be upcasted to object to avoid being upcasted to numeric.
values = self.block.astype(np.object_).values
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/reshape/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1546,6 +1546,30 @@ def test_dtype_on_categorical_dates(self):
result_inner = pd.merge(df, df2, how='inner', on=['date'])
assert_frame_equal(result_inner, expected_inner)

@pytest.mark.parametrize('category_column,categories,expected_categories',
                         [([False, True, True, False], [True, False],
                           [True, False]),
                          ([2, 1, 1, 2], [1, 2], [1, 2]),
                          (['False', 'True', 'True', 'False'],
                           ['True', 'False'], ['True', 'False'])])
def test_merging_with_bool_or_int_cateorical_column(self, category_column,
                                                    categories,
                                                    expected_categories):
    """Check that merging keeps a bool/int/str categorical column intact.

    Parameters
    ----------
    category_column : list
        Raw values of the ``'cat'`` column of the left frame, one per
        ``id`` in ``[1, 2, 3, 4]``.
    categories : list
        Categories, in order, of the ordered categorical dtype applied to
        ``'cat'`` in both the left frame and the expected frame.
    expected_categories : list
        Despite the name, these are the expected *values* of ``'cat'`` in
        the merged frame, i.e. the entries for ``id`` 2 and 4.
    """
    # GH 17187
    # merging with a boolean/int categorical column

    # Build the dtype once and reuse it for both frames. Passing
    # ``categories``/``ordered`` as keyword arguments to
    # ``astype('category', ...)`` is deprecated in favour of an explicit
    # CategoricalDtype.
    cat_dtype = pd.api.types.CategoricalDtype(categories=categories,
                                              ordered=True)

    df1 = pd.DataFrame({'id': [1, 2, 3, 4],
                        'cat': category_column})
    df1['cat'] = df1['cat'].astype(cat_dtype)
    df2 = pd.DataFrame({'id': [2, 4], 'num': [1, 9]})

    # No ``on`` is given, so the frames are joined on their only shared
    # column, 'id'.
    result = df1.merge(df2)

    expected = pd.DataFrame({'id': [2, 4], 'cat': expected_categories,
                             'num': [1, 9]})
    expected['cat'] = expected['cat'].astype(cat_dtype)
    assert_frame_equal(result, expected)


@pytest.fixture
def left_df():
Expand Down

0 comments on commit d7ca520

Please sign in to comment.