Skip to content

Commit

Permalink
REF: Categorical.is_dtype_equal -> categories_match_up_to_permutation (
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Nov 2, 2020
1 parent 4bad8cb commit 4492979
Show file tree
Hide file tree
Showing 7 changed files with 50 additions and 28 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ Deprecations
- :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
- Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
- :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`)
- :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated, will be removed in a future version (:issue:`37545`)

.. ---------------------------------------------------------------------------
Expand Down
20 changes: 15 additions & 5 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def func(self, other):
# the same (maybe up to ordering, depending on ordered)

msg = "Categoricals can only be compared if 'categories' are the same."
if not self.is_dtype_equal(other):
if not self._categories_match_up_to_permutation(other):
raise TypeError(msg)

if not self.ordered and not self.categories.equals(other.categories):
Expand Down Expand Up @@ -1869,11 +1869,12 @@ def _validate_setitem_value(self, value):

# require identical categories set
if isinstance(value, Categorical):
if not is_dtype_equal(self, value):
if not is_dtype_equal(self.dtype, value.dtype):
raise ValueError(
"Cannot set a Categorical with another, "
"without identical categories"
)
# is_dtype_equal implies categories_match_up_to_permutation
new_codes = self._validate_listlike(value)
value = Categorical.from_codes(new_codes, dtype=self.dtype)

Expand Down Expand Up @@ -2107,7 +2108,7 @@ def equals(self, other: object) -> bool:
"""
if not isinstance(other, Categorical):
return False
elif self.is_dtype_equal(other):
elif self._categories_match_up_to_permutation(other):
other_codes = self._validate_listlike(other)
return np.array_equal(self._codes, other_codes)
return False
Expand All @@ -2120,7 +2121,7 @@ def _concat_same_type(self, to_concat):

# ------------------------------------------------------------------

def is_dtype_equal(self, other):
def _categories_match_up_to_permutation(self, other: "Categorical") -> bool:
"""
Returns True if categoricals are the same dtype
same categories, and same ordered
Expand All @@ -2133,8 +2134,17 @@ def is_dtype_equal(self, other):
-------
bool
"""
return hash(self.dtype) == hash(other.dtype)

def is_dtype_equal(self, other) -> bool:
warn(
"Categorical.is_dtype_equal is deprecated and will be removed "
"in a future version",
FutureWarning,
stacklevel=2,
)
try:
return hash(self.dtype) == hash(other.dtype)
return self._categories_match_up_to_permutation(other)
except (AttributeError, TypeError):
return False

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def _maybe_unwrap(x):
raise TypeError("dtype of categories must be the same")

ordered = False
if all(first.is_dtype_equal(other) for other in to_union[1:]):
if all(first._categories_match_up_to_permutation(other) for other in to_union[1:]):
# identical categories - fastpath
categories = first.categories
ordered = first.ordered
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def _is_dtype_compat(self, other) -> Categorical:
"""
if is_categorical_dtype(other):
other = extract_array(other)
if not other.is_dtype_equal(self):
if not other._categories_match_up_to_permutation(self):
raise TypeError(
"categories must match existing categories when appending"
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1083,7 +1083,7 @@ def _maybe_coerce_merge_keys(self):
# if either left or right is a categorical
# then the must match exactly in categories & ordered
if lk_is_cat and rk_is_cat:
if lk.is_dtype_equal(rk):
if lk._categories_match_up_to_permutation(rk):
continue

elif lk_is_cat or rk_is_cat:
Expand Down
45 changes: 28 additions & 17 deletions pandas/tests/arrays/categorical/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,34 +8,45 @@


class TestCategoricalDtypes:
def test_is_equal_dtype(self):
def test_is_dtype_equal_deprecated(self):
# GH#37545
c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)

with tm.assert_produces_warning(FutureWarning):
c1.is_dtype_equal(c1)

def test_categories_match_up_to_permutation(self):

# test dtype comparisons between cats

c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
assert c1.is_dtype_equal(c1)
assert c2.is_dtype_equal(c2)
assert c3.is_dtype_equal(c3)
assert c1.is_dtype_equal(c2)
assert not c1.is_dtype_equal(c3)
assert not c1.is_dtype_equal(Index(list("aabca")))
assert not c1.is_dtype_equal(c1.astype(object))
assert c1.is_dtype_equal(CategoricalIndex(c1))
assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab")))
assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))
assert c1._categories_match_up_to_permutation(c1)
assert c2._categories_match_up_to_permutation(c2)
assert c3._categories_match_up_to_permutation(c3)
assert c1._categories_match_up_to_permutation(c2)
assert not c1._categories_match_up_to_permutation(c3)
assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
assert not c1._categories_match_up_to_permutation(c1.astype(object))
assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
assert c1._categories_match_up_to_permutation(
CategoricalIndex(c1, categories=list("cab"))
)
assert not c1._categories_match_up_to_permutation(
CategoricalIndex(c1, ordered=True)
)

# GH 16659
s1 = Series(c1)
s2 = Series(c2)
s3 = Series(c3)
assert c1.is_dtype_equal(s1)
assert c2.is_dtype_equal(s2)
assert c3.is_dtype_equal(s3)
assert c1.is_dtype_equal(s2)
assert not c1.is_dtype_equal(s3)
assert not c1.is_dtype_equal(s1.astype(object))
assert c1._categories_match_up_to_permutation(s1)
assert c2._categories_match_up_to_permutation(s2)
assert c3._categories_match_up_to_permutation(s3)
assert c1._categories_match_up_to_permutation(s2)
assert not c1._categories_match_up_to_permutation(s3)
assert not c1._categories_match_up_to_permutation(s1.astype(object))

def test_set_dtype_same(self):
c = Categorical(["a", "b", "c"])
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1707,8 +1707,8 @@ def test_other_columns(self, left, right):
tm.assert_series_equal(result, expected)

# categories are preserved
assert left.X.values.is_dtype_equal(merged.X.values)
assert right.Z.values.is_dtype_equal(merged.Z.values)
assert left.X.values._categories_match_up_to_permutation(merged.X.values)
assert right.Z.values._categories_match_up_to_permutation(merged.Z.values)

@pytest.mark.parametrize(
"change",
Expand All @@ -1725,7 +1725,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right):
X = change(right.X.astype("object"))
right = right.assign(X=X)
assert is_categorical_dtype(left.X.values.dtype)
# assert not left.X.values.is_dtype_equal(right.X.values)
# assert not left.X.values._categories_match_up_to_permutation(right.X.values)

merged = pd.merge(left, right, on="X", how=join_type)

Expand Down

0 comments on commit 4492979

Please sign in to comment.