pandas-dev · jreback · Nov 2, 2020 · Oct 31, 2020 · Oct 31, 2020 · Oct 31, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -338,6 +338,7 @@ Deprecations
 - Deprecated slice-indexing on timezone-aware :class:`DatetimeIndex` with naive ``datetime`` objects, to match scalar indexing behavior (:issue:`36148`)
 - :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
 - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
+- :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated, use :meth:`Categorical.categories_match_up_to_permutation` instead (:issue:`37545`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -78,7 +78,7 @@ def func(self, other):
             # the same (maybe up to ordering, depending on ordered)
 
             msg = "Categoricals can only be compared if 'categories' are the same."
-            if not self.is_dtype_equal(other):
+            if not self.categories_match_up_to_permutation(other):
                 raise TypeError(msg)
 
             if not self.ordered and not self.categories.equals(other.categories):
@@ -1709,7 +1709,7 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray:
             if self.categories.equals(target.categories):
                 # We use the same codes, so can go directly to the engine
                 codes = target.codes
-            elif self.is_dtype_equal(target):
+            elif self.categories_match_up_to_permutation(target):
                 # We have the same categories up to a reshuffling of codes.
                 codes = recode_for_categories(
                     target.codes, target.categories, self.categories
@@ -1882,11 +1882,12 @@ def _validate_setitem_value(self, value):
 
         # require identical categories set
         if isinstance(value, Categorical):
-            if not is_dtype_equal(self, value):
+            if not is_dtype_equal(self.dtype, value.dtype):
                 raise ValueError(
                     "Cannot set a Categorical with another, "
                     "without identical categories"
                 )
+            # is_dtype_equal implies categories_match_up_to_permutation
             new_codes = self._validate_listlike(value)
             value = Categorical.from_codes(new_codes, dtype=self.dtype)
 
@@ -2120,7 +2121,7 @@ def equals(self, other: object) -> bool:
         """
         if not isinstance(other, Categorical):
             return False
-        elif self.is_dtype_equal(other):
+        elif self.categories_match_up_to_permutation(other):
             other_codes = self._validate_listlike(other)
             return np.array_equal(self._codes, other_codes)
         return False
@@ -2133,7 +2134,7 @@ def _concat_same_type(self, to_concat):
 
     # ------------------------------------------------------------------
 
-    def is_dtype_equal(self, other):
+    def categories_match_up_to_permutation(self, other: "Categorical") -> bool:
         """
         Returns True if categoricals are the same dtype
           same categories, and same ordered
@@ -2146,8 +2147,17 @@ def is_dtype_equal(self, other):
         -------
         bool
         """
+        return hash(self.dtype) == hash(other.dtype)
+
+    def is_dtype_equal(self, other) -> bool:
+        warn(
+            "Categorical.is_dtype_equal is deprecated and will be removed "
+            "in a future version, use categories_match_up_to_permutation instead",
+            FutureWarning,
+            stacklevel=2,
+        )
         try:
-            return hash(self.dtype) == hash(other.dtype)
+            return self.categories_match_up_to_permutation(other)
         except (AttributeError, TypeError):
             return False
 

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -296,7 +296,7 @@ def _maybe_unwrap(x):
         raise TypeError("dtype of categories must be the same")
 
     ordered = False
-    if all(first.is_dtype_equal(other) for other in to_union[1:]):
+    if all(first.categories_match_up_to_permutation(other) for other in to_union[1:]):
         # identical categories - fastpath
         categories = first.categories
         ordered = first.ordered

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -256,7 +256,7 @@ def _is_dtype_compat(self, other) -> Categorical:
         """
         if is_categorical_dtype(other):
             other = extract_array(other)
-            if not other.is_dtype_equal(self):
+            if not other.categories_match_up_to_permutation(self):
                 raise TypeError(
                     "categories must match existing categories when appending"
                 )

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -1083,7 +1083,7 @@ def _maybe_coerce_merge_keys(self):
             # if either left or right is a categorical
             # then the must match exactly in categories & ordered
             if lk_is_cat and rk_is_cat:
-                if lk.is_dtype_equal(rk):
+                if lk.categories_match_up_to_permutation(rk):
                     continue
 
             elif lk_is_cat or rk_is_cat:

diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py
@@ -8,34 +8,45 @@
 
 
 class TestCategoricalDtypes:
-    def test_is_equal_dtype(self):
+    def test_is_dtype_equal_deprecated(self):
+        # GH#37545
+        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
+
+        with tm.assert_produces_warning(FutureWarning):
+            c1.is_dtype_equal(c1)
+
+    def test_categories_match_up_to_permutation(self):
 
         # test dtype comparisons between cats
 
         c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
         c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
         c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
-        assert c1.is_dtype_equal(c1)
-        assert c2.is_dtype_equal(c2)
-        assert c3.is_dtype_equal(c3)
-        assert c1.is_dtype_equal(c2)
-        assert not c1.is_dtype_equal(c3)
-        assert not c1.is_dtype_equal(Index(list("aabca")))
-        assert not c1.is_dtype_equal(c1.astype(object))
-        assert c1.is_dtype_equal(CategoricalIndex(c1))
-        assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab")))
-        assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))
+        assert c1.categories_match_up_to_permutation(c1)
+        assert c2.categories_match_up_to_permutation(c2)
+        assert c3.categories_match_up_to_permutation(c3)
+        assert c1.categories_match_up_to_permutation(c2)
+        assert not c1.categories_match_up_to_permutation(c3)
+        assert not c1.categories_match_up_to_permutation(Index(list("aabca")))
+        assert not c1.categories_match_up_to_permutation(c1.astype(object))
+        assert c1.categories_match_up_to_permutation(CategoricalIndex(c1))
+        assert c1.categories_match_up_to_permutation(
+            CategoricalIndex(c1, categories=list("cab"))
+        )
+        assert not c1.categories_match_up_to_permutation(
+            CategoricalIndex(c1, ordered=True)
+        )
 
         # GH 16659
         s1 = Series(c1)
         s2 = Series(c2)
         s3 = Series(c3)
-        assert c1.is_dtype_equal(s1)
-        assert c2.is_dtype_equal(s2)
-        assert c3.is_dtype_equal(s3)
-        assert c1.is_dtype_equal(s2)
-        assert not c1.is_dtype_equal(s3)
-        assert not c1.is_dtype_equal(s1.astype(object))
+        assert c1.categories_match_up_to_permutation(s1)
+        assert c2.categories_match_up_to_permutation(s2)
+        assert c3.categories_match_up_to_permutation(s3)
+        assert c1.categories_match_up_to_permutation(s2)
+        assert not c1.categories_match_up_to_permutation(s3)
+        assert not c1.categories_match_up_to_permutation(s1.astype(object))
 
     def test_set_dtype_same(self):
         c = Categorical(["a", "b", "c"])

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -1707,8 +1707,8 @@ def test_other_columns(self, left, right):
         tm.assert_series_equal(result, expected)
 
         # categories are preserved
-        assert left.X.values.is_dtype_equal(merged.X.values)
-        assert right.Z.values.is_dtype_equal(merged.Z.values)
+        assert left.X.values.categories_match_up_to_permutation(merged.X.values)
+        assert right.Z.values.categories_match_up_to_permutation(merged.Z.values)
 
     @pytest.mark.parametrize(
         "change",
@@ -1725,7 +1725,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right):
         X = change(right.X.astype("object"))
         right = right.assign(X=X)
         assert is_categorical_dtype(left.X.values.dtype)
-        # assert not left.X.values.is_dtype_equal(right.X.values)
+        # assert not left.X.values.categories_match_up_to_permutation(right.X.values)
 
         merged = pd.merge(left, right, on="X", how=join_type)