From 44929796567593d05d497896dce83c66e43c1f12 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 2 Nov 2020 14:39:26 -0800
Subject: [PATCH] REF: Categorical.is_dtype_equal ->
 categories_match_up_to_permutation (#37545)

---
 doc/source/whatsnew/v1.2.0.rst                |  1 +
 pandas/core/arrays/categorical.py             | 20 ++++++---
 pandas/core/dtypes/concat.py                  |  2 +-
 pandas/core/indexes/category.py               |  2 +-
 pandas/core/reshape/merge.py                  |  2 +-
 .../tests/arrays/categorical/test_dtypes.py   | 45 ++++++++++++-------
 pandas/tests/reshape/merge/test_merge.py      |  6 +--
 7 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 6f137302d4994..8a092cb6e36db 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -340,6 +340,7 @@ Deprecations
 - :meth:`Index.ravel` returning a ``np.ndarray`` is deprecated, in the future this will return a view on the same index (:issue:`19956`)
 - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`)
 - :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior.  Use the named set methods instead (:issue:`36758`)
+- :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated and will be removed in a future version (:issue:`37545`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 263512e427c69..b1f913e9ea641 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -78,7 +78,7 @@ def func(self, other):
             # the same (maybe up to ordering, depending on ordered)
 
             msg = "Categoricals can only be compared if 'categories' are the same."
-            if not self.is_dtype_equal(other):
+            if not self._categories_match_up_to_permutation(other):
                 raise TypeError(msg)
 
             if not self.ordered and not self.categories.equals(other.categories):
@@ -1869,11 +1869,12 @@ def _validate_setitem_value(self, value):
 
         # require identical categories set
         if isinstance(value, Categorical):
-            if not is_dtype_equal(self, value):
+            if not is_dtype_equal(self.dtype, value.dtype):
                 raise ValueError(
                     "Cannot set a Categorical with another, "
                     "without identical categories"
                 )
+            # is_dtype_equal implies categories_match_up_to_permutation
             new_codes = self._validate_listlike(value)
             value = Categorical.from_codes(new_codes, dtype=self.dtype)
 
@@ -2107,7 +2108,7 @@ def equals(self, other: object) -> bool:
         """
         if not isinstance(other, Categorical):
             return False
-        elif self.is_dtype_equal(other):
+        elif self._categories_match_up_to_permutation(other):
             other_codes = self._validate_listlike(other)
             return np.array_equal(self._codes, other_codes)
         return False
@@ -2120,7 +2121,7 @@ def _concat_same_type(self, to_concat):
 
     # ------------------------------------------------------------------
 
-    def is_dtype_equal(self, other):
+    def _categories_match_up_to_permutation(self, other: "Categorical") -> bool:
         """
         Returns True if categoricals are the same dtype
           same categories, and same ordered
@@ -2133,8 +2134,17 @@ def is_dtype_equal(self, other):
         -------
         bool
         """
+        return hash(self.dtype) == hash(other.dtype)
+
+    def is_dtype_equal(self, other) -> bool:
+        warn(
+            "Categorical.is_dtype_equal is deprecated and will be removed "
+            "in a future version",
+            FutureWarning,
+            stacklevel=2,
+        )
         try:
-            return hash(self.dtype) == hash(other.dtype)
+            return self._categories_match_up_to_permutation(other)
         except (AttributeError, TypeError):
             return False
 
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 60fd959701821..99dc01ef421d1 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -296,7 +296,7 @@ def _maybe_unwrap(x):
         raise TypeError("dtype of categories must be the same")
 
     ordered = False
-    if all(first.is_dtype_equal(other) for other in to_union[1:]):
+    if all(first._categories_match_up_to_permutation(other) for other in to_union[1:]):
         # identical categories - fastpath
         categories = first.categories
         ordered = first.ordered
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 2f2836519d847..8cbd0d83c78d7 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -255,7 +255,7 @@ def _is_dtype_compat(self, other) -> Categorical:
         """
         if is_categorical_dtype(other):
             other = extract_array(other)
-            if not other.is_dtype_equal(self):
+            if not other._categories_match_up_to_permutation(self):
                 raise TypeError(
                     "categories must match existing categories when appending"
                 )
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 5012be593820e..d82b1474ff3e0 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1083,7 +1083,7 @@ def _maybe_coerce_merge_keys(self):
             # if either left or right is a categorical
             # then the must match exactly in categories & ordered
             if lk_is_cat and rk_is_cat:
-                if lk.is_dtype_equal(rk):
+                if lk._categories_match_up_to_permutation(rk):
                     continue
 
             elif lk_is_cat or rk_is_cat:
diff --git a/pandas/tests/arrays/categorical/test_dtypes.py b/pandas/tests/arrays/categorical/test_dtypes.py
index 47ce9cb4089f9..deafa22a6e8eb 100644
--- a/pandas/tests/arrays/categorical/test_dtypes.py
+++ b/pandas/tests/arrays/categorical/test_dtypes.py
@@ -8,34 +8,45 @@
 
 
 class TestCategoricalDtypes:
-    def test_is_equal_dtype(self):
+    def test_is_dtype_equal_deprecated(self):
+        # GH#37545
+        c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
+
+        with tm.assert_produces_warning(FutureWarning):
+            c1.is_dtype_equal(c1)
+
+    def test_categories_match_up_to_permutation(self):
 
         # test dtype comparisons between cats
 
         c1 = Categorical(list("aabca"), categories=list("abc"), ordered=False)
         c2 = Categorical(list("aabca"), categories=list("cab"), ordered=False)
         c3 = Categorical(list("aabca"), categories=list("cab"), ordered=True)
-        assert c1.is_dtype_equal(c1)
-        assert c2.is_dtype_equal(c2)
-        assert c3.is_dtype_equal(c3)
-        assert c1.is_dtype_equal(c2)
-        assert not c1.is_dtype_equal(c3)
-        assert not c1.is_dtype_equal(Index(list("aabca")))
-        assert not c1.is_dtype_equal(c1.astype(object))
-        assert c1.is_dtype_equal(CategoricalIndex(c1))
-        assert c1.is_dtype_equal(CategoricalIndex(c1, categories=list("cab")))
-        assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))
+        assert c1._categories_match_up_to_permutation(c1)
+        assert c2._categories_match_up_to_permutation(c2)
+        assert c3._categories_match_up_to_permutation(c3)
+        assert c1._categories_match_up_to_permutation(c2)
+        assert not c1._categories_match_up_to_permutation(c3)
+        assert not c1._categories_match_up_to_permutation(Index(list("aabca")))
+        assert not c1._categories_match_up_to_permutation(c1.astype(object))
+        assert c1._categories_match_up_to_permutation(CategoricalIndex(c1))
+        assert c1._categories_match_up_to_permutation(
+            CategoricalIndex(c1, categories=list("cab"))
+        )
+        assert not c1._categories_match_up_to_permutation(
+            CategoricalIndex(c1, ordered=True)
+        )
 
         # GH 16659
         s1 = Series(c1)
         s2 = Series(c2)
         s3 = Series(c3)
-        assert c1.is_dtype_equal(s1)
-        assert c2.is_dtype_equal(s2)
-        assert c3.is_dtype_equal(s3)
-        assert c1.is_dtype_equal(s2)
-        assert not c1.is_dtype_equal(s3)
-        assert not c1.is_dtype_equal(s1.astype(object))
+        assert c1._categories_match_up_to_permutation(s1)
+        assert c2._categories_match_up_to_permutation(s2)
+        assert c3._categories_match_up_to_permutation(s3)
+        assert c1._categories_match_up_to_permutation(s2)
+        assert not c1._categories_match_up_to_permutation(s3)
+        assert not c1._categories_match_up_to_permutation(s1.astype(object))
 
     def test_set_dtype_same(self):
         c = Categorical(["a", "b", "c"])
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index bb2860b88b288..a58372040c7f3 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -1707,8 +1707,8 @@ def test_other_columns(self, left, right):
         tm.assert_series_equal(result, expected)
 
         # categories are preserved
-        assert left.X.values.is_dtype_equal(merged.X.values)
-        assert right.Z.values.is_dtype_equal(merged.Z.values)
+        assert left.X.values._categories_match_up_to_permutation(merged.X.values)
+        assert right.Z.values._categories_match_up_to_permutation(merged.Z.values)
 
     @pytest.mark.parametrize(
         "change",
@@ -1725,7 +1725,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right):
         X = change(right.X.astype("object"))
         right = right.assign(X=X)
         assert is_categorical_dtype(left.X.values.dtype)
-        # assert not left.X.values.is_dtype_equal(right.X.values)
+        # assert not left.X.values._categories_match_up_to_permutation(right.X.values)
 
         merged = pd.merge(left, right, on="X", how=join_type)