-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REF: Categorical.is_dtype_equal -> categories_match_up_to_permutation #37545
Changes from 4 commits
6fe0c3e
27517a6
2bd4416
3354bb3
ad412f1
10cf22c
20c668e
c153bbd
af35fd2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,7 +78,7 @@ def func(self, other): | |
# the same (maybe up to ordering, depending on ordered) | ||
|
||
msg = "Categoricals can only be compared if 'categories' are the same." | ||
if not self.is_dtype_equal(other): | ||
if not self.categories_match_up_to_permutation(other): | ||
raise TypeError(msg) | ||
|
||
if not self.ordered and not self.categories.equals(other.categories): | ||
|
@@ -1709,7 +1709,7 @@ def _validate_listlike(self, target: ArrayLike) -> np.ndarray: | |
if self.categories.equals(target.categories): | ||
# We use the same codes, so can go directly to the engine | ||
codes = target.codes | ||
elif self.is_dtype_equal(target): | ||
elif self.categories_match_up_to_permutation(target): | ||
# We have the same categories up to a reshuffling of codes. | ||
codes = recode_for_categories( | ||
target.codes, target.categories, self.categories | ||
|
@@ -1882,11 +1882,12 @@ def _validate_setitem_value(self, value): | |
|
||
# require identical categories set | ||
if isinstance(value, Categorical): | ||
if not is_dtype_equal(self, value): | ||
if not is_dtype_equal(self.dtype, value.dtype): | ||
raise ValueError( | ||
"Cannot set a Categorical with another, " | ||
"without identical categories" | ||
) | ||
# is_dtype_equal implies categories_match_up_to_permutation | ||
new_codes = self._validate_listlike(value) | ||
value = Categorical.from_codes(new_codes, dtype=self.dtype) | ||
|
||
|
@@ -2120,7 +2121,7 @@ def equals(self, other: object) -> bool: | |
""" | ||
if not isinstance(other, Categorical): | ||
return False | ||
elif self.is_dtype_equal(other): | ||
elif self.categories_match_up_to_permutation(other): | ||
other_codes = self._validate_listlike(other) | ||
return np.array_equal(self._codes, other_codes) | ||
return False | ||
|
@@ -2133,7 +2134,7 @@ def _concat_same_type(self, to_concat): | |
|
||
# ------------------------------------------------------------------ | ||
|
||
def is_dtype_equal(self, other): | ||
def categories_match_up_to_permutation(self, other: "Categorical") -> bool: | ||
""" | ||
Returns True if categoricals are the same dtype | ||
same categories, and same ordered | ||
|
@@ -2146,8 +2147,17 @@ def is_dtype_equal(self, other): | |
------- | ||
bool | ||
""" | ||
return hash(self.dtype) == hash(other.dtype) | ||
|
||
def is_dtype_equal(self, other) -> bool: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should deprecate any is_dtype_equal methods on any other Array/Index (not sure if we have any). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's just a CategoricalDtype.is_dtype_equal left. I'll be happy to see that go. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Whoops, no, got rid of that a while ago. |
||
warn( | ||
"Categorical.is_dtype_equal is deprecated and will be removed " | ||
"in a future version, use categories_match_up_to_permutation instead", | ||
FutureWarning, | ||
stacklevel=2, | ||
) | ||
try: | ||
return hash(self.dtype) == hash(other.dtype) | ||
return self.categories_match_up_to_permutation(other) | ||
except (AttributeError, TypeError): | ||
return False | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
umm, why this name? what is wrong with the existing one?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`is_dtype_equal(self, other)`
means something different from `self.is_dtype_equal(other)`,
which I find confusing. The new name accurately describes what is being checked.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have a problem with this as a private method, and it's OK to deprecate `.is_dtype_equal`, but we shouldn't offer this as a replacement.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
privatized+green