-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: add NA scalar for missing value indicator, use in StringArray. #29597
Changes from 3 commits
03f83bd
c1797d5
3339eaa
e9d4d6a
4450d2d
1849a23
c72e3ee
3a97782
2302661
2ab592a
018399e
31290b9
33fd3e0
289c885
22de7cd
f8208db
371eeeb
1cadeda
1fcf4b7
f6798e5
14c1434
788a2c2
1bcbab2
775cdfb
589a961
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,6 +68,7 @@ | |
DatetimeTZDtype, | ||
StringDtype, | ||
# missing | ||
NA, | ||
isna, | ||
isnull, | ||
notna, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import numbers | ||
|
||
|
||
def _create_binary_propagating_op(name): | ||
def method(self, other): | ||
if isinstance(other, numbers.Number) or other is NA or isinstance(other, str): | ||
return NA | ||
|
||
return NotImplemented | ||
|
||
method.__name__ = name | ||
return method | ||
|
||
|
||
def _create_unary_propagating_op(name): | ||
def method(self): | ||
return NA | ||
|
||
method.__name__ = name | ||
return method | ||
|
||
|
||
class NAType: | ||
|
||
_instance = None | ||
|
||
def __new__(cls, *args, **kwargs): | ||
if NAType._instance is None: | ||
NAType._instance = object.__new__(cls, *args, **kwargs) | ||
return NAType._instance | ||
|
||
def __repr__(self) -> str: | ||
return "NA" | ||
|
||
def __str__(self) -> str: | ||
return "NA" | ||
|
||
def __bool__(self): | ||
raise TypeError("boolean value of NA is ambiguous") | ||
|
||
def __hash__(self): | ||
return id(self) | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# Binary arithmetic and comparison ops -> propagate | ||
|
||
__add__ = _create_binary_propagating_op("__add__") | ||
__radd__ = _create_binary_propagating_op("__radd__") | ||
__sub__ = _create_binary_propagating_op("__sub__") | ||
__rsub__ = _create_binary_propagating_op("__rsub__") | ||
__mul__ = _create_binary_propagating_op("__mul__") | ||
__rmul__ = _create_binary_propagating_op("__rmul__") | ||
__matmul__ = _create_binary_propagating_op("__matmul__") | ||
__rmatmul__ = _create_binary_propagating_op("__rmatmul__") | ||
__truediv__ = _create_binary_propagating_op("__truediv__") | ||
__rtruediv__ = _create_binary_propagating_op("__rtruediv__") | ||
__floordiv__ = _create_binary_propagating_op("__floordiv__") | ||
__rfloordiv__ = _create_binary_propagating_op("__rfloordiv__") | ||
__mod__ = _create_binary_propagating_op("__mod__") | ||
__rmod__ = _create_binary_propagating_op("__rmod__") | ||
__divmod__ = _create_binary_propagating_op("__divmod__") | ||
__rdivmod__ = _create_binary_propagating_op("__rdivmod__") | ||
__pow__ = _create_binary_propagating_op("__pow__") | ||
__rpow__ = _create_binary_propagating_op("__rpow__") | ||
|
||
# __lshift__ = _create_binary_propagating_op("__add__") | ||
# __rshift__ = _create_binary_propagating_op("__add__") | ||
|
||
__eq__ = _create_binary_propagating_op("__eq__") | ||
__ne__ = _create_binary_propagating_op("__ne__") | ||
__le__ = _create_binary_propagating_op("__le__") | ||
__lt__ = _create_binary_propagating_op("__lt__") | ||
__gt__ = _create_binary_propagating_op("__gt__") | ||
__ge__ = _create_binary_propagating_op("__ge__") | ||
|
||
# Unary ops | ||
|
||
__neg__ = _create_unary_propagating_op("__neg__") | ||
__pos__ = _create_unary_propagating_op("__pos__") | ||
__abs__ = _create_unary_propagating_op("__abs__") | ||
__invert__ = _create_unary_propagating_op("__invert__") | ||
|
||
# Logical ops using Kleene logic | ||
|
||
def __and__(self, other): | ||
if other is False: | ||
return False | ||
elif other is True or other is NA: | ||
return NA | ||
else: | ||
return NotImplemented | ||
|
||
__rand__ = __and__ | ||
|
||
def __or__(self, other): | ||
if other is True: | ||
return True | ||
elif other is False or other is NA: | ||
return NA | ||
else: | ||
return NotImplemented | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. other NA objects? NaT? NaN? None? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For logical ops, I am not sure it should handle those, as logical ops typically involve booleans (and NaT/NaN/None cannot be stored in a boolean array) Eg for float is also raises right now:
(for comparison ops, though, we should probably add those) |
||
|
||
__ror__ = __or__ | ||
|
||
def __xor__(self, other): | ||
if other is False or other is True or other is NA: | ||
return NA | ||
return NotImplemented | ||
|
||
__rxor__ = __xor__ | ||
|
||
|
||
NA = NAType() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
from pandas.core.na_scalar import NA | ||
|
||
|
||
def test_singleton(): | ||
assert NA is NA | ||
new_NA = type(NA)() | ||
assert new_NA is NA | ||
|
||
|
||
def test_repr(): | ||
assert repr(NA) == "NA" | ||
assert str(NA) == "NA" | ||
|
||
|
||
def test_truthiness(): | ||
with pytest.raises(TypeError): | ||
bool(NA) | ||
|
||
with pytest.raises(TypeError): | ||
not NA | ||
|
||
|
||
def test_hashable(): | ||
assert hash(NA) == hash(NA) | ||
d = {NA: "test"} | ||
assert d[NA] == "test" | ||
|
||
|
||
def test_arithmetic_ops(all_arithmetic_functions): | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
op = all_arithmetic_functions | ||
|
||
for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]: | ||
if op.__name__ == "rmod" and isinstance(other, str): | ||
continue | ||
assert op(NA, other) is NA | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def test_comparison_ops(): | ||
|
||
for other in [NA, 1, 1.0, "a", np.int64(1), np.nan]: | ||
assert (NA == other) is NA | ||
assert (NA != other) is NA | ||
assert (NA > other) is NA | ||
assert (NA >= other) is NA | ||
assert (NA < other) is NA | ||
assert (NA <= other) is NA | ||
|
||
if isinstance(other, np.int64): | ||
# for numpy scalars we get a deprecation warning and False as result | ||
# for equality or error for larger/lesser than | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So numpy scalars we don't have full control over, so this means that if they are the left operand, we get some other behaviour:
(for the first case, not sure what the behaviour will be once the change in numpy is done) |
||
|
||
assert (other == NA) is NA | ||
assert (other != NA) is NA | ||
assert (other > NA) is NA | ||
assert (other >= NA) is NA | ||
assert (other < NA) is NA | ||
assert (other <= NA) is NA | ||
|
||
|
||
def test_unary_ops(): | ||
assert +NA is NA | ||
assert -NA is NA | ||
assert abs(NA) is NA | ||
assert ~NA is NA | ||
|
||
|
||
def test_logical_and(): | ||
|
||
assert NA & True is NA | ||
assert True & NA is NA | ||
assert NA & False is False | ||
assert False & NA is False | ||
assert NA & NA is NA | ||
|
||
with pytest.raises(TypeError): | ||
NA & 5 | ||
|
||
|
||
def test_logical_or(): | ||
|
||
assert NA | True is True | ||
assert True | NA is True | ||
assert NA | False is NA | ||
assert False | NA is NA | ||
assert NA | NA is NA | ||
|
||
with pytest.raises(TypeError): | ||
NA | 5 | ||
|
||
|
||
def test_logical_xor(): | ||
|
||
assert NA ^ True is NA | ||
assert True ^ NA is NA | ||
assert NA ^ False is NA | ||
assert False ^ NA is NA | ||
assert NA ^ NA is NA | ||
|
||
with pytest.raises(TypeError): | ||
NA ^ 5 | ||
|
||
|
||
def test_logical_not(): | ||
assert ~NA is NA |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Question is what type of objects we should recognize here.
I think we want to recognize scalar values that can be stored in dataframes. But of course we don't have control over what kind of scalars people put in a dataframe.
For now I did numbers + string (numbers also covers numpy scalars). Our own scalars (Timedelta, Timestamp, etc) could also be added, or they can also handle that on their side)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you might want to look at how NaT handles these, in particular for arraylike others
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want it to be recognized by arrays? (I somewhat purposefully left those out, but didn't think it fully through)
For our own internal arrays (eg IntegerArray), that can be handled on the array level? (so returning NotImplemented here)
For numpy arrays the question is what to do with it, as you cannot represent NAs in a numpy array (except in an object array). So not handling it might be fine?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, by not handling it and deferring to numpy, this already happens (-> converting to object):
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thats fine for now, but longer-term this will need to be optimized
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you explain what you mean with this?