Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix DataFrame logical ops Series inconsistency #28741

Merged
merged 8 commits into from
Oct 5, 2019
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ Numeric
^^^^^^^
- Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`)
- :class:`DataFrame` inequality comparisons with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`)
- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`)
-

Conversion
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5282,7 +5282,7 @@ def _arith_op(left, right):
def _combine_match_index(self, other, func):
# at this point we have `self.index.equals(other.index)`

if self._is_mixed_type or other._is_mixed_type:
if ops.should_series_dispatch(self, other, func):
# operate column-wise; avoid costly object-casting in `.values`
new_data = ops.dispatch_to_series(self, other, func)
else:
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,7 @@ def _arith_method_FRAME(cls, op, special):
default_axis = _get_frame_op_default_axis(op_name)

na_op = define_na_arithmetic_op(op, str_rep, eval_kwargs)
is_logical = str_rep in ["&", "|", "^"]

if op_name in _op_descriptions:
# i.e. include "add" but not "__add__"
Expand All @@ -707,11 +708,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None):
if isinstance(other, ABCDataFrame):
# Another DataFrame
pass_op = op if should_series_dispatch(self, other, op) else na_op
pass_op = pass_op if not is_logical else op
return self._combine_frame(other, pass_op, fill_value, level)
elif isinstance(other, ABCSeries):
# For these values of `axis`, we end up dispatching to Series op,
# so do not want the masked op.
pass_op = op if axis in [0, "columns", None] else na_op
pass_op = pass_op if not is_logical else op
return _combine_series_frame(
self, other, pass_op, fill_value=fill_value, axis=axis, level=level
)
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/ops/dispatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def should_series_dispatch(left, right, op):
Parameters
----------
left : DataFrame
right : DataFrame
right : DataFrame or Series
op : binary operator

Returns
Expand All @@ -66,6 +66,16 @@ def should_series_dispatch(left, right, op):
if left._is_mixed_type or right._is_mixed_type:
return True

if op.__name__.strip("_") in ["and", "or", "xor", "rand", "ror", "rxor"]:
# TODO: GH references for what this fixes
# Note: this check must come before the check for nonempty columns.
return True

if right.ndim == 1:
# operating with Series, short-circuit checks that would fail
# with AttributeError.
return False

if not len(left.columns) or not len(right.columns):
# ensure obj.dtypes[0] exists for each obj
return False
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/frame/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ def test_logical_ops_empty_frame(self):
dfa = DataFrame(index=[1], columns=["A"])

result = dfa & dfa
assert_frame_equal(result, dfa)
expected = DataFrame(False, index=[1], columns=["A"])
assert_frame_equal(result, expected)

def test_logical_ops_bool_frame(self):
# GH#5808
Expand All @@ -145,7 +146,11 @@ def test_logical_ops_int_frame(self):
df1a_bool = DataFrame(True, index=[1], columns=["A"])

result = df1a_int | df1a_bool
assert_frame_equal(result, df1a_int)
assert_frame_equal(result, df1a_bool)

# Check that this matches Series behavior
res_ser = df1a_int["A"] | df1a_bool["A"]
assert_series_equal(res_ser, df1a_bool["A"])

def test_logical_ops_invalid(self):
# GH#5808
Expand Down
47 changes: 26 additions & 21 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,20 @@ def test_scalar_na_logical_ops_corners(self):

with pytest.raises(TypeError):
d.__and__(s, axis="columns")
with pytest.raises(TypeError):
d.__and__(s, axis=1)

with pytest.raises(TypeError):
s & d
with pytest.raises(TypeError):
d & s

expected = (s & s).to_frame("A")
result = d.__and__(s, axis="index")
tm.assert_frame_equal(result, expected)

# this is wrong as its not a boolean result
# result = d.__and__(s,axis='index')
result = d.__and__(s, axis=0)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor])
def test_logical_ops_with_index(self, op):
Expand Down Expand Up @@ -436,20 +444,19 @@ def test_logical_ops_df_compat(self):
assert_series_equal(s2 & s1, exp)

# True | np.nan => True
exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
assert_series_equal(s1 | s2, exp)
exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
assert_series_equal(s1 | s2, exp_or1)
# np.nan | True => np.nan, filled with False
exp = pd.Series([True, True, False, False], index=list("ABCD"), name="x")
assert_series_equal(s2 | s1, exp)
exp_or = pd.Series([True, True, False, False], index=list("ABCD"), name="x")
assert_series_equal(s2 | s1, exp_or)

# DataFrame doesn't fill nan with False
exp = pd.DataFrame({"x": [True, False, np.nan, np.nan]}, index=list("ABCD"))
assert_frame_equal(s1.to_frame() & s2.to_frame(), exp)
assert_frame_equal(s2.to_frame() & s1.to_frame(), exp)
assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame())
assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame())

exp = pd.DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD"))
assert_frame_equal(s1.to_frame() | s2.to_frame(), exp)
assert_frame_equal(s2.to_frame() | s1.to_frame(), exp)
assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame())
assert_frame_equal(s2.to_frame() | s1.to_frame(), exp_or.to_frame())

# different length
s3 = pd.Series([True, False, True], index=list("ABC"), name="x")
Expand All @@ -460,19 +467,17 @@ def test_logical_ops_df_compat(self):
assert_series_equal(s4 & s3, exp)

# np.nan | True => np.nan, filled with False
exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
assert_series_equal(s3 | s4, exp)
exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x")
assert_series_equal(s3 | s4, exp_or1)
# True | np.nan => True
exp = pd.Series([True, True, True, True], index=list("ABCD"), name="x")
assert_series_equal(s4 | s3, exp)
exp_or = pd.Series([True, True, True, True], index=list("ABCD"), name="x")
assert_series_equal(s4 | s3, exp_or)

exp = pd.DataFrame({"x": [True, False, True, np.nan]}, index=list("ABCD"))
assert_frame_equal(s3.to_frame() & s4.to_frame(), exp)
assert_frame_equal(s4.to_frame() & s3.to_frame(), exp)
assert_frame_equal(s3.to_frame() & s4.to_frame(), exp.to_frame())
assert_frame_equal(s4.to_frame() & s3.to_frame(), exp.to_frame())

exp = pd.DataFrame({"x": [True, True, True, np.nan]}, index=list("ABCD"))
assert_frame_equal(s3.to_frame() | s4.to_frame(), exp)
assert_frame_equal(s4.to_frame() | s3.to_frame(), exp)
assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame())
assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame())


class TestSeriesComparisons:
Expand Down