From e63c41624adeb38341d2ce3d2b14fcea0d59daa4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 1 Oct 2019 20:15:36 -0500 Subject: [PATCH 1/4] BUG: Fix DataFrame logical ops Series inconsistency --- pandas/core/frame.py | 2 +- pandas/core/ops/__init__.py | 3 ++ pandas/core/ops/dispatch.py | 12 ++++++- pandas/tests/frame/test_operators.py | 9 +++-- pandas/tests/series/test_operators.py | 47 +++++++++++++++------------ 5 files changed, 48 insertions(+), 25 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4a44a89998e3..4998a1de0dd4a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5275,7 +5275,7 @@ def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join="outer", axis=0, level=level, copy=False) # at this point we have `left.index.equals(right.index)` - if left._is_mixed_type or right._is_mixed_type: + if ops.should_series_dispatch(left, right, func): # operate column-wise; avoid costly object-casting in `.values` new_data = ops.dispatch_to_series(left, right, func) else: diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 16d2eaa410637..565427306d82f 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -673,6 +673,7 @@ def _arith_method_FRAME(cls, op, special): default_axis = _get_frame_op_default_axis(op_name) na_op = define_na_arithmetic_op(op, str_rep, eval_kwargs) + is_logical = str_rep in ["&", "|", "^"] if op_name in _op_descriptions: # i.e. include "add" but not "__add__" @@ -688,11 +689,13 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if isinstance(other, ABCDataFrame): # Another DataFrame pass_op = op if should_series_dispatch(self, other, op) else na_op + pass_op = pass_op if not is_logical else op return self._combine_frame(other, pass_op, fill_value, level) elif isinstance(other, ABCSeries): # For these values of `axis`, we end up dispatching to Series op, # so do not want the masked op. pass_op = op if axis in [0, "columns", None] else na_op + pass_op = pass_op if not is_logical else op return _combine_series_frame( self, other, pass_op, fill_value=fill_value, axis=axis, level=level ) diff --git a/pandas/core/ops/dispatch.py b/pandas/core/ops/dispatch.py index 9835d57ee7366..c39f4d6d9698d 100644 --- a/pandas/core/ops/dispatch.py +++ b/pandas/core/ops/dispatch.py @@ -56,7 +56,7 @@ def should_series_dispatch(left, right, op): Parameters ---------- left : DataFrame - right : DataFrame + right : DataFrame or Series op : binary operator Returns @@ -66,6 +66,16 @@ def should_series_dispatch(left, right, op): if left._is_mixed_type or right._is_mixed_type: return True + if op.__name__.strip("_") in ["and", "or", "xor", "rand", "ror", "rxor"]: + # TODO: GH references for what this fixes + # Note: this check must come before the check for nonempty columns. + return True + + if right.ndim == 1: + # operating with Series, short-circuit checks that would fail + # with AttributeError. + return False + if not len(left.columns) or not len(right.columns): # ensure obj.dtypes[0] exists for each obj return False diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index bffdf17a49750..0e1cd42329169 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -127,7 +127,8 @@ def test_logical_ops_empty_frame(self): dfa = DataFrame(index=[1], columns=["A"]) result = dfa & dfa - assert_frame_equal(result, dfa) + expected = DataFrame(False, index=[1], columns=["A"]) + assert_frame_equal(result, expected) def test_logical_ops_bool_frame(self): # GH#5808 @@ -145,7 +146,11 @@ def test_logical_ops_int_frame(self): df1a_bool = DataFrame(True, index=[1], columns=["A"]) result = df1a_int | df1a_bool - assert_frame_equal(result, df1a_int) + assert_frame_equal(result, df1a_bool) + + # Check that this matches Series behavior + res_ser = df1a_int["A"] | df1a_bool["A"] + assert_series_equal(res_ser, df1a_bool["A"]) def test_logical_ops_invalid(self): # GH#5808 diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index c2cf91e582c47..467f2c177850a 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -250,12 +250,20 @@ def test_scalar_na_logical_ops_corners(self): with pytest.raises(TypeError): d.__and__(s, axis="columns") + with pytest.raises(TypeError): + d.__and__(s, axis=1) with pytest.raises(TypeError): s & d + with pytest.raises(TypeError): + d & s + + expected = (s & s).to_frame("A") + result = d.__and__(s, axis="index") + tm.assert_frame_equal(result, expected) - # this is wrong as its not a boolean result - # result = d.__and__(s,axis='index') + result = d.__and__(s, axis=0) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("op", [operator.and_, operator.or_, operator.xor]) def test_logical_ops_with_index(self, op): @@ -436,20 +444,19 @@ def test_logical_ops_df_compat(self): assert_series_equal(s2 & s1, exp) # True | np.nan => True - exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x") - assert_series_equal(s1 | s2, exp) + exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x") + assert_series_equal(s1 | s2, exp_or1) # np.nan | True => np.nan, filled with False - exp = pd.Series([True, True, False, False], index=list("ABCD"), name="x") - assert_series_equal(s2 | s1, exp) + exp_or = pd.Series([True, True, False, False], index=list("ABCD"), name="x") + assert_series_equal(s2 | s1, exp_or) # DataFrame doesn't fill nan with False - exp = pd.DataFrame({"x": [True, False, np.nan, np.nan]}, index=list("ABCD")) - assert_frame_equal(s1.to_frame() & s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() & s1.to_frame(), exp) + assert_frame_equal(s1.to_frame() & s2.to_frame(), exp.to_frame()) + assert_frame_equal(s2.to_frame() & s1.to_frame(), exp.to_frame()) exp = pd.DataFrame({"x": [True, True, np.nan, np.nan]}, index=list("ABCD")) - assert_frame_equal(s1.to_frame() | s2.to_frame(), exp) - assert_frame_equal(s2.to_frame() | s1.to_frame(), exp) + assert_frame_equal(s1.to_frame() | s2.to_frame(), exp_or1.to_frame()) + assert_frame_equal(s2.to_frame() | s1.to_frame(), exp_or.to_frame()) # different length s3 = pd.Series([True, False, True], index=list("ABC"), name="x") @@ -460,19 +467,17 @@ def test_logical_ops_df_compat(self): assert_series_equal(s4 & s3, exp) # np.nan | True => np.nan, filled with False - exp = pd.Series([True, True, True, False], index=list("ABCD"), name="x") - assert_series_equal(s3 | s4, exp) + exp_or1 = pd.Series([True, True, True, False], index=list("ABCD"), name="x") + assert_series_equal(s3 | s4, exp_or1) # True | np.nan => True - exp = pd.Series([True, True, True, True], index=list("ABCD"), name="x") - assert_series_equal(s4 | s3, exp) + exp_or = pd.Series([True, True, True, True], index=list("ABCD"), name="x") + assert_series_equal(s4 | s3, exp_or) - exp = pd.DataFrame({"x": [True, False, True, np.nan]}, index=list("ABCD")) - assert_frame_equal(s3.to_frame() & s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() & s3.to_frame(), exp) + assert_frame_equal(s3.to_frame() & s4.to_frame(), exp.to_frame()) + assert_frame_equal(s4.to_frame() & s3.to_frame(), exp.to_frame()) - exp = pd.DataFrame({"x": [True, True, True, np.nan]}, index=list("ABCD")) - assert_frame_equal(s3.to_frame() | s4.to_frame(), exp) - assert_frame_equal(s4.to_frame() | s3.to_frame(), exp) + assert_frame_equal(s3.to_frame() | s4.to_frame(), exp_or1.to_frame()) + assert_frame_equal(s4.to_frame() | s3.to_frame(), exp_or.to_frame()) class TestSeriesComparisons: From 7f803140b7e9ba3a372519d68dadd5950723b7bc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 12:56:13 -0500 Subject: [PATCH 2/4] whatsnew --- doc/source/whatsnew/v1.0.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2668734031ee1..3fcca8d9d5315 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -208,7 +208,8 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) - :class:`DataFrame` inequality comparisons with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) -- +- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class`Series` behavior by filling NA values (:issue:`28741`) + Conversion ^^^^^^^^^^ From 4da84cb49ae2215249b82f776df0553399776bd9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 2 Oct 2019 15:20:08 -0500 Subject: [PATCH 3/4] litn fixup --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3fcca8d9d5315..a9a1a70ba5ab7 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -209,7 +209,7 @@ Numeric - Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) - :class:`DataFrame` inequality comparisons with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) - Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class`Series` behavior by filling NA values (:issue:`28741`) - +- Conversion ^^^^^^^^^^ From 3993b9e3a22d619236093eda294f2b276a3a13a9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 3 Oct 2019 10:27:52 -0500 Subject: [PATCH 4/4] typo fixup --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9f2c7258c2381..5244047899bff 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -210,7 +210,7 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile` with zero-column :class:`DataFrame` incorrectly raising (:issue:`23925`) - :class:`DataFrame` inequality comparisons with object-dtype and ``complex`` entries failing to raise ``TypeError`` like their :class:`Series` counterparts (:issue:`28079`) -- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class`Series` behavior by filling NA values (:issue:`28741`) +- Bug in :class:`DataFrame` logical operations (`&`, `|`, `^`) not matching :class:`Series` behavior by filling NA values (:issue:`28741`) - Conversion