Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: decorator for ops boilerplate #24282

Closed
wants to merge 12 commits into from
Closed
5 changes: 5 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@


def _cat_compare_op(op):

# Note: using unpack_and_defer here doesn't break any tests, but the
# behavior here is idiosyncratic enough that I'm not confident enough
# to change it.
# @ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger are you the right person to ask about Categorical methods? In particular, why does this only return NotImplemented for ABCSeries and not for ABCDataFrame and ABCIndexClass?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No idea. Returning NotImplemented seems reasonable, as long as it ends up back here with the unboxed values.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Categorical == DataFrame is weird both before and after

data = ["a", "b", 2, "a"]
cat = pd.Categorical(data)
idx = pd.CategoricalIndex(cat)
ser = pd.Series(cat)
df = pd.DataFrame(cat)

master

>>> cat == df
array([[ True, False, False,  True],
       [False,  True, False, False],
       [False, False,  True, False],
       [ True, False, False,  True]])

PR

>>> cat == df
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "pandas/core/ops.py", line 2127, in f
    other = _align_method_FRAME(self, other, axis=None)
  File "pandas/core/ops.py", line 2021, in _align_method_FRAME
    right = to_series(right)
  File "pandas/core/ops.py", line 1983, in to_series
    given_len=len(right)))
ValueError: Unable to coerce to Series, length must be 1: given 4

If we transpose then we get all-True, the only difference being that in the PR the result is a DataFrame instead of an ndarray.

def f(self, other):
# On python2, you can usually compare any type to any type, and
# Categoricals can be seen as a custom type, but having different
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
def _make_comparison_op(cls, op):
# TODO: share code with indexes.base version? Main difference is that
# the block for MultiIndex was removed here.

# @ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using this here leads to recursion errors, related to the fact that this only returns NotImplemented for ABCDataFrame. I think this will be easier to resolve after the change to composition.

def cmp_method(self, other):
if isinstance(other, ABCDataFrame):
return NotImplemented
Expand Down Expand Up @@ -885,6 +887,7 @@ def _time_shift(self, periods, freq=None):
return self._generate_range(start=start, end=end, periods=None,
freq=self.freq)

# @ops.unpack_and_defer
def __add__(self, other):
other = lib.item_from_zerodim(other)
if isinstance(other, (ABCSeries, ABCDataFrame)):
Expand Down Expand Up @@ -946,6 +949,7 @@ def __radd__(self, other):
# alias for __add__
return self.__add__(other)

# @ops.unpack_and_defer
def __sub__(self, other):
other = lib.item_from_zerodim(other)
if isinstance(other, (ABCSeries, ABCDataFrame)):
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def _dt_array_cmp(cls, op):
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

# @ops.unpack_and_defer
def wrapper(self, other):
meth = getattr(dtl.DatetimeLikeArrayMixin, opname)

Expand Down Expand Up @@ -504,8 +505,6 @@ def _assert_tzawareness_compat(self, other):

def _sub_datetime_arraylike(self, other):
"""subtract DatetimeArray/Index or ndarray[datetime64]"""
if len(self) != len(other):
raise ValueError("cannot add indices of unequal length")
jbrockmendel marked this conversation as resolved.
Show resolved Hide resolved

if isinstance(other, np.ndarray):
assert is_datetime64_dtype(other)
Expand Down
17 changes: 5 additions & 12 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna, notna

from pandas.core import nanops
from pandas.core import nanops, ops
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin


Expand Down Expand Up @@ -483,25 +483,16 @@ def _values_for_argsort(self):

@classmethod
def _create_comparison_method(cls, op):

@ops.unpack_and_defer
def cmp_method(self, other):

op_name = op.__name__
mask = None

if isinstance(other, (ABCSeries, ABCIndexClass)):
# Rely on pandas to unbox and dispatch to us.
return NotImplemented

if isinstance(other, IntegerArray):
other, mask = other._data, other._mask

elif is_list_like(other):
other = np.asarray(other)
if other.ndim > 0 and len(self) != len(other):
raise ValueError('Lengths must match to compare')

other = lib.item_from_zerodim(other)

# numpy will show a DeprecationWarning on invalid elementwise
# comparisons, this will raise in the future
with warnings.catch_warnings():
Expand Down Expand Up @@ -573,6 +564,8 @@ def _maybe_mask_result(self, result, mask, other, op_name):

@classmethod
def _create_arithmetic_method(cls, op):

# @ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jreback using the decorator here breaks a bunch of tests, specifically ones operating with DataFrame. Why does this return NotImplemented for Series/Index but not DataFrame?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Below, why use other = other.item() instead of item_from_zerodim?

def integer_arithmetic_method(self, other):

op_name = op.__name__
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def _period_array_cmp(cls, op):
opname = '__{name}__'.format(name=op.__name__)
nat_result = True if opname == '__ne__' else False

# @ops.unwrap_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger any idea why not returning NotImplemented for DataFrame?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure... I vaguely remember a broadcasting error, but that may have been user error.

def wrapper(self, other):
op = getattr(self.asi8, opname)
# We want to eventually defer to the Series or PeriodIndex (which will
Expand Down
21 changes: 5 additions & 16 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
ABCIndexClass, ABCSeries, ABCSparseSeries)
from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna

from pandas.core import ops
from pandas.core.accessor import PandasDelegate, delegate_names
import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
Expand Down Expand Up @@ -1650,13 +1651,11 @@ def sparse_unary_method(self):

@classmethod
def _create_arithmetic_method(cls, op):

@ops.unpack_and_defer
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both here and below this is technically a change since in the status quo this doesn't defer to DataFrame. Also doesn't call item_from_zerodim ATM.

def sparse_arithmetic_method(self, other):
op_name = op.__name__

if isinstance(other, (ABCSeries, ABCIndexClass)):
# Rely on pandas to dispatch to us.
return NotImplemented

if isinstance(other, SparseArray):
return _sparse_array_op(self, other, op, op_name)

Expand All @@ -1678,10 +1677,6 @@ def sparse_arithmetic_method(self, other):
with np.errstate(all='ignore'):
# TODO: delete sparse stuff in core/ops.py
# TODO: look into _wrap_result
if len(self) != len(other):
raise AssertionError(
("length mismatch: {self} vs. {other}".format(
self=len(self), other=len(other))))
if not isinstance(other, SparseArray):
dtype = getattr(other, 'dtype', None)
other = SparseArray(other, fill_value=self.fill_value,
Expand All @@ -1693,26 +1688,20 @@ def sparse_arithmetic_method(self, other):

@classmethod
def _create_comparison_method(cls, op):

@ops.unpack_and_defer
def cmp_method(self, other):
op_name = op.__name__

if op_name in {'and_', 'or_'}:
op_name = op_name[:-1]

if isinstance(other, (ABCSeries, ABCIndexClass)):
# Rely on pandas to unbox and dispatch to us.
return NotImplemented

if not is_scalar(other) and not isinstance(other, type(self)):
# convert list-like to ndarray
other = np.asarray(other)

if isinstance(other, np.ndarray):
# TODO: make this more flexible than just ndarray...
if len(self) != len(other):
raise AssertionError("length mismatch: {self} vs. {other}"
.format(self=len(self),
other=len(other)))
other = SparseArray(other, fill_value=self.fill_value)

if isinstance(other, SparseArray):
Expand Down
Loading