Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: 0/frame numeric ops buggy (GH9144) #9308

Merged
merged 1 commit into from
Feb 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,37 @@ methods (:issue:`9088`).
dtype: int64


- During division involving a ``Series`` or ``DataFrame``, ``0/0`` and ``0//0`` now give ``np.nan`` instead of ``np.inf``. (:issue:`9144`, :issue:`8445`)

Previous Behavior

.. code-block:: python

In [2]: p = pd.Series([0, 1])

In [3]: p / 0
Out[3]:
0 inf
1 inf
dtype: float64

In [4]: p // 0
Out[4]:
0 inf
1 inf
dtype: float64



New Behavior

.. ipython:: python

p = pd.Series([0, 1])
p / 0
p // 0



Deprecations
~~~~~~~~~~~~
Expand Down
46 changes: 25 additions & 21 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,36 +1395,40 @@ def _fill_zeros(result, x, y, name, fill):
mask the nan's from x
"""

if fill is not None:
if fill is None or is_float_dtype(result):
return result

if name.startswith(('r', '__r')):
x,y = y,x

if name.startswith('r'):
x,y = y,x
if np.isscalar(y):
y = np.array(y)

if is_integer_dtype(y):

if not isinstance(y, np.ndarray):
dtype, value = _infer_dtype_from_scalar(y)
y = np.empty(result.shape, dtype=dtype)
y.fill(value)
if (y == 0).any():

if is_integer_dtype(y):
# GH 7325, mask and nans must be broadcastable (also: PR 9308)
# Raveling and then reshaping makes np.putmask faster
mask = ((y == 0) & ~np.isnan(result)).ravel()

if (y.ravel() == 0).any():
shape = result.shape
result = result.ravel().astype('float64')
shape = result.shape
result = result.astype('float64', copy=False).ravel()

# GH 7325, mask and nans must be broadcastable
signs = np.sign(result)
mask = ((y == 0) & ~np.isnan(x)).ravel()
np.putmask(result, mask, fill)

np.putmask(result, mask, fill)
# if we have a fill of inf, then sign it correctly
# (GH 6178 and PR 9308)
if np.isinf(fill):
signs = np.sign(y if name.startswith(('r', '__r')) else x)
negative_inf_mask = (signs.ravel() < 0) & mask
np.putmask(result, negative_inf_mask, -fill)

# if we have a fill of inf, then sign it
# correctly
# GH 6178
if np.isinf(fill):
np.putmask(result,(signs<0) & mask, -fill)
if "floordiv" in name: # (PR 9308)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Working on #19322 it looks like the problem would be solved by making this condition include other "div" operations. Was there a specific reason to only include floordiv here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid I don't remember why only floordiv is here, but I do tend to comment non-obvious, intentional choices, so I'm guessing it was not intended that only floordiv be here.

nan_mask = ((y == 0) & (x == 0)).ravel()
np.putmask(result, nan_mask, np.nan)

result = result.reshape(shape)
result = result.reshape(shape)

return result

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def names(x):
rpow=arith_method(lambda x, y: y ** x, names('rpow'), op('**'),
default_axis=default_axis, reversed=True),
rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'),
default_axis=default_axis, reversed=True),
default_axis=default_axis, fill_zeros=np.nan,
reversed=True),
)
new_methods['div'] = new_methods['truediv']
new_methods['rdiv'] = new_methods['rtruediv']
Expand Down
19 changes: 11 additions & 8 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from pandas import compat

from numpy import random, nan
from numpy import random, nan, inf
from numpy.random import randn
import numpy as np
import numpy.ma as ma
Expand Down Expand Up @@ -5138,23 +5138,26 @@ def test_modulo(self):

def test_div(self):

# integer div, but deal with the 0's
# integer div, but deal with the 0's (GH 9144)
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
result = p / p

### this is technically wrong as the integer portion is coerced to float ###
expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) })
expected = DataFrame({'first': Series([1.0, 1.0, 1.0, 1.0]),
'second': Series([nan, nan, nan, 1])})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number here as a comment

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks.

assert_frame_equal(result,expected)

result2 = DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf)
result2 = DataFrame(p.values.astype('float') / p.values, index=p.index,
columns=p.columns)
assert_frame_equal(result2,expected)

result = p / 0
expected = DataFrame(np.inf,index=p.index,columns=p.columns)
expected = DataFrame(inf, index=p.index, columns=p.columns)
expected.iloc[0:3, 1] = nan
assert_frame_equal(result,expected)

# numpy has a slightly different (wrong) treatment
result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf)
result2 = DataFrame(p.values.astype('float64') / 0, index=p.index,
columns=p.columns)
assert_frame_equal(result2,expected)

p = DataFrame(np.random.randn(10, 5))
Expand Down Expand Up @@ -5604,7 +5607,7 @@ def test_arith_flex_series(self):

# broadcasting issue in GH7325
df = DataFrame(np.arange(3*2).reshape((3,2)),dtype='int64')
expected = DataFrame([[np.inf,np.inf],[1.0,1.5],[1.0,1.25]])
expected = DataFrame([[nan, inf], [1.0, 1.5], [1.0, 1.25]])
result = df.div(df[0],axis='index')
assert_frame_equal(result,expected)

Expand Down
34 changes: 30 additions & 4 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import nose

from numpy import nan
from numpy import nan, inf
import numpy as np
import numpy.ma as ma
import pandas as pd
Expand Down Expand Up @@ -2689,6 +2689,17 @@ def test_modulo(self):
result2 = p['second'] % p['first']
self.assertFalse(np.array_equal(result, result2))

# GH 9144
s = Series([0, 1])

result = s % 0
expected = Series([nan, nan])
assert_series_equal(result, expected)

result = 0 % s
expected = Series([nan, 0.0])
assert_series_equal(result, expected)

def test_div(self):

# no longer do integer div for any ops, but deal with the 0's
Expand Down Expand Up @@ -2730,6 +2741,21 @@ def test_div(self):
result = p['second'] / p['first']
assert_series_equal(result, expected)

# GH 9144
s = Series([-1, 0, 1])

result = 0 / s
expected = Series([0.0, nan, 0.0])
assert_series_equal(result, expected)

result = s / 0
expected = Series([-inf, nan, inf])
assert_series_equal(result, expected)

result = s // 0
expected = Series([-inf, nan, inf])
assert_series_equal(result, expected)

def test_operators(self):

def _check_op(series, other, op, pos_only=False):
Expand Down Expand Up @@ -6414,17 +6440,17 @@ def test_pct_change_shift_over_nas(self):
def test_autocorr(self):
# Just run the function
corr1 = self.ts.autocorr()

# Now run it with the lag parameter
corr2 = self.ts.autocorr(lag=1)

# corr() with lag needs Series of at least length 2
if len(self.ts) <= 2:
self.assertTrue(np.isnan(corr1))
self.assertTrue(np.isnan(corr2))
else:
self.assertEqual(corr1, corr2)

# Choose a random lag between 1 and length of Series - 2
# and compare the result with the Series corr() function
n = 1 + np.random.randint(max(1, len(self.ts) - 2))
Expand Down
45 changes: 45 additions & 0 deletions vb_suite/binary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,51 @@
Benchmark("df * df2", setup, name='frame_mult_no_ne',cleanup="expr.set_use_numexpr(True)",
start_date=datetime(2013, 2, 26))

#----------------------------------------------------------------------
# division
#
# Benchmarks for true division (`/`) and floor division (`//`) of frames,
# including the divide-by-zero paths.  Naming convention used below:
# ``*_div*`` times `/`, ``*_floor*`` times `//`.

# float frame / scalar zero
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_div_by_zero = \
    Benchmark("df / 0", setup, name='frame_float_div_by_zero')

# float frame // scalar zero
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_floor_by_zero = \
    Benchmark("df // 0", setup, name='frame_float_floor_by_zero')

# integer frame / scalar zero
# NOTE: the shape must be passed as ``size=``; passing the tuple
# positionally makes it the ``low`` bound and does not build a
# 1000x1000 integer frame.
setup = common_setup + """
df = DataFrame(np.random.randint(-1000, 1000, size=(1000, 1000)))
"""
frame_int_div_by_zero = \
    Benchmark("df / 0", setup, name='frame_int_div_by_zero')

# float frame / float frame (true division, matching the benchmark name)
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
df2 = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_div = \
    Benchmark("df / df2", setup, name='frame_float_div')

#----------------------------------------------------------------------
# modulo
#
# These must time `%`; timing `/` here would just duplicate the division
# benchmarks above.

# float frame % float frame
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
df2 = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_mod = \
    Benchmark("df % df2", setup, name='frame_float_mod')

# integer frame % integer frame (the divisor range includes zero)
setup = common_setup + """
df = DataFrame(np.random.randint(-1000, 1000, size=(1000, 1000)))
df2 = DataFrame(np.random.randint(-1000, 1000, size=(1000, 1000)))
"""
frame_int_mod = \
    Benchmark("df % df2", setup, name='frame_int_mod')

#----------------------------------------------------------------------
# multi and

Expand Down