From 85342ee3b814fdc6e9c6382a1cd6798e91297f17 Mon Sep 17 00:00:00 2001 From: Garrett-R Date: Mon, 19 Jan 2015 22:45:02 -0800 Subject: [PATCH] BUG: Fix #9144 #8445 Fix how core.common._fill_zeros handles div and mod by zero --- doc/source/whatsnew/v0.16.0.txt | 31 ++++++++++++++++++++++ pandas/core/common.py | 46 ++++++++++++++++++--------------- pandas/core/ops.py | 3 ++- pandas/tests/test_frame.py | 19 ++++++++------ pandas/tests/test_series.py | 34 +++++++++++++++++++++--- vb_suite/binary_ops.py | 45 ++++++++++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt index cd7cdbb645686..3d82ba1f1b265 100644 --- a/doc/source/whatsnew/v0.16.0.txt +++ b/doc/source/whatsnew/v0.16.0.txt @@ -131,6 +131,37 @@ methods (:issue:`9088`). dtype: int64 +- During division involving a ``Series`` or ``DataFrame``, ``0/0`` and ``0//0`` now give ``np.nan`` instead of ``np.inf``. (:issue:`9144`, :issue:`8445`) + + Previous Behavior + + .. code-block:: python + + In [2]: p = pd.Series([0, 1]) + + In [3]: p / 0 + Out[3]: + 0 inf + 1 inf + dtype: float64 + + In [4]: p // 0 + Out[4]: + 0 inf + 1 inf + dtype: float64 + + + + New Behavior + + .. 
ipython:: python + + p = pd.Series([0, 1]) + p / 0 + p // 0 + + Deprecations ~~~~~~~~~~~~ diff --git a/pandas/core/common.py b/pandas/core/common.py index b48e73ca7c85c..937dc421e3926 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1395,36 +1395,40 @@ def _fill_zeros(result, x, y, name, fill): mask the nan's from x """ - if fill is not None: + if fill is None or is_float_dtype(result): + return result + + if name.startswith(('r', '__r')): + x,y = y,x - if name.startswith('r'): - x,y = y,x + if np.isscalar(y): + y = np.array(y) + if is_integer_dtype(y): - if not isinstance(y, np.ndarray): - dtype, value = _infer_dtype_from_scalar(y) - y = np.empty(result.shape, dtype=dtype) - y.fill(value) + if (y == 0).any(): - if is_integer_dtype(y): + # GH 7325, mask and nans must be broadcastable (also: PR 9308) + # Raveling and then reshaping makes np.putmask faster + mask = ((y == 0) & ~np.isnan(result)).ravel() - if (y.ravel() == 0).any(): - shape = result.shape - result = result.ravel().astype('float64') + shape = result.shape + result = result.astype('float64', copy=False).ravel() - # GH 7325, mask and nans must be broadcastable - signs = np.sign(result) - mask = ((y == 0) & ~np.isnan(x)).ravel() + np.putmask(result, mask, fill) - np.putmask(result, mask, fill) + # if we have a fill of inf, then sign it correctly + # (GH 6178 and PR 9308) + if np.isinf(fill): + signs = np.sign(y if name.startswith(('r', '__r')) else x) + negative_inf_mask = (signs.ravel() < 0) & mask + np.putmask(result, negative_inf_mask, -fill) - # if we have a fill of inf, then sign it - # correctly - # GH 6178 - if np.isinf(fill): - np.putmask(result,(signs<0) & mask, -fill) + if "floordiv" in name: # (PR 9308) + nan_mask = ((y == 0) & (x == 0)).ravel() + np.putmask(result, nan_mask, np.nan) - result = result.reshape(shape) + result = result.reshape(shape) return result diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 64672a9e75244..954d2c8a77326 100644 --- 
a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -81,7 +81,8 @@ def names(x): rpow=arith_method(lambda x, y: y ** x, names('rpow'), op('**'), default_axis=default_axis, reversed=True), rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'), - default_axis=default_axis, reversed=True), + default_axis=default_axis, fill_zeros=np.nan, + reversed=True), ) new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 82e5d68187b1e..18e699695f330 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -20,7 +20,7 @@ ) from pandas import compat -from numpy import random, nan +from numpy import random, nan, inf from numpy.random import randn import numpy as np import numpy.ma as ma @@ -5138,23 +5138,26 @@ def test_modulo(self): def test_div(self): - # integer div, but deal with the 0's + # integer div, but deal with the 0's (GH 9144) p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] }) result = p / p - ### this is technically wrong as the integer portion is coerced to float ### - expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) }) + expected = DataFrame({'first': Series([1.0, 1.0, 1.0, 1.0]), + 'second': Series([nan, nan, nan, 1])}) assert_frame_equal(result,expected) - result2 = DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf) + result2 = DataFrame(p.values.astype('float') / p.values, index=p.index, + columns=p.columns) assert_frame_equal(result2,expected) result = p / 0 - expected = DataFrame(np.inf,index=p.index,columns=p.columns) + expected = DataFrame(inf, index=p.index, columns=p.columns) + expected.iloc[0:3, 1] = nan assert_frame_equal(result,expected) # numpy has a slightly different (wrong) treatement - result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf) + result2 = 
DataFrame(p.values.astype('float64') / 0, index=p.index, + columns=p.columns) assert_frame_equal(result2,expected) p = DataFrame(np.random.randn(10, 5)) @@ -5604,7 +5607,7 @@ def test_arith_flex_series(self): # broadcasting issue in GH7325 df = DataFrame(np.arange(3*2).reshape((3,2)),dtype='int64') - expected = DataFrame([[np.inf,np.inf],[1.0,1.5],[1.0,1.25]]) + expected = DataFrame([[nan, inf], [1.0, 1.5], [1.0, 1.25]]) result = df.div(df[0],axis='index') assert_frame_equal(result,expected) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index dd2dd4e6066b9..d54dae2aca6d2 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -11,7 +11,7 @@ import nose -from numpy import nan +from numpy import nan, inf import numpy as np import numpy.ma as ma import pandas as pd @@ -2689,6 +2689,17 @@ def test_modulo(self): result2 = p['second'] % p['first'] self.assertFalse(np.array_equal(result, result2)) + # GH 9144 + s = Series([0, 1]) + + result = s % 0 + expected = Series([nan, nan]) + assert_series_equal(result, expected) + + result = 0 % s + expected = Series([nan, 0.0]) + assert_series_equal(result, expected) + def test_div(self): # no longer do integer div for any ops, but deal with the 0's @@ -2730,6 +2741,21 @@ def test_div(self): result = p['second'] / p['first'] assert_series_equal(result, expected) + # GH 9144 + s = Series([-1, 0, 1]) + + result = 0 / s + expected = Series([0.0, nan, 0.0]) + assert_series_equal(result, expected) + + result = s / 0 + expected = Series([-inf, nan, inf]) + assert_series_equal(result, expected) + + result = s // 0 + expected = Series([-inf, nan, inf]) + assert_series_equal(result, expected) + def test_operators(self): def _check_op(series, other, op, pos_only=False): @@ -6414,17 +6440,17 @@ def test_pct_change_shift_over_nas(self): def test_autocorr(self): # Just run the function corr1 = self.ts.autocorr() - + # Now run it with the lag parameter corr2 = self.ts.autocorr(lag=1) - + # corr() 
with lag needs Series of at least length 2 if len(self.ts) <= 2: self.assertTrue(np.isnan(corr1)) self.assertTrue(np.isnan(corr2)) else: self.assertEqual(corr1, corr2) - + # Choose a random lag between 1 and length of Series - 2 # and compare the result with the Series corr() function n = 1 + np.random.randint(max(1, len(self.ts) - 2)) diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py index 5ec2d9fcfc2cf..db9a6b730064e 100644 --- a/vb_suite/binary_ops.py +++ b/vb_suite/binary_ops.py @@ -72,6 +72,51 @@ Benchmark("df * df2", setup, name='frame_mult_no_ne',cleanup="expr.set_use_numexpr(True)", start_date=datetime(2013, 2, 26)) +#---------------------------------------------------------------------- +# division + +setup = common_setup + """ +df = DataFrame(np.random.randn(1000, 1000)) +""" +frame_float_div_by_zero = \ + Benchmark("df / 0", setup, name='frame_float_div_by_zero') + +setup = common_setup + """ +df = DataFrame(np.random.randn(1000, 1000)) +""" +frame_float_floor_by_zero = \ + Benchmark("df // 0", setup, name='frame_float_floor_by_zero') + +setup = common_setup + """ +df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) +""" +frame_int_div_by_zero = \ + Benchmark("df / 0", setup, name='frame_int_div_by_zero') + +setup = common_setup + """ +df = DataFrame(np.random.randn(1000, 1000)) +df2 = DataFrame(np.random.randn(1000, 1000)) +""" +frame_float_div = \ + Benchmark("df // df2", setup, name='frame_float_div') + +#---------------------------------------------------------------------- +# modulo + +setup = common_setup + """ +df = DataFrame(np.random.randn(1000, 1000)) +df2 = DataFrame(np.random.randn(1000, 1000)) +""" +frame_float_mod = \ + Benchmark("df % df2", setup, name='frame_float_mod') + +setup = common_setup + """ +df = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) +df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000))) +""" +frame_int_mod = \ + Benchmark("df % df2", setup, name='frame_int_mod') + 
#---------------------------------------------------------------------- # multi and