From 742d81f1ec87b77a057ce28d7d649430ba4f9959 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Sun, 10 Mar 2019 18:25:36 -0400 Subject: [PATCH 1/4] BUG: Fix error in replace with strings that are large numbers (#25616) --- doc/source/whatsnew/v0.24.2.rst | 1 + pandas/core/internals/blocks.py | 4 ++-- pandas/tests/series/test_replace.py | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 2c6d1e01ed89b..e269a98e3373f 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -32,6 +32,7 @@ Fixed Regressions - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) +- Fixed regression in :func:`replace` where large strings of numbers would be coerced into int, causing an ``OverflowError`` (:issue:`25616`) .. _whatsnew_0242.enhancements: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ada663556899b..0375f782badcc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1079,7 +1079,7 @@ def coerce_to_target_dtype(self, other): try: return self.astype(dtype) - except (ValueError, TypeError): + except (ValueError, TypeError, OverflowError): pass return self.astype(object) @@ -3210,7 +3210,7 @@ def _putmask_smart(v, m, n): nv = v.copy() nv[m] = nn_at return nv - except (ValueError, IndexError, TypeError): + except (ValueError, IndexError, TypeError, OverflowError): pass n = np.asarray(n) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 40b28047080da..6a4e3a25b486e 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -181,6 +181,20 @@ def check_replace(to_rep, val, expected): tr, v = [3, 4], [3.5, True] check_replace(tr, v, e) + # GH 25616 + # casts to object without Exception due to OverflowError + e = pd.Series([0, 1, 2, '100000000000000000000', 4]) + tr, v = [3], ['100000000000000000000'] + check_replace(tr, v, e) + + # GH 25616 + # casts to object without Exception due to OverflowError + original = pd.Series([0, '100000000000000000000', + '100000000000000000001']) + result = original.replace(['100000000000000000000'], [1]) + expected = pd.Series([0, 1, '100000000000000000001']) + tm.assert_series_equal(result, expected) + # test an object with dates + floats + integers + strings dr = pd.date_range('1/1/2001', '1/10/2001', freq='D').to_series().reset_index(drop=True) From 1aab32ca60fa8274776682ec0222c5b632d1cde4 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Sun, 10 Mar 2019 18:54:08 -0400 Subject: [PATCH 2/4] updated whatsnew, moved tests to new func --- doc/source/whatsnew/v0.24.2.rst | 2 +- pandas/tests/series/test_replace.py | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index e269a98e3373f..7c2718a55c4be 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -32,7 +32,7 @@ Fixed Regressions - Fixed regression in creating a period-dtype array from a read-only NumPy array of period objects. (:issue:`25403`) - Fixed regression in :class:`Categorical`, where constructing it from a categorical ``Series`` and an explicit ``categories=`` that differed from that in the ``Series`` created an invalid object which could trigger segfaults. (:issue:`25318`) - Fixed pip installing from source into an environment without NumPy (:issue:`25193`) -- Fixed regression in :func:`replace` where large strings of numbers would be coerced into int, causing an ``OverflowError`` (:issue:`25616`) +- Fixed regression in :meth:`DataFrame.replace` where large strings of numbers would be coerced into ``int64``, causing an ``OverflowError`` (:issue:`25616`) .. _whatsnew_0242.enhancements: diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index 6a4e3a25b486e..fba9caae8af59 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -181,20 +181,6 @@ def check_replace(to_rep, val, expected): tr, v = [3, 4], [3.5, True] check_replace(tr, v, e) - # GH 25616 - # casts to object without Exception due to OverflowError - e = pd.Series([0, 1, 2, '100000000000000000000', 4]) - tr, v = [3], ['100000000000000000000'] - check_replace(tr, v, e) - - # GH 25616 - # casts to object without Exception due to OverflowError - original = pd.Series([0, '100000000000000000000', - '100000000000000000001']) - result = original.replace(['100000000000000000000'], [1]) - expected = pd.Series([0, 1, '100000000000000000001']) - tm.assert_series_equal(result, expected) - # test an object with dates + floats + integers + strings dr = pd.date_range('1/1/2001', '1/10/2001', freq='D').to_series().reset_index(drop=True) @@ -294,3 +280,17 @@ def test_replace_mixed_types_with_string(self): result = s.replace([2, '4'], np.nan) expected = pd.Series([1, np.nan, 3, np.nan, 4, 5]) tm.assert_series_equal(expected, result) + + def test_replace_with_no_overflowerror(self): + # GH 25616 + # casts to object without Exception from OverflowError + s = pd.Series([0, 1, 2, 3, 4]) + result = s.replace([3], ['100000000000000000000']) + expected = pd.Series([0, 1, 2, '100000000000000000000', 4]) + tm.assert_series_equal(result, expected) + + s = pd.Series([0, '100000000000000000000', + '100000000000000000001']) + result = s.replace(['100000000000000000000'], [1]) + expected = pd.Series([0, 1, '100000000000000000001']) + tm.assert_series_equal(result, expected) From 4e16b9a74b43b8c24cde203309c32b6ef4a1df33 Mon Sep 17 00:00:00 2001 From: ArtificialQualia Date: Sun, 10 Mar 2019 18:56:16 -0400 Subject: [PATCH 3/4] fixed indenting --- pandas/tests/series/test_replace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_replace.py b/pandas/tests/series/test_replace.py index fba9caae8af59..2e7b746f6c9f2 100644 --- a/pandas/tests/series/test_replace.py +++ b/pandas/tests/series/test_replace.py @@ -290,7 +290,7 @@ def test_replace_with_no_overflowerror(self): tm.assert_series_equal(result, expected) s = pd.Series([0, '100000000000000000000', - '100000000000000000001']) + '100000000000000000001']) result = s.replace(['100000000000000000000'], [1]) expected = pd.Series([0, 1, '100000000000000000001']) tm.assert_series_equal(result, expected) From f32163cce85cec7ae4fa71b46e6f940e385adaa8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 12 Mar 2019 21:44:57 +0100 Subject: [PATCH 4/4] add to contributors --- doc/source/whatsnew/v0.24.2.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 95bdd5eaf272f..5b5c9c78d10da 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -91,6 +91,7 @@ A total of 25 people contributed patches to this release. People with a "+" by t * Joris Van den Bossche * Josh * Justin Zheng +* Kendall Masse * Matthew Roeschke * Max Bolingbroke + * rbenes +