From 6171bd68663169cc7968719ca3876258c4460c7d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 18:06:04 -0800 Subject: [PATCH 1/4] DEPR: dropna multiple axes, fillna int for td64, from_codes with floats, Series.nonzero --- doc/source/whatsnew/v1.0.0.rst | 4 ++ pandas/core/arrays/categorical.py | 17 +----- pandas/core/frame.py | 60 ++++++++----------- pandas/core/internals/blocks.py | 12 ++-- pandas/core/series.py | 49 --------------- .../arrays/categorical/test_constructors.py | 10 ++-- pandas/tests/frame/test_missing.py | 16 ++--- pandas/tests/series/test_missing.py | 24 +++----- 8 files changed, 53 insertions(+), 139 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 48808a7ef7a46..7956f2ed7b433 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -450,6 +450,10 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) +- Passing an integer to :meth:`Series.fillna` or :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype now raises ``TypeError`` (:issue:`24694`) +- Passing multiple axes to :meth:`DataFrame.dropna` is no longer supported (:issue:`20995`) +- Removed previously deprecated :meth:`Series.nonzero`, use `to_numpy().nonzero()` instead (:issue:`24048`) +- Passing floating dtype ``codes`` to :meth:`Categorical.from_codes` is no longer supported, pass ``codes.astype(np.int64)`` instead (:issue:`21775`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - 
:func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 6cc3f660fb425..26ac8929bc51d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -666,22 +666,7 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None): codes = np.asarray(codes) # #21767 if len(codes) and not is_integer_dtype(codes): - msg = "codes need to be array-like integers" - if is_float_dtype(codes): - icodes = codes.astype("i8") - if (icodes == codes).all(): - msg = None - codes = icodes - warn( - ( - "float codes will be disallowed in the future and " - "raise a ValueError" - ), - FutureWarning, - stacklevel=2, - ) - if msg: - raise ValueError(msg) + raise ValueError("codes need to be array-like integers") if len(codes) and (codes.max() >= len(dtype.categories) or codes.min() < -1): raise ValueError("codes need to be between -1 and len(categories)-1") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d436385ba61ce..85c74c046efe8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4482,7 +4482,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): * 0, or 'index' : Drop rows which contain missing values. * 1, or 'columns' : Drop columns which contain missing value. - .. deprecated:: 0.23.0 + .. changed:: 1.0.0 Pass tuple or list to drop on multiple axes. Only a single axis is allowed. @@ -4572,43 +4572,35 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): inplace = validate_bool_kwarg(inplace, "inplace") if isinstance(axis, (tuple, list)): # GH20987 - msg = ( - "supplying multiple axes to axis is deprecated and " - "will be removed in a future version." 
- ) - warnings.warn(msg, FutureWarning, stacklevel=2) + raise TypeError("supplying multiple axes to axis is no longer supported.") - result = self - for ax in axis: - result = result.dropna(how=how, thresh=thresh, subset=subset, axis=ax) + axis = self._get_axis_number(axis) + agg_axis = 1 - axis + + agg_obj = self + if subset is not None: + ax = self._get_axis(agg_axis) + indices = ax.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + agg_obj = self.take(indices, axis=agg_axis) + + count = agg_obj.count(axis=agg_axis) + + if thresh is not None: + mask = count >= thresh + elif how == "any": + mask = count == len(agg_obj._get_axis(agg_axis)) + elif how == "all": + mask = count > 0 else: - axis = self._get_axis_number(axis) - agg_axis = 1 - axis - - agg_obj = self - if subset is not None: - ax = self._get_axis(agg_axis) - indices = ax.get_indexer_for(subset) - check = indices == -1 - if check.any(): - raise KeyError(list(np.compress(check, subset))) - agg_obj = self.take(indices, axis=agg_axis) - - count = agg_obj.count(axis=agg_axis) - - if thresh is not None: - mask = count >= thresh - elif how == "any": - mask = count == len(agg_obj._get_axis(agg_axis)) - elif how == "all": - mask = count > 0 + if how is not None: + raise ValueError("invalid how option: {h}".format(h=how)) else: - if how is not None: - raise ValueError("invalid how option: {h}".format(h=how)) - else: - raise TypeError("must specify how or thresh") + raise TypeError("must specify how or thresh") - result = self.loc(axis=axis)[mask] + result = self.loc(axis=axis)[mask] if inplace: self._update_inplace(result) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2d6ffb7277742..ff87c129976be 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2444,15 +2444,11 @@ def fillna(self, value, **kwargs): # interpreted as nanoseconds if is_integer(value): # Deprecation GH#24694, 
GH#19233 - warnings.warn( - "Passing integers to fillna is deprecated, will " - "raise a TypeError in a future version. To retain " - "the old behavior, pass pd.Timedelta(seconds=n) " - "instead.", - FutureWarning, - stacklevel=6, + raise TypeError( + "Passing integers to fillna for timedelta64[ns] dtype is no " + "longer supported. To obtain the old behavior, pass " + "`pd.Timedelta(seconds=n)` instead." ) - value = Timedelta(value, unit="s") return super().fillna(value, **kwargs) def should_store(self, value): diff --git a/pandas/core/series.py b/pandas/core/series.py index a9ecf97dad68b..12313f78ccbe4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -566,55 +566,6 @@ def compress(self, condition, *args, **kwargs): nv.validate_compress(args, kwargs) return self[condition] - def nonzero(self): - """ - Return the *integer* indices of the elements that are non-zero. - - .. deprecated:: 0.24.0 - Please use .to_numpy().nonzero() as a replacement. - - This method is equivalent to calling `numpy.nonzero` on the - series data. For compatibility with NumPy, the return value is - the same (a tuple with an array of indices for each dimension), - but it will always be a one-item tuple because series only have - one dimension. - - Returns - ------- - numpy.ndarray - Indices of elements that are non-zero. - - See Also - -------- - numpy.nonzero - - Examples - -------- - >>> s = pd.Series([0, 3, 0, 4]) - >>> s.nonzero() - (array([1, 3]),) - >>> s.iloc[s.nonzero()[0]] - 1 3 - 3 4 - dtype: int64 - - # same return although index of s is different - >>> s = pd.Series([0, 3, 0, 4], index=['a', 'b', 'c', 'd']) - >>> s.nonzero() - (array([1, 3]),) - >>> s.iloc[s.nonzero()[0]] - b 3 - d 4 - dtype: int64 - """ - msg = ( - "Series.nonzero() is deprecated " - "and will be removed in a future version."
- "Use Series.to_numpy().nonzero() instead" - ) - warnings.warn(msg, FutureWarning, stacklevel=2) - return self._values.nonzero() - def put(self, *args, **kwargs): """ Apply the `put` method to its `values` attribute if it has one. diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 59017a1442cb4..14bb9b88eee88 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -529,13 +529,11 @@ def test_from_codes_with_float(self): # empty codes should not raise for floats Categorical.from_codes([], dtype.categories) - with tm.assert_produces_warning(FutureWarning): - cat = Categorical.from_codes(codes, dtype.categories) - tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="i1")) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype.categories) - with tm.assert_produces_warning(FutureWarning): - cat = Categorical.from_codes(codes, dtype=dtype) - tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="i1")) + with pytest.raises(ValueError, match="codes need to be array-like integers"): + Categorical.from_codes(codes, dtype=dtype) codes = [1.1, 2.0, 0] # non-integer with pytest.raises(ValueError, match="codes need to be array-like integers"): diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 24510ff9338ca..1bc2ca56d142b 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -168,20 +168,14 @@ def test_dropna_multiple_axes(self): cp = df.copy() # GH20987 - with tm.assert_produces_warning(FutureWarning): - result = df.dropna(how="all", axis=[0, 1]) - with tm.assert_produces_warning(FutureWarning): - result2 = df.dropna(how="all", axis=(0, 1)) - expected = df.dropna(how="all").dropna(how="all", axis=1) - - tm.assert_frame_equal(result, expected) - 
tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(df, cp) + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=[0, 1]) + with pytest.raises(TypeError, match="supplying multiple axes"): + df.dropna(how="all", axis=(0, 1)) inp = df.copy() - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(TypeError, match="supplying multiple axes"): inp.dropna(how="all", axis=(0, 1), inplace=True) - tm.assert_frame_equal(inp, expected) def test_dropna_tz_aware_datetime(self): # GH13407 diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 81bf1edbe86df..3ee547bedcf55 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -17,6 +17,7 @@ NaT, Series, Timestamp, + Timedelta, date_range, isna, ) @@ -60,8 +61,7 @@ def test_timedelta_fillna(self): td = s.diff() # reg fillna - with tm.assert_produces_warning(FutureWarning): - result = td.fillna(0) + result = td.fillna(Timedelta(seconds=0)) expected = Series( [ timedelta(0), @@ -73,8 +73,10 @@ def test_timedelta_fillna(self): tm.assert_series_equal(result, expected) # interpreted as seconds, deprecated - with tm.assert_produces_warning(FutureWarning): - result = td.fillna(1) + with pytest.raises(TypeError, match="Passing integers to fillna"): + td.fillna(1) + + result = td.fillna(Timedelta(seconds=1)) expected = Series( [ timedelta(seconds=1), @@ -121,17 +123,15 @@ def test_timedelta_fillna(self): # ffill td[2] = np.nan - result = td.ffill() - with tm.assert_produces_warning(FutureWarning): - expected = td.fillna(0) + result = td.ffill() + expected = td.fillna(Timedelta(seconds=0)) expected[0] = np.nan tm.assert_series_equal(result, expected) # bfill td[2] = np.nan result = td.bfill() - with tm.assert_produces_warning(FutureWarning): - expected = td.fillna(0) + expected = td.fillna(Timedelta(seconds=0)) expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1) 
tm.assert_series_equal(result, expected) @@ -1597,12 +1597,6 @@ def test_series_interpolate_intraday(self): tm.assert_numpy_array_equal(result.values, exp.values) - def test_nonzero_warning(self): - # GH 24048 - ser = pd.Series([1, 0, 3, 4]) - with tm.assert_produces_warning(FutureWarning): - ser.nonzero() - @pytest.mark.parametrize( "ind", [ From feca181f80b496ef3f45dbdd81c88d3b6ae74a7c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Nov 2019 18:22:20 -0800 Subject: [PATCH 2/4] lint fixup --- pandas/tests/series/test_missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 3ee547bedcf55..09f1db25a3e31 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -16,8 +16,8 @@ MultiIndex, NaT, Series, - Timestamp, Timedelta, + Timestamp, date_range, isna, ) @@ -123,7 +123,7 @@ def test_timedelta_fillna(self): # ffill td[2] = np.nan - result = td.ffill() + result = td.ffill() expected = td.fillna(Timedelta(seconds=0)) expected[0] = np.nan tm.assert_series_equal(result, expected) From 0e1f3fb28ed5b2e965a37e6c1c8aff22f5356bb8 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 27 Nov 2019 09:41:31 -0800 Subject: [PATCH 3/4] lint fixups --- pandas/core/arrays/categorical.py | 1 - pandas/io/pytables.py | 4 +++- pandas/tests/frame/test_missing.py | 1 - 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 26ac8929bc51d..94b352ff0ae5a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -28,7 +28,6 @@ is_dict_like, is_dtype_equal, is_extension_array_dtype, - is_float_dtype, is_integer_dtype, is_iterator, is_list_like, diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 69d632479e969..a50d2d1a72155 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2641,7 +2641,9 @@ def 
write(self, **kwargs): "cannot write on an abstract storer: sublcasses should implement" ) - def delete(self, where=None, start=None, stop=None, **kwargs): + def delete( + self, where=None, start: Optional[int] = None, stop: Optional[int] = None + ): """ support fully deleting the node in its entirety (only) - where specification must be None diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 1bc2ca56d142b..0b77c0067e5f2 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -165,7 +165,6 @@ def test_dropna_multiple_axes(self): [7, np.nan, 8, 9], ] ) - cp = df.copy() # GH20987 with pytest.raises(TypeError, match="supplying multiple axes"): From f0223e3e74f14af42b8c200ecc52e79d38024601 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 27 Nov 2019 13:29:57 -0800 Subject: [PATCH 4/4] fix doc --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 85c74c046efe8..7b3248b4a4670 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4482,7 +4482,7 @@ def dropna(self, axis=0, how="any", thresh=None, subset=None, inplace=False): * 0, or 'index' : Drop rows which contain missing values. * 1, or 'columns' : Drop columns which contain missing value. - .. changed:: 1.0.0 + .. versionchanged:: 1.0.0 Pass tuple or list to drop on multiple axes. Only a single axis is allowed.