From 88fc79f7e9858398416deeb88cd402ddc2bc8e68 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Nov 2022 10:41:44 -0800 Subject: [PATCH 1/2] API: series int-slicing always positional --- doc/source/whatsnew/v2.0.0.rst | 33 +++++++++++++++ pandas/core/indexes/base.py | 40 ++----------------- pandas/core/indexes/numeric.py | 15 ------- pandas/tests/extension/base/getitem.py | 3 +- pandas/tests/indexing/test_floats.py | 26 ++++++++---- pandas/tests/series/indexing/test_get.py | 3 +- pandas/tests/series/indexing/test_getitem.py | 3 +- pandas/tests/series/indexing/test_setitem.py | 12 ++---- .../tests/series/methods/test_interpolate.py | 10 ++--- 9 files changed, 65 insertions(+), 80 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index bac404496fa53..4dd9324862704 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -164,6 +164,39 @@ the appropriate rows. Also, when ``n`` is larger than the group, no rows instead Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_200.api_breaking.int_slicing_always_positional: + +Integer slicing on Series is always positional +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +In past versions, an integer slice on a :class:`Series` (e.g. ``ser[1:2]`` or ``ser[2:]``) was *usually* +positional but not always. Starting in pandas 2.0, standard integer slices are always +treated as being positional in :meth:`Series.__getitem__` and :meth:`Series.__setitem__`. + +Importantly, this means the deprecation in (:issue:`45324`) is reverted. + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: ser = pd.Series(range(10), index=[x / 2 for x in range(10)]) + In [6]: ser[1:3] + Out[6]: + 1.0 2 + 1.5 3 + 2.0 4 + 2.5 5 + 3.0 6 + dtype: int64 + +*New behavior*: + +.. ipython:: python + + ser = pd.Series(range(10), index=[x / 2 for x in range(10)]) + ser[1:3] + +To treat slice keys as labels, explicitly use ``loc``, e.g. ``ser.loc[1:3]`` (:issue:`49612`). + .. 
_whatsnew_200.api_breaking.unsupported_datetimelike_dtype_arg: Construction with datetime64 or timedelta64 dtype with unsupported resolution diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0bc568fb122ed..f1c83ff7029e4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -122,7 +122,6 @@ ABCDatetimeIndex, ABCMultiIndex, ABCPeriodIndex, - ABCRangeIndex, ABCSeries, ABCTimedeltaIndex, ) @@ -3982,43 +3981,10 @@ def is_int(v): called from the getitem slicers, validate that we are in fact integers """ - if self.is_integer(): - if is_frame: - # unambiguously positional, no deprecation - pass - elif start is None and stop is None: - # label-based vs positional is irrelevant - pass - elif isinstance(self, ABCRangeIndex) and self._range == range( - len(self) - ): - # In this case there is no difference between label-based - # and positional, so nothing will change. - pass - elif ( - self.dtype.kind in ["i", "u"] - and self._is_strictly_monotonic_increasing - and len(self) > 0 - and self[0] == 0 - and self[-1] == len(self) - 1 - ): - # We are range-like, e.g. created with Index(np.arange(N)) - pass - elif not is_index_slice: - # we're going to raise, so don't bother warning, e.g. - # test_integer_positional_indexing - pass - else: - warnings.warn( - "The behavior of `series[i:j]` with an integer-dtype index " - "is deprecated. In a future version, this will be treated " - "as *label-based* indexing, consistent with e.g. `series[i]` " - "lookups. To retain the old behavior, use `series.iloc[i:j]`. " - "To get the future behavior, use `series.loc[i:j]`.", - FutureWarning, - stacklevel=find_stack_level(), - ) + # GH#49612 as of 2.0, all-int-or-none slices are _always_ positional if self.is_integer() or is_index_slice: + # In case with is_integer but not is_index_slice, the validation + # here will raise. 
# Note: these checks are redundant if we know is_index_slice self._validate_indexer("slice", key.start, "getitem") self._validate_indexer("slice", key.stop, "getitem") diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 328e3d2f401e6..2c61cb01746d0 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -219,21 +219,6 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: def _should_fallback_to_positional(self) -> bool: return False - @doc(Index._convert_slice_indexer) - def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False): - # TODO(2.0): once #45324 deprecation is enforced we should be able - # to simplify this. - if is_float_dtype(self.dtype): - assert kind in ["loc", "getitem"] - - # TODO: can we write this as a condition based on - # e.g. _should_fallback_to_positional? - # We always treat __getitem__ slicing as label-based - # translate to locations - return self.slice_indexer(key.start, key.stop, key.step) - - return super()._convert_slice_indexer(key, kind=kind, is_frame=is_frame) - @doc(Index._maybe_cast_slice_bound) def _maybe_cast_slice_bound(self, label, side: str): # we will try to coerce to integers diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index e966d4602a02c..cf51d9d693155 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -313,8 +313,7 @@ def test_get(self, data): expected = s.iloc[[2, 3]] self.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="label-based"): - result = s.get(slice(2)) + result = s.get(slice(2)) expected = s.iloc[[0, 1]] self.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 186cba62c138f..235b8fc7ce8c3 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -340,8 +340,7 @@ 
def test_integer_positional_indexing(self, idx): """ s = Series(range(2, 6), index=range(2, 6)) - with tm.assert_produces_warning(FutureWarning, match="label-based"): - result = s[2:4] + result = s[2:4] expected = s.iloc[2:4] tm.assert_series_equal(result, expected) @@ -509,14 +508,25 @@ def test_floating_misc(self, indexer_sl): result2 = indexer_sl(s)[2.0:5.0] result3 = indexer_sl(s)[2.0:5] result4 = indexer_sl(s)[2.1:5] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - tm.assert_series_equal(result1, result4) - expected = Series([1, 2], index=[2.5, 5.0]) - result = indexer_sl(s)[2:5] + tm.assert_series_equal(result2, result3) + tm.assert_series_equal(result2, result4) - tm.assert_series_equal(result, expected) + if indexer_sl is tm.setitem: + # GH#49612 as of 2.0, slicing with only-integers is _always_ positional + result1 = indexer_sl(s)[2:5] + tm.assert_series_equal(result1, s.iloc[2:5]) + else: + tm.assert_series_equal(result1, s.loc[2:5]) + tm.assert_series_equal(result1, result2) + + result = indexer_sl(s)[2:5] + if indexer_sl is tm.setitem: + # GH#49612 as of 2.0, slicing with only-integers is _always_ positional + tm.assert_series_equal(result, s.iloc[2:5]) + else: + expected = Series([1, 2], index=[2.5, 5.0]) + tm.assert_series_equal(result, expected) # list selection result1 = indexer_sl(s)[[0.0, 5, 10]] diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py index 1a54796dbeec3..e8034bd4f7160 100644 --- a/pandas/tests/series/indexing/test_get.py +++ b/pandas/tests/series/indexing/test_get.py @@ -167,8 +167,7 @@ def test_get_with_ea(arr): expected = ser.iloc[[2, 3]] tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="label-based"): - result = ser.get(slice(2)) + result = ser.get(slice(2)) expected = ser.iloc[[0, 1]] tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py 
b/pandas/tests/series/indexing/test_getitem.py index faaa61e84a351..73881965f8a09 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -332,8 +332,7 @@ def test_getitem_slice_bug(self): def test_getitem_slice_integers(self): ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) - with tm.assert_produces_warning(FutureWarning, match="label-based"): - result = ser[:4] + result = ser[:4] expected = Series(ser.values[:4], index=[2, 4, 6, 8]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7d77a755e082b..6da8f7c453e03 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -220,15 +220,9 @@ def test_setitem_slice(self): def test_setitem_slice_integers(self): ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) - msg = r"In a future version, this will be treated as \*label-based\* indexing" - with tm.assert_produces_warning(FutureWarning, match=msg): - ser[:4] = 0 - with tm.assert_produces_warning( - FutureWarning, match=msg, check_stacklevel=False - ): - assert (ser[:4] == 0).all() - with tm.assert_produces_warning(FutureWarning, match=msg): - assert not (ser[4:] == 0).any() + ser[:4] = 0 + assert (ser[:4] == 0).all() + assert not (ser[4:] == 0).any() def test_setitem_slicestep(self): # caught this bug when writing tests diff --git a/pandas/tests/series/methods/test_interpolate.py b/pandas/tests/series/methods/test_interpolate.py index fc2f636199493..b983a00d8be5c 100644 --- a/pandas/tests/series/methods/test_interpolate.py +++ b/pandas/tests/series/methods/test_interpolate.py @@ -118,7 +118,7 @@ def test_interpolate_cubicspline(self): new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype( float ) - result = ser.reindex(new_index).interpolate(method="cubicspline")[1:3] + result = 
ser.reindex(new_index).interpolate(method="cubicspline").loc[1:3] tm.assert_series_equal(result, expected) @td.skip_if_no_scipy @@ -148,7 +148,7 @@ def test_interpolate_akima(self): float ) interp_s = ser.reindex(new_index).interpolate(method="akima") - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) # interpolate at new_index where `der` is a non-zero int expected = Series( @@ -159,7 +159,7 @@ def test_interpolate_akima(self): float ) interp_s = ser.reindex(new_index).interpolate(method="akima", der=1) - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) @td.skip_if_no_scipy def test_interpolate_piecewise_polynomial(self): @@ -174,7 +174,7 @@ def test_interpolate_piecewise_polynomial(self): float ) interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial") - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) @td.skip_if_no_scipy def test_interpolate_from_derivatives(self): @@ -189,7 +189,7 @@ def test_interpolate_from_derivatives(self): float ) interp_s = ser.reindex(new_index).interpolate(method="from_derivatives") - tm.assert_series_equal(interp_s[1:3], expected) + tm.assert_series_equal(interp_s.loc[1:3], expected) @pytest.mark.parametrize( "kwargs", From 2a07555e75c426892e90c168350955b8c89e5a93 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 24 Nov 2022 13:22:10 -0800 Subject: [PATCH 2/2] use iloc --- pandas/tests/series/indexing/test_setitem.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 6da8f7c453e03..897fe527c130e 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -221,8 +221,8 @@ def test_setitem_slice_integers(self): ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16]) ser[:4] = 0 - 
assert (ser[:4] == 0).all() - assert not (ser[4:] == 0).any() + assert (ser.iloc[:4] == 0).all() + assert not (ser.iloc[4:] == 0).any() def test_setitem_slicestep(self): # caught this bug when writing tests