Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: series int-slicing always positional #49869

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,39 @@ the appropriate rows. Also, when ``n`` is larger than the group, no rows instead
Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. _whatsnew_200.api_breaking.int_slicing_always_positional:

Integer slicing on Series is always positional
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
In past versions, slicing a :class:`Series` with integers (e.g. ``ser[1:2]`` or ``ser[2:]``) was *usually*
positional but not always. Starting in pandas 2.0, standard integer slices are always
treated as being positional in :meth:`Series.__getitem__` and :meth:`Series.__setitem__`.

Importantly, this means the deprecation in :issue:`45324` is reverted.

*Previous behavior*:

.. code-block:: ipython

In [5]: ser = pd.Series(range(10), index=[x / 2 for x in range(10)])
In [6]: ser[1:3]
Out[6]:
1.0 2
1.5 3
2.0 4
2.5 5
3.0 6
dtype: int64

*New behavior*:

.. ipython:: python

ser = pd.Series(range(10), index=[x / 2 for x in range(10)])
ser[1:3]

To treat slice keys as labels, explicitly use ``loc`` e.g. ``ser.loc[1:3]`` (:issue:`49612`).

.. _whatsnew_200.api_breaking.unsupported_datetimelike_dtype_arg:

Construction with datetime64 or timedelta64 dtype with unsupported resolution
Expand Down
45 changes: 7 additions & 38 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@
ABCDatetimeIndex,
ABCMultiIndex,
ABCPeriodIndex,
ABCRangeIndex,
ABCSeries,
ABCTimedeltaIndex,
)
Expand Down Expand Up @@ -3882,44 +3881,14 @@ def is_int(v):
is_positional = is_index_slice and ints_are_positional

if kind == "getitem":
# called from the getitem slicers, validate that we are in fact integers
if self.is_integer():
if is_frame:
# unambiguously positional, no deprecation
pass
elif start is None and stop is None:
# label-based vs positional is irrelevant
pass
elif isinstance(self, ABCRangeIndex) and self._range == range(
len(self)
):
# In this case there is no difference between label-based
# and positional, so nothing will change.
pass
elif (
self.dtype.kind in ["i", "u"]
and self._is_strictly_monotonic_increasing
and len(self) > 0
and self[0] == 0
and self[-1] == len(self) - 1
):
# We are range-like, e.g. created with Index(np.arange(N))
pass
elif not is_index_slice:
# we're going to raise, so don't bother warning, e.g.
# test_integer_positional_indexing
pass
else:
warnings.warn(
"The behavior of `series[i:j]` with an integer-dtype index "
"is deprecated. In a future version, this will be treated "
"as *label-based* indexing, consistent with e.g. `series[i]` "
"lookups. To retain the old behavior, use `series.iloc[i:j]`. "
"To get the future behavior, use `series.loc[i:j]`.",
FutureWarning,
stacklevel=find_stack_level(),
)
"""
called from the getitem slicers, validate that we are in fact
integers
"""
# GH#49612 as of 2.0, all-int-or-none slices are _always_ positional
if self.is_integer() or is_index_slice:
# In case with is_integer but not is_index_slice, the validation
# here will raise.
# Note: these checks are redundant if we know is_index_slice
self._validate_indexer("slice", key.start, "getitem")
self._validate_indexer("slice", key.stop, "getitem")
Expand Down
15 changes: 0 additions & 15 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,21 +219,6 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
def _should_fallback_to_positional(self) -> bool:
return False

@doc(Index._convert_slice_indexer)
def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
# TODO(2.0): once #45324 deprecation is enforced we should be able
# to simplify this.
if is_float_dtype(self.dtype):
assert kind in ["loc", "getitem"]

# TODO: can we write this as a condition based on
# e.g. _should_fallback_to_positional?
# We always treat __getitem__ slicing as label-based
# translate to locations
return self.slice_indexer(key.start, key.stop, key.step)

return super()._convert_slice_indexer(key, kind=kind, is_frame=is_frame)

@doc(Index._maybe_cast_slice_bound)
def _maybe_cast_slice_bound(self, label, side: str):
# we will try to coerce to integers
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/extension/base/getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,8 +313,7 @@ def test_get(self, data):
expected = s.iloc[[2, 3]]
self.assert_series_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = s.get(slice(2))
result = s.get(slice(2))
expected = s.iloc[[0, 1]]
self.assert_series_equal(result, expected)

Expand Down
26 changes: 18 additions & 8 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,7 @@ def test_integer_positional_indexing(self, idx):
"""
s = Series(range(2, 6), index=range(2, 6))

with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = s[2:4]
result = s[2:4]
expected = s.iloc[2:4]
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -506,14 +505,25 @@ def test_floating_misc(self, indexer_sl):
result2 = indexer_sl(s)[2.0:5.0]
result3 = indexer_sl(s)[2.0:5]
result4 = indexer_sl(s)[2.1:5]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)

expected = Series([1, 2], index=[2.5, 5.0])
result = indexer_sl(s)[2:5]
tm.assert_series_equal(result2, result3)
tm.assert_series_equal(result2, result4)

tm.assert_series_equal(result, expected)
if indexer_sl is tm.setitem:
# GH#49612 as of 2.0, slicing with only-integers is _always_ positional
result1 = indexer_sl(s)[2:5]
tm.assert_series_equal(result1, s.iloc[2:5])
else:
tm.assert_series_equal(result1, s.loc[2:5])
tm.assert_series_equal(result1, result2)

result = indexer_sl(s)[2:5]
if indexer_sl is tm.setitem:
# GH#49612 as of 2.0, slicing with only-integers is _always_ positional
tm.assert_series_equal(result, s.iloc[2:5])
else:
expected = Series([1, 2], index=[2.5, 5.0])
tm.assert_series_equal(result, expected)

# list selection
result1 = indexer_sl(s)[[0.0, 5, 10]]
Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/indexing/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,7 @@ def test_get_with_ea(arr):
expected = ser.iloc[[2, 3]]
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = ser.get(slice(2))
result = ser.get(slice(2))
expected = ser.iloc[[0, 1]]
tm.assert_series_equal(result, expected)

Expand Down
3 changes: 1 addition & 2 deletions pandas/tests/series/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,7 @@ def test_getitem_slice_bug(self):
def test_getitem_slice_integers(self):
ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])

with tm.assert_produces_warning(FutureWarning, match="label-based"):
result = ser[:4]
result = ser[:4]
expected = Series(ser.values[:4], index=[2, 4, 6, 8])
tm.assert_series_equal(result, expected)

Expand Down
12 changes: 3 additions & 9 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,15 +220,9 @@ def test_setitem_slice(self):
def test_setitem_slice_integers(self):
ser = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])

msg = r"In a future version, this will be treated as \*label-based\* indexing"
with tm.assert_produces_warning(FutureWarning, match=msg):
ser[:4] = 0
with tm.assert_produces_warning(
FutureWarning, match=msg, check_stacklevel=False
):
assert (ser[:4] == 0).all()
with tm.assert_produces_warning(FutureWarning, match=msg):
assert not (ser[4:] == 0).any()
ser[:4] = 0
Copy link
Member

@phofl phofl Nov 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might misunderstand this, but as I see it this would pass if both (getitem and setitem) are label based? If yes, can you set explicitly to 0 during creation?

Edit: Sorry missed the test name. Rather using iloc to get the values then?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good idea, updated

assert (ser.iloc[:4] == 0).all()
assert not (ser.iloc[4:] == 0).any()

def test_setitem_slicestep(self):
# caught this bug when writing tests
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/series/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_interpolate_cubicspline(self):
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])).astype(
float
)
result = ser.reindex(new_index).interpolate(method="cubicspline")[1:3]
result = ser.reindex(new_index).interpolate(method="cubicspline").loc[1:3]
tm.assert_series_equal(result, expected)

@td.skip_if_no_scipy
Expand Down Expand Up @@ -148,7 +148,7 @@ def test_interpolate_akima(self):
float
)
interp_s = ser.reindex(new_index).interpolate(method="akima")
tm.assert_series_equal(interp_s[1:3], expected)
tm.assert_series_equal(interp_s.loc[1:3], expected)

# interpolate at new_index where `der` is a non-zero int
expected = Series(
Expand All @@ -159,7 +159,7 @@ def test_interpolate_akima(self):
float
)
interp_s = ser.reindex(new_index).interpolate(method="akima", der=1)
tm.assert_series_equal(interp_s[1:3], expected)
tm.assert_series_equal(interp_s.loc[1:3], expected)

@td.skip_if_no_scipy
def test_interpolate_piecewise_polynomial(self):
Expand All @@ -174,7 +174,7 @@ def test_interpolate_piecewise_polynomial(self):
float
)
interp_s = ser.reindex(new_index).interpolate(method="piecewise_polynomial")
tm.assert_series_equal(interp_s[1:3], expected)
tm.assert_series_equal(interp_s.loc[1:3], expected)

@td.skip_if_no_scipy
def test_interpolate_from_derivatives(self):
Expand All @@ -189,7 +189,7 @@ def test_interpolate_from_derivatives(self):
float
)
interp_s = ser.reindex(new_index).interpolate(method="from_derivatives")
tm.assert_series_equal(interp_s[1:3], expected)
tm.assert_series_equal(interp_s.loc[1:3], expected)

@pytest.mark.parametrize(
"kwargs",
Expand Down