Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG / CoW: also return new object in case of null slice for both rows and columns (.(i)loc[:, :]) #49469

Merged
Merged
4 changes: 4 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,10 @@ Other API changes
- Files are now closed when encountering an exception in :func:`read_json` (:issue:`49921`)
- Changed behavior of :func:`read_csv`, :func:`read_json` & :func:`read_fwf`, where the index will now always be a :class:`RangeIndex`, when no index is specified. Previously the index would be a :class:`Index` with dtype ``object`` if the new DataFrame/Series has length 0 (:issue:`49572`)
- :meth:`DataFrame.values`, :meth:`DataFrame.to_numpy`, :meth:`DataFrame.xs`, :meth:`DataFrame.reindex`, :meth:`DataFrame.fillna`, and :meth:`DataFrame.replace` no longer silently consolidate the underlying arrays; do ``df = df.copy()`` to ensure consolidation (:issue:`49356`)
- Creating a new DataFrame using a full slice on both axes with :attr:`~DataFrame.loc`
or :attr:`~DataFrame.iloc` (thus, ``df.loc[:, :]`` or ``df.iloc[:, :]``) now returns a
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
-

.. ---------------------------------------------------------------------------
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,11 @@ def _getitem_tuple_same_dim(self, tup: tuple):
# be handled by the _getitem_lowerdim call above.
assert retval.ndim == self.ndim

if retval is self.obj:
# if all axes were a null slice (`df.loc[:, :]`), ensure we still
# return a new object (https://github.com/pandas-dev/pandas/pull/49469)
retval = retval.copy(deep=False)

return retval

@final
Expand Down
56 changes: 56 additions & 0 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,62 @@ def test_subset_chained_single_block_row(using_copy_on_write, using_array_manage
assert subset.iloc[0] == 0


@pytest.mark.parametrize(
    "method",
    [
        lambda frame: frame[:],
        lambda frame: frame.loc[:, :],
        lambda frame: frame.loc[:],
        lambda frame: frame.iloc[:, :],
        lambda frame: frame.iloc[:],
    ],
    ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"],
)
def test_null_slice(request, method, using_copy_on_write):
    """Check that every null-slice (``:``) indexer returns a new DataFrame.

    Each parametrized ``method`` selects the full frame through a different
    indexing spelling. The result must be a distinct object from the
    parent, and writing into it must leave the parent untouched when
    Copy-on-Write is enabled (and be visible in the parent otherwise).
    """
    df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
    snapshot = df.copy()

    sliced = method(df)

    # A new object (shallow copy) is expected regardless of the CoW setting.
    assert sliced is not df

    # Mutating the result is what should trigger CoW.
    sliced.iloc[0, 0] = 0
    if not using_copy_on_write:
        # Without CoW the write is expected to show up in the parent.
        assert df.iloc[0, 0] == 0
        return
    tm.assert_frame_equal(df, snapshot)


@pytest.mark.parametrize(
    "method",
    [
        lambda ser: ser[:],
        lambda ser: ser.loc[:],
        lambda ser: ser.iloc[:],
    ],
    ids=["getitem", "loc", "iloc"],
)
def test_null_slice_series(request, method, using_copy_on_write):
    """Check that every null-slice (``:``) indexer returns a new Series.

    Each parametrized ``method`` selects the full Series through a
    different indexing spelling. The result must be a distinct object from
    the parent, and writing into it must leave the parent untouched when
    Copy-on-Write is enabled (and be visible in the parent otherwise).
    """
    s = Series([1, 2, 3], index=["a", "b", "c"])
    snapshot = s.copy()

    sliced = method(s)

    # A new object is expected regardless of the CoW setting.
    assert sliced is not s

    # Mutating the result is what should trigger CoW.
    sliced.iloc[0] = 0
    if not using_copy_on_write:
        # Without CoW the write is expected to show up in the parent.
        assert s.iloc[0] == 0
        return
    tm.assert_series_equal(s, snapshot)


# TODO add more tests modifying the parent


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,7 @@ def test_identity_slice_returns_new_object(
sliced_df = original_df.loc[:]
assert sliced_df is not original_df
assert original_df[:] is not original_df
assert original_df.loc[:, :] is not original_df

# should be a shallow copy
assert np.shares_memory(original_df["a"]._values, sliced_df["a"]._values)
Expand All @@ -1110,7 +1111,6 @@ def test_identity_slice_returns_new_object(
assert (sliced_df["a"] == 4).all()

# These should not return copies
assert original_df is original_df.loc[:, :]
df = DataFrame(np.random.randn(10, 4))
assert df[0] is df.loc[:, 0]

Expand Down