Skip to content

Commit

Permalink
BUG: CoW not tracking references when indexing midx with slice (#51944)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Mar 14, 2023
1 parent ffc55a0 commit eaacf83
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 1 deletion.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ Copy-on-Write improvements
- Arithmetic operations that can be performed in place, e.g. ``ser *= 2``, will now
  respect the Copy-on-Write mechanism.

- :meth:`DataFrame.__getitem__` will now respect the Copy-on-Write mechanism when the
  :class:`DataFrame` has :class:`MultiIndex` columns.

- :meth:`Series.view` will now respect the Copy-on-Write mechanism.

Copy-on-Write can be enabled through one of
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3832,10 +3832,13 @@ def _getitem_multilevel(self, key):
result = self.reindex(columns=new_columns)
result.columns = result_columns
else:
new_values = self.values[:, loc]
new_values = self._values[:, loc]
result = self._constructor(
new_values, index=self.index, columns=result_columns
)
if using_copy_on_write() and isinstance(loc, slice):
result._mgr.add_references(self._mgr) # type: ignore[arg-type]

result = result.__finalize__(self)

# If there is only one column being returned, and its name is
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/copy_view/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,3 +1034,18 @@ def test_set_value_copy_only_necessary_column(
assert not np.shares_memory(get_array(df, "a"), get_array(view, "a"))
else:
assert np.shares_memory(get_array(df, "a"), get_array(view, "a"))


def test_getitem_midx_slice(using_copy_on_write, using_array_manager):
    """Check ``df[("a",)]`` on MultiIndex columns under Copy-on-Write.

    Selecting the top-level key ``"a"`` yields the ``"x"`` and ``"y"``
    sub-columns. With Copy-on-Write enabled, the selection must report a
    reference at position 0 and writing into it must leave the parent frame
    unchanged. Without the array manager, the selection is additionally
    expected to share memory with the parent's ``("a", "x")`` column.
    """
    parent = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2})
    expected = parent.copy()
    subset = parent[("a",)]

    if using_copy_on_write:
        # The selection's manager must not claim to be reference-free.
        assert not subset._mgr._has_no_reference(0)

    if not using_array_manager:
        parent_arr = get_array(parent, ("a", "x"))
        subset_arr = get_array(subset, "x")
        assert np.shares_memory(parent_arr, subset_arr)

    if using_copy_on_write:
        # Writing into the selection must not propagate to the parent frame.
        subset.iloc[0, 0] = 100
        tm.assert_frame_equal(expected, parent)

0 comments on commit eaacf83

Please sign in to comment.