Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CoW: Add warning for chained assignment with fillna #53779

Merged
merged 11 commits into from
Jul 6, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ Copy-on-Write improvements
of those Index objects for the columns of the DataFrame (:issue:`52947`)
- Add lazy copy mechanism to :meth:`DataFrame.eval` (:issue:`53746`)

- Trying to operate inplace on a temporary column selection
(for example, ``df["a"].fillna(100, inplace=True)``)
will now always raise a warning when Copy-on-Write is enabled. In this mode,
operating inplace like this will never work, since the selection behaves
as a temporary copy. This holds true for:

- DataFrame.fillna / Series.fillna

.. _whatsnew_210.enhancements.enhancement2:

``map(func, na_action="ignore")`` now works for all array types
Expand Down
17 changes: 17 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import operator
import pickle
import re
import sys
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -89,13 +90,19 @@
WriteExcelBuffer,
npt,
)
from pandas.compat import (
PY311,
PYPY,
)
from pandas.compat._optional import import_optional_dependency
from pandas.compat.numpy import function as nv
from pandas.errors import (
AbstractMethodError,
ChainedAssignmentError,
InvalidIndexError,
SettingWithCopyError,
SettingWithCopyWarning,
_chained_assignment_method_msg,
)
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
Expand Down Expand Up @@ -7083,6 +7090,16 @@ def fillna(
Note that column D is not affected since it is not present in df2.
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if inplace:
if not PYPY and using_copy_on_write():
refcount = 2 if PY311 else 3
if sys.getrefcount(self) <= refcount:
warnings.warn(
_chained_assignment_method_msg,
ChainedAssignmentError,
stacklevel=2,
)

value, method = validate_fillna_kwargs(value, method)
if method is not None:
warnings.warn(
Expand Down
11 changes: 11 additions & 0 deletions pandas/errors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,17 @@ class ChainedAssignmentError(Warning):
)


_chained_assignment_method_msg = (
"A value is trying to be set on a copy of a DataFrame or Series "
"through chained assignment.\n"
"When using the Copy-on-Write mode, such chained assignment never works "
"to update the original DataFrame or Series, because the intermediate "
"object on which we are setting values always behaves as a copy.\n\n"
"Try using 'df.method({col: value}, inplace=True)' instead, to perform "
"the operation inplace.\n\n"
)


class NumExprClobberingError(NameError):
"""
Exception raised when trying to use a built-in numexpr name as a variable name.
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.errors import ChainedAssignmentError

from pandas import (
NA,
ArrowDtype,
Expand Down Expand Up @@ -344,3 +346,16 @@ def test_fillna_inplace_ea_noop_shares_memory(
assert not np.shares_memory(get_array(df, "b"), get_array(view, "b"))
df.iloc[0, 1] = 100
tm.assert_frame_equal(df_orig, view)


def test_fillna_chained_assignment(using_copy_on_write):
df = DataFrame({"a": [1, np.nan, 2], "b": 1})
df_orig = df.copy()
if using_copy_on_write:
with tm.assert_produces_warning(ChainedAssignmentError):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a helper for this, with tm.raises_chained_assignment_error():, which will take care of the fact that it doesn't raise a warning on PyPy

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thx

df["a"].fillna(100, inplace=True)
tm.assert_frame_equal(df, df_orig)

with tm.assert_produces_warning(ChainedAssignmentError):
df[["a"]].fillna(100, inplace=True)
tm.assert_frame_equal(df, df_orig)
10 changes: 7 additions & 3 deletions pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest

from pandas.errors import ChainedAssignmentError
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -49,11 +50,12 @@ def test_fillna_on_column_view(self, using_copy_on_write):
arr = np.full((40, 50), np.nan)
df = DataFrame(arr, copy=False)

# TODO(CoW): This should raise a chained assignment error
df[0].fillna(-1, inplace=True)
if using_copy_on_write:
with tm.assert_produces_warning(ChainedAssignmentError):
df[0].fillna(-1, inplace=True)
assert np.isnan(arr[:, 0]).all()
else:
df[0].fillna(-1, inplace=True)
assert (arr[:, 0] == -1).all()

# i.e. we didn't create a new 49-column block
Expand Down Expand Up @@ -105,7 +107,9 @@ def test_fillna_mixed_float(self, mixed_float_frame):
result = mf.fillna(method="pad")
_check_mixed_float(result, dtype={"C": None})

def test_fillna_empty(self):
def test_fillna_empty(self, using_copy_on_write):
if using_copy_on_write:
pytest.skip("condition is unnecessary complex and is deprecated anyway")
# empty frame (GH#2778)
df = DataFrame(columns=["x"])
for m in ["pad", "backfill"]:
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning
from pandas.errors import (
ChainedAssignmentError,
PerformanceWarning,
)
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -410,7 +413,11 @@ def test_update_inplace_sets_valid_block_values(using_copy_on_write):
df = DataFrame({"a": Series([1, 2, None], dtype="category")})

# inplace update of a single column
df["a"].fillna(1, inplace=True)
if using_copy_on_write:
with tm.assert_produces_warning(ChainedAssignmentError):
df["a"].fillna(1, inplace=True)
else:
df["a"].fillna(1, inplace=True)

# check we haven't put a Series into any block.values
assert isinstance(df._mgr.blocks[0].values, Categorical)
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/indexing/multiindex/test_chaining_and_caching.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import numpy as np
import pytest

from pandas.errors import SettingWithCopyError
from pandas.errors import (
ChainedAssignmentError,
SettingWithCopyError,
)
import pandas.util._test_decorators as td

from pandas import (
Expand Down Expand Up @@ -30,7 +33,8 @@ def test_detect_chained_assignment(using_copy_on_write):
zed = DataFrame(events, index=["a", "b"], columns=multiind)

if using_copy_on_write:
zed["eyes"]["right"].fillna(value=555, inplace=True)
with tm.assert_produces_warning(ChainedAssignmentError):
zed["eyes"]["right"].fillna(value=555, inplace=True)
else:
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(SettingWithCopyError, match=msg):
Expand Down
1 change: 1 addition & 0 deletions scripts/validate_unwanted_patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
"_arrow_dtype_mapping",
"_global_config",
"_chained_assignment_msg",
"_chained_assignment_method_msg",
}


Expand Down