Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: keyword-only arguments for DataFrame/Series statistical methods #58122

Merged
merged 4 commits on Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions doc/source/user_guide/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -476,15 +476,15 @@ For example:
.. ipython:: python

df
df.mean(0)
df.mean(1)
df.mean(axis=0)
df.mean(axis=1)

All such methods have a ``skipna`` option signaling whether to exclude missing
data (``True`` by default):

.. ipython:: python

df.sum(0, skipna=False)
df.sum(axis=0, skipna=False)
df.sum(axis=1, skipna=True)

Combined with the broadcasting / arithmetic behavior, one can describe various
Expand All @@ -495,8 +495,8 @@ standard deviation of 1), very concisely:

ts_stand = (df - df.mean()) / df.std()
ts_stand.std()
xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0)
xs_stand.std(1)
xs_stand = df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0)
xs_stand.std(axis=1)

Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`
preserve the location of ``NaN`` values. This is somewhat different from
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ To select a row where each column meets its own criterion:

values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]}

row_mask = df.isin(values).all(1)
row_mask = df.isin(values).all(axis=1)

df[row_mask]

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ Other Deprecations

- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
-
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from pandas.util._decorators import (
Appender,
Substitution,
deprecate_nonkeyword_arguments,
doc,
set_module,
)
Expand Down Expand Up @@ -11543,6 +11544,7 @@ def all(
**kwargs,
) -> Series | bool: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all")
@doc(make_doc("all", ndim=1))
def all(
self,
Expand Down Expand Up @@ -11589,6 +11591,7 @@ def min(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min")
@doc(make_doc("min", ndim=2))
def min(
self,
Expand Down Expand Up @@ -11635,6 +11638,7 @@ def max(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max")
@doc(make_doc("max", ndim=2))
def max(
self,
Expand All @@ -11650,6 +11654,7 @@ def max(
result = result.__finalize__(self, method="max")
return result

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum")
@doc(make_doc("sum", ndim=2))
def sum(
self,
Expand All @@ -11670,6 +11675,7 @@ def sum(
result = result.__finalize__(self, method="sum")
return result

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod")
@doc(make_doc("prod", ndim=2))
def prod(
self,
Expand Down Expand Up @@ -11721,6 +11727,7 @@ def mean(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean")
@doc(make_doc("mean", ndim=2))
def mean(
self,
Expand Down Expand Up @@ -11767,6 +11774,7 @@ def median(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median")
@doc(make_doc("median", ndim=2))
def median(
self,
Expand Down Expand Up @@ -11816,6 +11824,7 @@ def sem(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
@doc(make_doc("sem", ndim=2))
def sem(
self,
Expand Down Expand Up @@ -11866,6 +11875,7 @@ def var(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var")
@doc(make_doc("var", ndim=2))
def var(
self,
Expand Down Expand Up @@ -11916,6 +11926,7 @@ def std(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std")
@doc(make_doc("std", ndim=2))
def std(
self,
Expand Down Expand Up @@ -11963,6 +11974,7 @@ def skew(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew")
@doc(make_doc("skew", ndim=2))
def skew(
self,
Expand Down Expand Up @@ -12009,6 +12021,7 @@ def kurt(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt")
@doc(make_doc("kurt", ndim=2))
def kurt(
self,
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6189,6 +6189,7 @@ def any( # type: ignore[override]
filter_type="bool",
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all")
@Appender(make_doc("all", ndim=1))
def all(
self,
Expand All @@ -6208,6 +6209,7 @@ def all(
filter_type="bool",
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min")
@doc(make_doc("min", ndim=1))
def min(
self,
Expand All @@ -6220,6 +6222,7 @@ def min(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max")
@doc(make_doc("max", ndim=1))
def max(
self,
Expand All @@ -6232,6 +6235,7 @@ def max(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum")
@doc(make_doc("sum", ndim=1))
def sum(
self,
Expand All @@ -6250,6 +6254,7 @@ def sum(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod")
@doc(make_doc("prod", ndim=1))
def prod(
self,
Expand All @@ -6268,6 +6273,7 @@ def prod(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean")
@doc(make_doc("mean", ndim=1))
def mean(
self,
Expand All @@ -6280,6 +6286,7 @@ def mean(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median")
@doc(make_doc("median", ndim=1))
def median(
self,
Expand All @@ -6292,6 +6299,7 @@ def median(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
@doc(make_doc("sem", ndim=1))
def sem(
self,
Expand All @@ -6310,6 +6318,7 @@ def sem(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var")
@doc(make_doc("var", ndim=1))
def var(
self,
Expand All @@ -6328,6 +6337,7 @@ def var(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std")
@doc(make_doc("std", ndim=1))
def std(
self,
Expand All @@ -6346,6 +6356,7 @@ def std(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew")
@doc(make_doc("skew", ndim=1))
def skew(
self,
Expand All @@ -6358,6 +6369,7 @@ def skew(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt")
@doc(make_doc("kurt", ndim=1))
def kurt(
self,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def test_apply_yield_list(float_frame):

def test_apply_reduce_Series(float_frame):
float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan
expected = float_frame.mean(1)
expected = float_frame.mean(axis=1)
result = float_frame.apply(np.mean, axis=1)
tm.assert_series_equal(result, expected)

Expand Down
25 changes: 8 additions & 17 deletions pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,18 @@

@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
"args,kwds",
"kwds",
[
pytest.param([], {}, id="no_args_or_kwds"),
pytest.param([1], {}, id="axis_from_args"),
pytest.param([], {"axis": 1}, id="axis_from_kwds"),
pytest.param([], {"numeric_only": True}, id="optional_kwds"),
pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
pytest.param({}, id="no_kwds"),
pytest.param({"axis": 1}, id="on_axis"),
pytest.param({"numeric_only": True}, id="func_kwds"),
pytest.param({"axis": 1, "numeric_only": True}, id="axis_and_func_kwds"),
],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
if len(args) > 1 and how == "agg":
request.applymarker(
pytest.mark.xfail(
raises=TypeError,
reason="agg/apply signature mismatch - agg passes 2nd "
"argument to func",
)
)
result = getattr(float_frame, how)(func, *args, **kwds)
expected = getattr(float_frame, func)(*args, **kwds)
def test_apply_with_string_funcs(request, float_frame, func, kwds, how):
result = getattr(float_frame, how)(func, **kwds)
expected = getattr(float_frame, func)(**kwds)
tm.assert_series_equal(result, expected)


Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,18 @@ def test_basic(self, date_range_frame):
dates = date_range("1/1/1990", periods=N * 3, freq="25s")

result = df.asof(dates)
assert result.notna().all(1).all()
assert result.notna().all(axis=1).all()
lb = df.index[14]
ub = df.index[30]

dates = list(dates)

result = df.asof(dates)
assert result.notna().all(1).all()
assert result.notna().all(axis=1).all()

mask = (result.index >= lb) & (result.index < ub)
rs = result[mask]
assert (rs == 14).all(1).all()
assert (rs == 14).all(axis=1).all()

def test_subset(self, date_range_frame):
N = 10
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def test_fillna_dict_series(self):

# disable this for now
with pytest.raises(NotImplementedError, match="column by column"):
df.fillna(df.max(1), axis=1)
df.fillna(df.max(axis=1), axis=1)

def test_fillna_dataframe(self):
# GH#8377
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ def test_operators_timedelta64(self):
tm.assert_series_equal(result, expected)

# works when only those columns are selected
result = mixed[["A", "B"]].min(1)
result = mixed[["A", "B"]].min(axis=1)
expected = Series([timedelta(days=-1)] * 3)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -832,8 +832,8 @@ def test_std_datetime64_with_nat(self, values, skipna, request, unit):
def test_sum_corner(self):
empty_frame = DataFrame()

axis0 = empty_frame.sum(0)
axis1 = empty_frame.sum(1)
axis0 = empty_frame.sum(axis=0)
axis1 = empty_frame.sum(axis=1)
assert isinstance(axis0, Series)
assert isinstance(axis1, Series)
assert len(axis0) == 0
Expand Down Expand Up @@ -967,8 +967,8 @@ def test_sum_object(self, float_frame):
def test_sum_bool(self, float_frame):
# ensure this works, bug report
bools = np.isnan(float_frame)
bools.sum(1)
bools.sum(0)
bools.sum(axis=1)
bools.sum(axis=0)

def test_sum_mixed_datetime(self):
# GH#30886
Expand All @@ -990,7 +990,7 @@ def test_mean_corner(self, float_frame, float_string_frame):

# take mean of boolean column
float_frame["bool"] = float_frame["A"] > 0
means = float_frame.mean(0)
means = float_frame.mean(axis=0)
assert means["bool"] == float_frame["bool"].values.mean()

def test_mean_datetimelike(self):
Expand Down Expand Up @@ -1043,13 +1043,13 @@ def test_mean_extensionarray_numeric_only_true(self):

def test_stats_mixed_type(self, float_string_frame):
with pytest.raises(TypeError, match="could not convert"):
float_string_frame.std(1)
float_string_frame.std(axis=1)
with pytest.raises(TypeError, match="could not convert"):
float_string_frame.var(1)
float_string_frame.var(axis=1)
with pytest.raises(TypeError, match="unsupported operand type"):
float_string_frame.mean(1)
float_string_frame.mean(axis=1)
with pytest.raises(TypeError, match="could not convert"):
float_string_frame.skew(1)
float_string_frame.skew(axis=1)

def test_sum_bools(self):
df = DataFrame(index=range(1), columns=range(10))
Expand Down Expand Up @@ -1331,11 +1331,11 @@ def test_any_all_extra(self):
result = df[["A", "B"]].any(axis=1, bool_only=True)
tm.assert_series_equal(result, expected)

result = df.all(1)
result = df.all(axis=1)
expected = Series([True, False, False], index=["a", "b", "c"])
tm.assert_series_equal(result, expected)

result = df.all(1, bool_only=True)
result = df.all(axis=1, bool_only=True)
tm.assert_series_equal(result, expected)

# Axis is None
Expand Down
Loading