Skip to content

Commit

Permalink
DEPR: keyword-only arguments for DataFrame/Series statistical methods (#58122)
Browse files Browse the repository at this point in the history

* Deprecate passing non-keyword (positional) arguments to DataFrame/Series statistical methods

* Line not long anymore!

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Adjust test (``test_apply_with_string_funcs`` now passes keyword arguments only)

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+DAzVise@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
3 people authored Apr 9, 2024
1 parent 583026b commit 6126b85
Show file tree
Hide file tree
Showing 14 changed files with 61 additions and 44 deletions.
10 changes: 5 additions & 5 deletions doc/source/user_guide/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -476,15 +476,15 @@ For example:
.. ipython:: python
df
df.mean(0)
df.mean(1)
df.mean(axis=0)
df.mean(axis=1)
All such methods have a ``skipna`` option signaling whether to exclude missing
data (``True`` by default):

.. ipython:: python
df.sum(0, skipna=False)
df.sum(axis=0, skipna=False)
df.sum(axis=1, skipna=True)
Combined with the broadcasting / arithmetic behavior, one can describe various
Expand All @@ -495,8 +495,8 @@ standard deviation of 1), very concisely:
ts_stand = (df - df.mean()) / df.std()
ts_stand.std()
xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0)
xs_stand.std(1)
xs_stand = df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0)
xs_stand.std(axis=1)
Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod`
preserve the location of ``NaN`` values. This is somewhat different from
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,7 @@ To select a row where each column meets its own criterion:
values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]}
row_mask = df.isin(values).all(1)
row_mask = df.isin(values).all(axis=1)
df[row_mask]
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ Other Deprecations

- Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`)
- Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`)
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`)
- Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. (:issue:`57280`)
-
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
from pandas.util._decorators import (
Appender,
Substitution,
deprecate_nonkeyword_arguments,
doc,
set_module,
)
Expand Down Expand Up @@ -11543,6 +11544,7 @@ def all(
**kwargs,
) -> Series | bool: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all")
@doc(make_doc("all", ndim=1))
def all(
self,
Expand Down Expand Up @@ -11589,6 +11591,7 @@ def min(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min")
@doc(make_doc("min", ndim=2))
def min(
self,
Expand Down Expand Up @@ -11635,6 +11638,7 @@ def max(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max")
@doc(make_doc("max", ndim=2))
def max(
self,
Expand All @@ -11650,6 +11654,7 @@ def max(
result = result.__finalize__(self, method="max")
return result

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum")
@doc(make_doc("sum", ndim=2))
def sum(
self,
Expand All @@ -11670,6 +11675,7 @@ def sum(
result = result.__finalize__(self, method="sum")
return result

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod")
@doc(make_doc("prod", ndim=2))
def prod(
self,
Expand Down Expand Up @@ -11721,6 +11727,7 @@ def mean(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean")
@doc(make_doc("mean", ndim=2))
def mean(
self,
Expand Down Expand Up @@ -11767,6 +11774,7 @@ def median(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median")
@doc(make_doc("median", ndim=2))
def median(
self,
Expand Down Expand Up @@ -11816,6 +11824,7 @@ def sem(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
@doc(make_doc("sem", ndim=2))
def sem(
self,
Expand Down Expand Up @@ -11866,6 +11875,7 @@ def var(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var")
@doc(make_doc("var", ndim=2))
def var(
self,
Expand Down Expand Up @@ -11916,6 +11926,7 @@ def std(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std")
@doc(make_doc("std", ndim=2))
def std(
self,
Expand Down Expand Up @@ -11963,6 +11974,7 @@ def skew(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew")
@doc(make_doc("skew", ndim=2))
def skew(
self,
Expand Down Expand Up @@ -12009,6 +12021,7 @@ def kurt(
**kwargs,
) -> Series | Any: ...

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt")
@doc(make_doc("kurt", ndim=2))
def kurt(
self,
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -6187,6 +6187,7 @@ def any( # type: ignore[override]
filter_type="bool",
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all")
@Appender(make_doc("all", ndim=1))
def all(
self,
Expand All @@ -6206,6 +6207,7 @@ def all(
filter_type="bool",
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min")
@doc(make_doc("min", ndim=1))
def min(
self,
Expand All @@ -6218,6 +6220,7 @@ def min(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max")
@doc(make_doc("max", ndim=1))
def max(
self,
Expand All @@ -6230,6 +6233,7 @@ def max(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum")
@doc(make_doc("sum", ndim=1))
def sum(
self,
Expand All @@ -6248,6 +6252,7 @@ def sum(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod")
@doc(make_doc("prod", ndim=1))
def prod(
self,
Expand All @@ -6266,6 +6271,7 @@ def prod(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean")
@doc(make_doc("mean", ndim=1))
def mean(
self,
Expand All @@ -6278,6 +6284,7 @@ def mean(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median")
@doc(make_doc("median", ndim=1))
def median(
self,
Expand All @@ -6290,6 +6297,7 @@ def median(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem")
@doc(make_doc("sem", ndim=1))
def sem(
self,
Expand All @@ -6308,6 +6316,7 @@ def sem(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var")
@doc(make_doc("var", ndim=1))
def var(
self,
Expand All @@ -6326,6 +6335,7 @@ def var(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std")
@doc(make_doc("std", ndim=1))
def std(
self,
Expand All @@ -6344,6 +6354,7 @@ def std(
**kwargs,
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew")
@doc(make_doc("skew", ndim=1))
def skew(
self,
Expand All @@ -6356,6 +6367,7 @@ def skew(
self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs
)

@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt")
@doc(make_doc("kurt", ndim=1))
def kurt(
self,
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def test_apply_yield_list(float_frame):

def test_apply_reduce_Series(float_frame):
float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan
expected = float_frame.mean(1)
expected = float_frame.mean(axis=1)
result = float_frame.apply(np.mean, axis=1)
tm.assert_series_equal(result, expected)

Expand Down
25 changes: 8 additions & 17 deletions pandas/tests/apply/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,18 @@

@pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"])
@pytest.mark.parametrize(
"args,kwds",
"kwds",
[
pytest.param([], {}, id="no_args_or_kwds"),
pytest.param([1], {}, id="axis_from_args"),
pytest.param([], {"axis": 1}, id="axis_from_kwds"),
pytest.param([], {"numeric_only": True}, id="optional_kwds"),
pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"),
pytest.param({}, id="no_kwds"),
pytest.param({"axis": 1}, id="on_axis"),
pytest.param({"numeric_only": True}, id="func_kwds"),
pytest.param({"axis": 1, "numeric_only": True}, id="axis_and_func_kwds"),
],
)
@pytest.mark.parametrize("how", ["agg", "apply"])
def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how):
if len(args) > 1 and how == "agg":
request.applymarker(
pytest.mark.xfail(
raises=TypeError,
reason="agg/apply signature mismatch - agg passes 2nd "
"argument to func",
)
)
result = getattr(float_frame, how)(func, *args, **kwds)
expected = getattr(float_frame, func)(*args, **kwds)
def test_apply_with_string_funcs(request, float_frame, func, kwds, how):
result = getattr(float_frame, how)(func, **kwds)
expected = getattr(float_frame, func)(**kwds)
tm.assert_series_equal(result, expected)


Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,18 @@ def test_basic(self, date_range_frame):
dates = date_range("1/1/1990", periods=N * 3, freq="25s")

result = df.asof(dates)
assert result.notna().all(1).all()
assert result.notna().all(axis=1).all()
lb = df.index[14]
ub = df.index[30]

dates = list(dates)

result = df.asof(dates)
assert result.notna().all(1).all()
assert result.notna().all(axis=1).all()

mask = (result.index >= lb) & (result.index < ub)
rs = result[mask]
assert (rs == 14).all(1).all()
assert (rs == 14).all(axis=1).all()

def test_subset(self, date_range_frame):
N = 10
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ def test_fillna_dict_series(self):

# disable this for now
with pytest.raises(NotImplementedError, match="column by column"):
df.fillna(df.max(1), axis=1)
df.fillna(df.max(axis=1), axis=1)

def test_fillna_dataframe(self):
# GH#8377
Expand Down
24 changes: 12 additions & 12 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -773,7 +773,7 @@ def test_operators_timedelta64(self):
tm.assert_series_equal(result, expected)

# works when only those columns are selected
result = mixed[["A", "B"]].min(1)
result = mixed[["A", "B"]].min(axis=1)
expected = Series([timedelta(days=-1)] * 3)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -832,8 +832,8 @@ def test_std_datetime64_with_nat(self, values, skipna, request, unit):
def test_sum_corner(self):
empty_frame = DataFrame()

axis0 = empty_frame.sum(0)
axis1 = empty_frame.sum(1)
axis0 = empty_frame.sum(axis=0)
axis1 = empty_frame.sum(axis=1)
assert isinstance(axis0, Series)
assert isinstance(axis1, Series)
assert len(axis0) == 0
Expand Down Expand Up @@ -967,8 +967,8 @@ def test_sum_object(self, float_frame):
def test_sum_bool(self, float_frame):
# ensure this works, bug report
bools = np.isnan(float_frame)
bools.sum(1)
bools.sum(0)
bools.sum(axis=1)
bools.sum(axis=0)

def test_sum_mixed_datetime(self):
# GH#30886
Expand All @@ -990,7 +990,7 @@ def test_mean_corner(self, float_frame, float_string_frame):

# take mean of boolean column
float_frame["bool"] = float_frame["A"] > 0
means = float_frame.mean(0)
means = float_frame.mean(axis=0)
assert means["bool"] == float_frame["bool"].values.mean()

def test_mean_datetimelike(self):
Expand Down Expand Up @@ -1043,13 +1043,13 @@ def test_mean_extensionarray_numeric_only_true(self):

def test_stats_mixed_type(self, float_string_frame):
with pytest.raises(TypeError, match="could not convert"):
float_string_frame.std(1)
float_string_frame.std(axis=1)
with pytest.raises(TypeError, match="could not convert"):
float_string_frame.var(1)
float_string_frame.var(axis=1)
with pytest.raises(TypeError, match="unsupported operand type"):
float_string_frame.mean(1)
float_string_frame.mean(axis=1)
with pytest.raises(TypeError, match="could not convert"):
float_string_frame.skew(1)
float_string_frame.skew(axis=1)

def test_sum_bools(self):
df = DataFrame(index=range(1), columns=range(10))
Expand Down Expand Up @@ -1331,11 +1331,11 @@ def test_any_all_extra(self):
result = df[["A", "B"]].any(axis=1, bool_only=True)
tm.assert_series_equal(result, expected)

result = df.all(1)
result = df.all(axis=1)
expected = Series([True, False, False], index=["a", "b", "c"])
tm.assert_series_equal(result, expected)

result = df.all(1, bool_only=True)
result = df.all(axis=1, bool_only=True)
tm.assert_series_equal(result, expected)

# Axis is None
Expand Down
Loading

0 comments on commit 6126b85

Please sign in to comment.