diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst index eed3fc149263a..92799359a61d2 100644 --- a/doc/source/user_guide/basics.rst +++ b/doc/source/user_guide/basics.rst @@ -476,15 +476,15 @@ For example: .. ipython:: python df - df.mean(0) - df.mean(1) + df.mean(axis=0) + df.mean(axis=1) All such methods have a ``skipna`` option signaling whether to exclude missing data (``True`` by default): .. ipython:: python - df.sum(0, skipna=False) + df.sum(axis=0, skipna=False) df.sum(axis=1, skipna=True) Combined with the broadcasting / arithmetic behavior, one can describe various @@ -495,8 +495,8 @@ standard deviation of 1), very concisely: ts_stand = (df - df.mean()) / df.std() ts_stand.std() - xs_stand = df.sub(df.mean(1), axis=0).div(df.std(1), axis=0) - xs_stand.std(1) + xs_stand = df.sub(df.mean(axis=1), axis=0).div(df.std(axis=1), axis=0) + xs_stand.std(axis=1) Note that methods like :meth:`~DataFrame.cumsum` and :meth:`~DataFrame.cumprod` preserve the location of ``NaN`` values. 
This is somewhat different from diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index fd843ca68a60b..0da87e1d31fec 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -952,7 +952,7 @@ To select a row where each column meets its own criterion: values = {'ids': ['a', 'b'], 'ids2': ['a', 'c'], 'vals': [1, 3]} - row_mask = df.isin(values).all(1) + row_mask = df.isin(values).all(axis=1) df[row_mask] diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0f71b52120a47..06688b8029609 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -191,6 +191,7 @@ Other Deprecations - Deprecated :meth:`Timestamp.utcfromtimestamp`, use ``Timestamp.fromtimestamp(ts, "UTC")`` instead (:issue:`56680`) - Deprecated :meth:`Timestamp.utcnow`, use ``Timestamp.now("UTC")`` instead (:issue:`56680`) +- Deprecated allowing non-keyword arguments in :meth:`DataFrame.all`, :meth:`DataFrame.min`, :meth:`DataFrame.max`, :meth:`DataFrame.sum`, :meth:`DataFrame.prod`, :meth:`DataFrame.mean`, :meth:`DataFrame.median`, :meth:`DataFrame.sem`, :meth:`DataFrame.var`, :meth:`DataFrame.std`, :meth:`DataFrame.skew`, :meth:`DataFrame.kurt`, :meth:`Series.all`, :meth:`Series.min`, :meth:`Series.max`, :meth:`Series.sum`, :meth:`Series.prod`, :meth:`Series.mean`, :meth:`Series.median`, :meth:`Series.sem`, :meth:`Series.var`, :meth:`Series.std`, :meth:`Series.skew`, and :meth:`Series.kurt`. (:issue:`57087`) - Deprecated allowing non-keyword arguments in :meth:`Series.to_markdown` except ``buf``. (:issue:`57280`) - Deprecated allowing non-keyword arguments in :meth:`Series.to_string` except ``buf``. 
(:issue:`57280`) - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 66a68755a2a09..97cf86d45812d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -64,6 +64,7 @@ from pandas.util._decorators import ( Appender, Substitution, + deprecate_nonkeyword_arguments, doc, set_module, ) @@ -11543,6 +11544,7 @@ def all( **kwargs, ) -> Series | bool: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all") @doc(make_doc("all", ndim=1)) def all( self, @@ -11589,6 +11591,7 @@ def min( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min") @doc(make_doc("min", ndim=2)) def min( self, @@ -11635,6 +11638,7 @@ def max( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max") @doc(make_doc("max", ndim=2)) def max( self, @@ -11650,6 +11654,7 @@ def max( result = result.__finalize__(self, method="max") return result + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum") @doc(make_doc("sum", ndim=2)) def sum( self, @@ -11670,6 +11675,7 @@ def sum( result = result.__finalize__(self, method="sum") return result + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod") @doc(make_doc("prod", ndim=2)) def prod( self, @@ -11721,6 +11727,7 @@ def mean( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean") @doc(make_doc("mean", ndim=2)) def mean( self, @@ -11767,6 +11774,7 @@ def median( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median") @doc(make_doc("median", ndim=2)) def median( self, @@ -11816,6 +11824,7 @@ def sem( **kwargs, ) -> Series | Any: ... 
+ @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=2)) def sem( self, @@ -11866,6 +11875,7 @@ def var( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var") @doc(make_doc("var", ndim=2)) def var( self, @@ -11916,6 +11926,7 @@ def std( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std") @doc(make_doc("std", ndim=2)) def std( self, @@ -11963,6 +11974,7 @@ def skew( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=2)) def skew( self, @@ -12009,6 +12021,7 @@ def kurt( **kwargs, ) -> Series | Any: ... + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt") @doc(make_doc("kurt", ndim=2)) def kurt( self, diff --git a/pandas/core/series.py b/pandas/core/series.py index 62a6178f5af4d..0f796964eb56d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6187,6 +6187,7 @@ def any( # type: ignore[override] filter_type="bool", ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="all") @Appender(make_doc("all", ndim=1)) def all( self, @@ -6206,6 +6207,7 @@ def all( filter_type="bool", ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="min") @doc(make_doc("min", ndim=1)) def min( self, @@ -6218,6 +6220,7 @@ def min( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max") @doc(make_doc("max", ndim=1)) def max( self, @@ -6230,6 +6233,7 @@ def max( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sum") @doc(make_doc("sum", ndim=1)) def sum( self, @@ -6248,6 +6252,7 @@ def sum( **kwargs, ) + 
@deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="prod") @doc(make_doc("prod", ndim=1)) def prod( self, @@ -6266,6 +6271,7 @@ def prod( **kwargs, ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="mean") @doc(make_doc("mean", ndim=1)) def mean( self, @@ -6278,6 +6284,7 @@ def mean( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median") @doc(make_doc("median", ndim=1)) def median( self, @@ -6290,6 +6297,7 @@ def median( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="sem") @doc(make_doc("sem", ndim=1)) def sem( self, @@ -6308,6 +6316,7 @@ def sem( **kwargs, ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="var") @doc(make_doc("var", ndim=1)) def var( self, @@ -6326,6 +6335,7 @@ def var( **kwargs, ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="std") @doc(make_doc("std", ndim=1)) def std( self, @@ -6344,6 +6354,7 @@ def std( **kwargs, ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="skew") @doc(make_doc("skew", ndim=1)) def skew( self, @@ -6356,6 +6367,7 @@ def skew( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) + @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="kurt") @doc(make_doc("kurt", ndim=1)) def kurt( self, diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index de5f5cac1282c..2501ca6c5e1c4 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -402,7 +402,7 @@ def test_apply_yield_list(float_frame): def test_apply_reduce_Series(float_frame): float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan - expected = float_frame.mean(1) + expected = float_frame.mean(axis=1) 
result = float_frame.apply(np.mean, axis=1) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index e9192dae66a46..50cf0f0ed3e84 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -19,27 +19,18 @@ @pytest.mark.parametrize("func", ["sum", "mean", "min", "max", "std"]) @pytest.mark.parametrize( - "args,kwds", + "kwds", [ - pytest.param([], {}, id="no_args_or_kwds"), - pytest.param([1], {}, id="axis_from_args"), - pytest.param([], {"axis": 1}, id="axis_from_kwds"), - pytest.param([], {"numeric_only": True}, id="optional_kwds"), - pytest.param([1, True], {"numeric_only": True}, id="args_and_kwds"), + pytest.param({}, id="no_kwds"), + pytest.param({"axis": 1}, id="on_axis"), + pytest.param({"numeric_only": True}, id="func_kwds"), + pytest.param({"axis": 1, "numeric_only": True}, id="axis_and_func_kwds"), ], ) @pytest.mark.parametrize("how", ["agg", "apply"]) -def test_apply_with_string_funcs(request, float_frame, func, args, kwds, how): - if len(args) > 1 and how == "agg": - request.applymarker( - pytest.mark.xfail( - raises=TypeError, - reason="agg/apply signature mismatch - agg passes 2nd " - "argument to func", - ) - ) - result = getattr(float_frame, how)(func, *args, **kwds) - expected = getattr(float_frame, func)(*args, **kwds) +def test_apply_with_string_funcs(float_frame, func, kwds, how): + result = getattr(float_frame, how)(func, **kwds) + expected = getattr(float_frame, func)(**kwds) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 029aa3a5b8f05..c510ef78d03aa 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -36,18 +36,18 @@ def test_basic(self, date_range_frame): dates = date_range("1/1/1990", periods=N * 3, freq="25s") result = df.asof(dates) - assert result.notna().all(1).all() + assert
result.notna().all(axis=1).all() lb = df.index[14] ub = df.index[30] dates = list(dates) result = df.asof(dates) - assert result.notna().all(1).all() + assert result.notna().all(axis=1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] - assert (rs == 14).all(1).all() + assert (rs == 14).all(axis=1).all() def test_subset(self, date_range_frame): N = 10 diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 81f66cfd48b0a..b8f67138889cc 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -466,7 +466,7 @@ def test_fillna_dict_series(self): # disable this for now with pytest.raises(NotImplementedError, match="column by column"): - df.fillna(df.max(1), axis=1) + df.fillna(df.max(axis=1), axis=1) def test_fillna_dataframe(self): # GH#8377 diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 8bf7cc6f1630c..8ccd7b2ca83ba 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -773,7 +773,7 @@ def test_operators_timedelta64(self): tm.assert_series_equal(result, expected) # works when only those columns are selected - result = mixed[["A", "B"]].min(1) + result = mixed[["A", "B"]].min(axis=1) expected = Series([timedelta(days=-1)] * 3) tm.assert_series_equal(result, expected) @@ -832,8 +832,8 @@ def test_std_datetime64_with_nat(self, values, skipna, request, unit): def test_sum_corner(self): empty_frame = DataFrame() - axis0 = empty_frame.sum(0) - axis1 = empty_frame.sum(1) + axis0 = empty_frame.sum(axis=0) + axis1 = empty_frame.sum(axis=1) assert isinstance(axis0, Series) assert isinstance(axis1, Series) assert len(axis0) == 0 @@ -967,8 +967,8 @@ def test_sum_object(self, float_frame): def test_sum_bool(self, float_frame): # ensure this works, bug report bools = np.isnan(float_frame) - bools.sum(1) - bools.sum(0) + bools.sum(axis=1) + bools.sum(axis=0) def 
test_sum_mixed_datetime(self): # GH#30886 @@ -990,7 +990,7 @@ def test_mean_corner(self, float_frame, float_string_frame): # take mean of boolean column float_frame["bool"] = float_frame["A"] > 0 - means = float_frame.mean(0) + means = float_frame.mean(axis=0) assert means["bool"] == float_frame["bool"].values.mean() def test_mean_datetimelike(self): @@ -1043,13 +1043,13 @@ def test_mean_extensionarray_numeric_only_true(self): def test_stats_mixed_type(self, float_string_frame): with pytest.raises(TypeError, match="could not convert"): - float_string_frame.std(1) + float_string_frame.std(axis=1) with pytest.raises(TypeError, match="could not convert"): - float_string_frame.var(1) + float_string_frame.var(axis=1) with pytest.raises(TypeError, match="unsupported operand type"): - float_string_frame.mean(1) + float_string_frame.mean(axis=1) with pytest.raises(TypeError, match="could not convert"): - float_string_frame.skew(1) + float_string_frame.skew(axis=1) def test_sum_bools(self): df = DataFrame(index=range(1), columns=range(10)) @@ -1331,11 +1331,11 @@ def test_any_all_extra(self): result = df[["A", "B"]].any(axis=1, bool_only=True) tm.assert_series_equal(result, expected) - result = df.all(1) + result = df.all(axis=1) expected = Series([True, False, False], index=["a", "b", "c"]) tm.assert_series_equal(result, expected) - result = df.all(1, bool_only=True) + result = df.all(axis=1, bool_only=True) tm.assert_series_equal(result, expected) # Axis is None diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index ac7d7c8f679c1..46753b668a8b0 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -665,7 +665,7 @@ def test_empty(self, method, unit, use_bottleneck, dtype): # GH#844 (changed in GH#9422) df = DataFrame(np.empty((10, 0)), dtype=dtype) - assert (getattr(df, method)(1) == unit).all() + assert (getattr(df, method)(axis=1) == unit).all() s = Series([1], 
dtype=dtype) result = getattr(s, method)(min_count=2) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 7b45a267a4572..a63ffbbd3a5a1 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -107,7 +107,7 @@ def test_contains(self, datetime_series): def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) - assert s.dropna().sum("rows") == 3 + assert s.dropna().sum(axis="rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 4e2af9fef377b..97e0fa93c90ef 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -135,7 +135,7 @@ def test_multilevel_consolidate(self): df = DataFrame( np.random.default_rng(2).standard_normal((4, 4)), index=index, columns=index ) - df["Totals", ""] = df.sum(1) + df["Totals", ""] = df.sum(axis=1) df = df._consolidate() def test_level_with_tuples(self): diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index d2e92bb971888..3bffd1f1987aa 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -36,7 +36,7 @@ def redundant_import(self, paramx=None, paramy=None) -> None: >>> import pandas as pd >>> df = pd.DataFrame(np.ones((3, 3)), ... columns=('a', 'b', 'c')) - >>> df.all(1) + >>> df.all(axis=1) 0 True 1 True 2 True