diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c0ed198e200f1..4124936b910e6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -248,7 +248,6 @@ def _get_hashtable_algo(values): def _get_data_algo(values, func_map): - if is_categorical_dtype(values): values = values._values_for_rank() @@ -299,7 +298,6 @@ def match(to_match, values, na_sentinel=-1): result = table.lookup(to_match) if na_sentinel != -1: - # replace but return a numpy array # use a Series because it handles dtype conversions properly from pandas import Series @@ -1165,7 +1163,6 @@ def compute(self, method): # slow method if n >= len(self.obj): - reverse_it = self.keep == "last" or method == "nlargest" ascending = method == "nsmallest" slc = np.s_[::-1] if reverse_it else np.s_[:] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f1ed3a125f60c..79f3ca6ffab2c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -32,7 +32,11 @@ deprecate_kwarg, rewrite_axis_style_signature, ) -from pandas.util._validators import validate_axis_style_args, validate_bool_kwarg +from pandas.util._validators import ( + validate_axis_style_args, + validate_bool_kwarg, + validate_percentile, +) from pandas.core.dtypes.cast import ( cast_scalar_to_array, @@ -8225,7 +8229,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"): C 1 days 12:00:00 Name: 0.5, dtype: object """ - self._check_percentile(q) + validate_percentile(q) data = self._get_numeric_data() if numeric_only else self axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 68308b2f83b60..bbfbea37b4a71 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -31,7 +31,11 @@ from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature -from pandas.util._validators import validate_bool_kwarg, 
validate_fillna_kwargs +from pandas.util._validators import ( + validate_bool_kwarg, + validate_fillna_kwargs, + validate_percentile, +) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask from pandas.core.dtypes.common import ( @@ -10169,7 +10173,7 @@ def describe(self, percentiles=None, include=None, exclude=None): percentiles = list(percentiles) # get them all to be in [0, 1] - self._check_percentile(percentiles) + validate_percentile(percentiles) # median should always be included if 0.5 not in percentiles: @@ -10273,21 +10277,6 @@ def describe_1d(data): d.columns = data.columns.copy() return d - def _check_percentile(self, q): - """ - Validate percentiles (used by describe and quantile). - """ - - msg = "percentiles should all be in the interval [0, 1]. Try {0} instead." - q = np.asarray(q) - if q.ndim == 0: - if not 0 <= q <= 1: - raise ValueError(msg.format(q / 100.0)) - else: - if not all(0 <= qs <= 1 for qs in q): - raise ValueError(msg.format(q / 100.0)) - return q - _shared_docs[ "pct_change" ] = """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 922977bc04d63..4ee05b582003b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -16,7 +16,7 @@ from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, deprecate -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_percentile from pandas.core.dtypes.common import ( _is_unorderable_exception, @@ -2353,7 +2353,7 @@ def quantile(self, q=0.5, interpolation="linear"): dtype: float64 """ - self._check_percentile(q) + validate_percentile(q) # We dispatch to DataFrame so that core.internals only has to worry # about 2D cases. 
from typing import Iterable, Union

import numpy as np


def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid
    percentile, otherwise raises a ValueError.

    Parameters
    ----------
    q : float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError
        If any percentile lies outside the interval [0, 1].
    """
    msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
    q_arr = np.asarray(q)
    # One vectorized comparison handles 0-d scalars and arrays of any shape.
    # Comparisons with NaN evaluate to False, so NaN is rejected as well, and
    # an empty input passes because ``np.all`` of an empty array is True.
    within_unit_interval = (q_arr >= 0) & (q_arr <= 1)
    if not np.all(within_unit_interval):
        # Suggest the value(s) scaled down by 100, on the guess that the
        # caller passed percentages (e.g. 50) rather than fractions (0.5).
        raise ValueError(msg.format(q_arr / 100.0))
    return q_arr