Skip to content

Commit

Permalink
CLN: Centralised _check_percentile (pandas-dev#27584)
Browse files Browse the repository at this point in the history
  • Loading branch information
hedonhermdev authored and proost committed Dec 19, 2019
1 parent 7a67e25 commit 8d71dc0
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 24 deletions.
3 changes: 0 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@ def _get_hashtable_algo(values):


def _get_data_algo(values, func_map):

if is_categorical_dtype(values):
values = values._values_for_rank()

Expand Down Expand Up @@ -297,7 +296,6 @@ def match(to_match, values, na_sentinel=-1):
result = table.lookup(to_match)

if na_sentinel != -1:

# replace but return a numpy array
# use a Series because it handles dtype conversions properly
from pandas import Series
Expand Down Expand Up @@ -1163,7 +1161,6 @@ def compute(self, method):

# slow method
if n >= len(self.obj):

reverse_it = self.keep == "last" or method == "nlargest"
ascending = method == "nsmallest"
slc = np.s_[::-1] if reverse_it else np.s_[:]
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@
deprecate_kwarg,
rewrite_axis_style_signature,
)
from pandas.util._validators import validate_axis_style_args, validate_bool_kwarg
from pandas.util._validators import (
validate_axis_style_args,
validate_bool_kwarg,
validate_percentile,
)

from pandas.core.dtypes.cast import (
cast_scalar_to_array,
Expand Down Expand Up @@ -8178,7 +8182,7 @@ def quantile(self, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
C 1 days 12:00:00
Name: 0.5, dtype: object
"""
self._check_percentile(q)
validate_percentile(q)

data = self._get_numeric_data() if numeric_only else self
axis = self._get_axis_number(axis)
Expand Down
23 changes: 6 additions & 17 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
from pandas.util._validators import (
validate_bool_kwarg,
validate_fillna_kwargs,
validate_percentile,
)

from pandas.core.dtypes.common import (
ensure_int64,
Expand Down Expand Up @@ -10168,7 +10172,7 @@ def describe(self, percentiles=None, include=None, exclude=None):
percentiles = list(percentiles)

# get them all to be in [0, 1]
self._check_percentile(percentiles)
validate_percentile(percentiles)

# median should always be included
if 0.5 not in percentiles:
Expand Down Expand Up @@ -10272,21 +10276,6 @@ def describe_1d(data):
d.columns = data.columns.copy()
return d

def _check_percentile(self, q):
"""
Validate percentiles (used by describe and quantile).
"""

msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
q = np.asarray(q)
if q.ndim == 0:
if not 0 <= q <= 1:
raise ValueError(msg.format(q / 100.0))
else:
if not all(0 <= qs <= 1 for qs in q):
raise ValueError(msg.format(q / 100.0))
return q

_shared_docs[
"pct_change"
] = """
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pandas.compat import PY36
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, deprecate
from pandas.util._validators import validate_bool_kwarg
from pandas.util._validators import validate_bool_kwarg, validate_percentile

from pandas.core.dtypes.common import (
_is_unorderable_exception,
Expand Down Expand Up @@ -2317,7 +2317,7 @@ def quantile(self, q=0.5, interpolation="linear"):
dtype: float64
"""

self._check_percentile(q)
validate_percentile(q)

# We dispatch to DataFrame so that core.internals only has to worry
# about 2D cases.
Expand Down
35 changes: 35 additions & 0 deletions pandas/util/_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
Module that contains many useful utilities
for validating data or function arguments
"""
from typing import Iterable, Union
import warnings

import numpy as np

from pandas.core.dtypes.common import is_bool


Expand Down Expand Up @@ -370,3 +373,35 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True):
raise ValueError("Cannot specify both 'value' and 'method'.")

return value, method


def validate_percentile(q: Union[float, Iterable[float]]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    This function checks if the given float or iterable of floats is a valid
    percentile, otherwise raises a ValueError.

    Parameters
    ----------
    q : float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError
        If any percentile is not in the interval [0, 1] (NaN included).
    """
    q_arr = np.asarray(q)
    # A single vectorized test handles 0-dim scalars and arrays of any
    # dimensionality; NaN fails both comparisons and is therefore rejected.
    if not np.all((q_arr >= 0) & (q_arr <= 1)):
        msg = "percentiles should all be in the interval [0, 1]. Try {0} instead."
        # The hint assumes the caller passed values on a 0-100 scale.
        raise ValueError(msg.format(q_arr / 100.0))
    return q_arr

0 comments on commit 8d71dc0

Please sign in to comment.