Skip to content

Commit

Permalink
ENH: ArrayManager.quantile (#40189)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Mar 5, 2021
1 parent ec56dd2 commit af0a60e
Show file tree
Hide file tree
Showing 17 changed files with 129 additions and 95 deletions.
88 changes: 87 additions & 1 deletion pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,50 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike

from pandas.core.dtypes.common import is_list_like
from pandas.core.dtypes.common import (
is_list_like,
is_sparse,
)
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
)

from pandas.core.nanops import nanpercentile

if TYPE_CHECKING:
from pandas.core.arrays import ExtensionArray


def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> ArrayLike:
    """
    Compute the requested quantiles of ``values``, dispatching on array type.

    Anything that is not a ``np.ndarray`` is delegated to
    ``quantile_ea_compat``.  For ndarrays, the NA mask and the dtype's NA
    fill value are derived here and handed to ``quantile_with_mask``.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation : str
    axis : int

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        return quantile_ea_compat(values, qs, interpolation, axis)

    # compat=False is passed through to na_value_for_dtype unchanged
    na_fill = na_value_for_dtype(values.dtype, compat=False)
    na_mask = isna(values)
    return quantile_with_mask(values, na_mask, na_fill, qs, interpolation, axis)


def quantile_with_mask(
values: np.ndarray,
Expand Down Expand Up @@ -75,3 +114,50 @@ def quantile_with_mask(
result = lib.item_from_zerodim(result)

return result


def quantile_ea_compat(
    values: ExtensionArray, qs, interpolation: str, axis: int
) -> ExtensionArray:
    """
    ExtensionArray front end for quantile_with_mask.

    The array's NA mask and its ``_values_for_factorize()`` values are each
    promoted with ``np.atleast_2d`` (so shape (N,) is treated as (1, N))
    before being passed to the ndarray implementation.

    Parameters
    ----------
    values : ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation : str
    axis : int

    Returns
    -------
    ExtensionArray
        For non-sparse dtypes the result is rebuilt through
        ``_from_factorized``; for sparse dtypes the output of
        ``quantile_with_mask`` is returned unchanged.
    """
    # TODO(EA2D): make-believe not needed with 2D EAs
    ea = values

    # asarray needed for Sparse, see GH#24600
    mask_2d = np.atleast_2d(np.asarray(ea.isna()))

    raw, fill_value = ea._values_for_factorize()
    raw_2d = np.atleast_2d(raw)

    result = quantile_with_mask(raw_2d, mask_2d, fill_value, qs, interpolation, axis)

    if is_sparse(ea.dtype):
        return result

    # shape[0] should be 1 as long as EAs are 1D
    if result.ndim == 1:
        # i.e. qs was originally a scalar
        assert result.shape == (1,), result.shape
        flat = result
    else:
        assert result.shape == (1, len(qs)), result.shape
        flat = result[0]

    return type(ea)._from_factorized(flat, ea)
13 changes: 3 additions & 10 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9661,9 +9661,8 @@ def quantile(
q = Index(q, dtype=np.float64)
data = self._get_numeric_data() if numeric_only else self
axis = self._get_axis_number(axis)
is_transposed = axis == 1

if is_transposed:
if axis == 1:
data = data.T

if len(data.columns) == 0:
Expand All @@ -9673,15 +9672,9 @@ def quantile(
return self._constructor([], index=q, columns=cols)
return self._constructor_sliced([], index=cols, name=q, dtype=np.float64)

result = data._mgr.quantile(
qs=q, axis=1, interpolation=interpolation, transposed=is_transposed
)

result = self._constructor(result)

if is_transposed:
result = result.T
res = data._mgr.quantile(qs=q, axis=1, interpolation=interpolation)

result = self._constructor(res)
return result

@doc(NDFrame.asfreq, **_shared_doc_kwargs)
Expand Down
29 changes: 28 additions & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

from typing import (
TYPE_CHECKING,
Any,
Callable,
List,
Expand Down Expand Up @@ -56,6 +57,7 @@
)

import pandas.core.algorithms as algos
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.take import take_nd
from pandas.core.arrays import (
DatetimeArray,
Expand All @@ -82,6 +84,10 @@
)
from pandas.core.internals.blocks import make_block

if TYPE_CHECKING:
from pandas import Float64Index


T = TypeVar("T", bound="ArrayManager")


Expand Down Expand Up @@ -448,7 +454,28 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:

return type(self)(result_arrays, self._axes)

# TODO quantile
def quantile(
    self,
    *,
    qs: Float64Index,
    axis: int = 0,
    transposed: bool = False,
    interpolation="linear",
) -> ArrayManager:
    """
    Compute quantiles array-by-array and return a new manager.

    Parameters
    ----------
    qs : Float64Index
        Quantiles to compute; also used as the first axis of the result.
    axis : int, default 0
        Must be 1 (asserted below).
    transposed : bool, default False
        Accepted but not used in this method body.
    interpolation : str, default "linear"
        Passed through to ``quantile_compat``.

    Returns
    -------
    ArrayManager
    """
    # ndarrays are promoted to (at least) 2D; other arrays pass through as-is
    promoted = []
    for arr in self.arrays:
        if isinstance(arr, np.ndarray):
            arr = np.atleast_2d(arr)
        promoted.append(arr)

    assert axis == 1
    computed = [
        quantile_compat(arr, qs, interpolation, axis=axis) for arr in promoted
    ]

    # drop the leading length-1 dimension from any 2D result
    result_arrays = []
    for res in computed:
        if res.ndim == 2:
            assert res.shape[0] == 1, res.shape
            res = res[0]
        result_arrays.append(res)

    return type(self)(result_arrays, [qs, self._axes[1]])

def isna(self, func) -> ArrayManager:
return self.apply("apply", func=func)
Expand Down
26 changes: 2 additions & 24 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
setitem_datetimelike_compat,
validate_putmask,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.array_algos.quantile import quantile_compat
from pandas.core.array_algos.replace import (
compare_or_regex_search,
replace_regex,
Expand Down Expand Up @@ -1458,11 +1458,7 @@ def quantile(
assert axis == 1 # only ever called this way
assert is_list_like(qs) # caller is responsible for this

fill_value = self.fill_value
values = self.values
mask = np.asarray(isna(values))

result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis)
result = quantile_compat(self.values, qs, interpolation, axis)

return make_block(result, placement=self.mgr_locs, ndim=2)

Expand Down Expand Up @@ -1836,24 +1832,6 @@ def _unstack(self, unstacker, fill_value, new_placement):
]
return blocks, mask

def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
    """
    Compute quantiles of this block's values and wrap them in a new block.

    The NA mask and the ``_values_for_factorize()`` values are promoted with
    ``np.atleast_2d`` before being handed to ``quantile_with_mask``.  For
    non-sparse dtypes the first row of the result is converted back with
    ``_from_factorized``.
    """
    ea = self.values

    # asarray needed for Sparse, see GH#24600
    mask = np.atleast_2d(np.asarray(isna(ea)))

    raw, fill_value = ea._values_for_factorize()
    raw = np.atleast_2d(raw)

    result = quantile_with_mask(raw, mask, fill_value, qs, interpolation, axis)

    if not is_sparse(self.dtype):
        # shape[0] should be 1 as long as EAs are 1D
        assert result.shape == (1, len(qs)), result.shape
        result = type(ea)._from_factorized(result[0], ea)

    return make_block(result, placement=self.mgr_locs, ndim=2)


class HybridMixin:
"""
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,7 +521,6 @@ def quantile(
*,
qs: Float64Index,
axis: int = 0,
transposed: bool = False,
interpolation="linear",
) -> BlockManager:
"""
Expand All @@ -534,8 +533,6 @@ def quantile(
axis: reduction axis, default 0
consolidate: bool, default True. Join together blocks having same
dtype
transposed: bool, default False
we are holding transposed data
interpolation : type of interpolation, default 'linear'
qs : list of the quantiles to be computed
Expand All @@ -557,13 +554,6 @@ def quantile(
for blk in self.blocks
]

if transposed:
new_axes = new_axes[::-1]
blocks = [
b.make_block(b.values.T, placement=np.arange(b.shape[1]))
for b in blocks
]

return type(self)(blocks, new_axes)

def isna(self, func) -> BlockManager:
Expand Down
5 changes: 0 additions & 5 deletions pandas/tests/frame/methods/test_describe.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Categorical,
Expand All @@ -13,9 +11,6 @@
)
import pandas._testing as tm

# TODO(ArrayManager) quantile is needed for describe()
pytestmark = td.skip_array_manager_not_yet_implemented


class TestDataFrameDescribe:
def test_describe_bool_in_mixed_frame(self):
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
DataFrame,
Expand All @@ -11,8 +9,6 @@
)
import pandas._testing as tm

pytestmark = td.skip_array_manager_not_yet_implemented


class TestDataFrameQuantile:
@pytest.mark.parametrize(
Expand Down Expand Up @@ -526,12 +522,13 @@ def test_quantile_empty_no_columns(self):
expected.columns.name = "captain tightpants"
tm.assert_frame_equal(result, expected)

def test_quantile_item_cache(self):
def test_quantile_item_cache(self, using_array_manager):
# previous behavior incorrectly retained an invalid _item_cache entry
df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"])
df["D"] = df["A"] * 2
ser = df["A"]
assert len(df._mgr.blocks) == 2
if not using_array_manager:
assert len(df._mgr.blocks) == 2

df.quantile(numeric_only=False)
ser.values[0] = 99
Expand Down Expand Up @@ -610,12 +607,18 @@ def test_quantile_ea_with_na(self, index, frame_or_series):
expected = frame_or_series(expected)
tm.assert_equal(result, expected)

# TODO: filtering can be removed after GH#39763 is fixed
@pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
def test_quantile_ea_all_na(self, index, frame_or_series):

obj = frame_or_series(index).copy()

obj.iloc[:] = index._na_value

# TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed
obj[:] = obj.astype(index.dtype)
assert np.all(obj.dtypes == index.dtype)

# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
np.random.shuffle(indexer)
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import pytest

from pandas.errors import PerformanceWarning
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_integer_dtype

Expand Down Expand Up @@ -46,7 +45,6 @@ def test_agg_regression1(tsframe):
tm.assert_frame_equal(result, expected)


@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile/describe
def test_agg_must_agg(df):
grouped = df.groupby("A")["C"]

Expand Down
1 change: 0 additions & 1 deletion pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,6 @@ def test_groupby_as_index_apply(df):
tm.assert_index_equal(res, ind)


@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
def test_apply_concat_preserve_names(three_group):
grouped = three_group.groupby(["A", "B"])

Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def get_stats(group):
assert result.index.names[0] == "C"


@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
def test_basic():

cats = Categorical(
Expand Down Expand Up @@ -540,7 +539,6 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
assert False, msg


@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
def test_datetime():
# GH9049: ensure backward compatibility
levels = pd.date_range("2014-01-01", periods=4)
Expand Down Expand Up @@ -606,7 +604,6 @@ def test_categorical_index():
tm.assert_frame_equal(result, expected)


@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
def test_describe_categorical_columns():
# GH 11558
cats = CategoricalIndex(
Expand All @@ -621,7 +618,6 @@ def test_describe_categorical_columns():
tm.assert_categorical_equal(result.stack().columns.values, cats.values)


@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
def test_unstack_categorical():
# GH11558 (example is taken from the original issue)
df = DataFrame(
Expand Down
Loading

0 comments on commit af0a60e

Please sign in to comment.