diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py index 8d4dd7be28839..802fc4db0a36d 100644 --- a/pandas/core/array_algos/quantile.py +++ b/pandas/core/array_algos/quantile.py @@ -1,11 +1,50 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + import numpy as np from pandas._libs import lib +from pandas._typing import ArrayLike -from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.common import ( + is_list_like, + is_sparse, +) +from pandas.core.dtypes.missing import ( + isna, + na_value_for_dtype, +) from pandas.core.nanops import nanpercentile +if TYPE_CHECKING: + from pandas.core.arrays import ExtensionArray + + +def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> ArrayLike: + """ + Compute the quantiles of the given values for each quantile in `qs`. + + Parameters + ---------- + values : np.ndarray or ExtensionArray + qs : a scalar or list of the quantiles to be computed + interpolation : str + axis : int + + Returns + ------- + np.ndarray or ExtensionArray + """ + if isinstance(values, np.ndarray): + fill_value = na_value_for_dtype(values.dtype, compat=False) + mask = isna(values) + result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis) + else: + result = quantile_ea_compat(values, qs, interpolation, axis) + return result + def quantile_with_mask( values: np.ndarray, @@ -75,3 +114,50 @@ def quantile_with_mask( result = lib.item_from_zerodim(result) return result + + +def quantile_ea_compat( + values: ExtensionArray, qs, interpolation: str, axis: int +) -> ExtensionArray: + """ + ExtensionArray compatibility layer for quantile_with_mask. + + We pretend that an ExtensionArray with shape (N,) is actually (1, N,) + for compatibility with non-EA code. + + Parameters + ---------- + values : ExtensionArray + qs : a scalar or list of the quantiles to be computed + interpolation: str + axis : int + + Returns + ------- + ExtensionArray + """ + # TODO(EA2D): make-believe not needed with 2D EAs + orig = values + + # asarray needed for Sparse, see GH#24600 + mask = np.asarray(values.isna()) + mask = np.atleast_2d(mask) + + values, fill_value = values._values_for_factorize() + values = np.atleast_2d(values) + + result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis) + + if not is_sparse(orig.dtype): + # shape[0] should be 1 as long as EAs are 1D + + if result.ndim == 1: + # i.e. qs was originally a scalar + assert result.shape == (1,), result.shape + result = type(orig)._from_factorized(result, orig) + + else: + assert result.shape == (1, len(qs)), result.shape + result = type(orig)._from_factorized(result[0], orig) + + return result diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16d6071585a29..8ad534c2265f3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9661,9 +9661,8 @@ def quantile( q = Index(q, dtype=np.float64) data = self._get_numeric_data() if numeric_only else self axis = self._get_axis_number(axis) - is_transposed = axis == 1 - if is_transposed: + if axis == 1: data = data.T if len(data.columns) == 0: @@ -9673,15 +9672,9 @@ def quantile( return self._constructor([], index=q, columns=cols) return self._constructor_sliced([], index=cols, name=q, dtype=np.float64) - result = data._mgr.quantile( - qs=q, axis=1, interpolation=interpolation, transposed=is_transposed - ) - - result = self._constructor(result) - - if is_transposed: - result = result.T + res = data._mgr.quantile(qs=q, axis=1, interpolation=interpolation) + result = self._constructor(res) return result @doc(NDFrame.asfreq, **_shared_doc_kwargs) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 0bdd94cabdb47..0449be84bdcf7 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -4,6 +4,7 @@ from __future__ import annotations from typing import ( + TYPE_CHECKING, Any, Callable, List, @@ -56,6 +57,7 @@ ) import pandas.core.algorithms as algos +from pandas.core.array_algos.quantile import quantile_compat from pandas.core.array_algos.take import take_nd from pandas.core.arrays import ( DatetimeArray, @@ -82,6 +84,10 @@ ) from pandas.core.internals.blocks import make_block +if TYPE_CHECKING: + from pandas import Float64Index + + T = TypeVar("T", bound="ArrayManager") @@ -448,7 +454,28 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T: return type(self)(result_arrays, self._axes) - # TODO quantile + def quantile( + self, + *, + qs: Float64Index, + axis: int = 0, + transposed: bool = False, + interpolation="linear", + ) -> ArrayManager: + + arrs = [ + x if not isinstance(x, np.ndarray) else np.atleast_2d(x) + for x in self.arrays + ] + assert axis == 1 + new_arrs = [quantile_compat(x, qs, interpolation, axis=axis) for x in arrs] + for i, arr in enumerate(new_arrs): + if arr.ndim == 2: + assert arr.shape[0] == 1, arr.shape + new_arrs[i] = arr[0] + + axes = [qs, self._axes[1]] + return type(self)(new_arrs, axes) def isna(self, func) -> ArrayManager: return self.apply("apply", func=func) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 63fae32acf3ff..59b5a151497ff 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -81,7 +81,7 @@ setitem_datetimelike_compat, validate_putmask, ) -from pandas.core.array_algos.quantile import quantile_with_mask +from pandas.core.array_algos.quantile import quantile_compat from pandas.core.array_algos.replace import ( compare_or_regex_search, replace_regex, @@ -1458,11 +1458,7 @@ def quantile( assert axis == 1 # only ever called this way assert is_list_like(qs) # caller is responsible for this - fill_value = self.fill_value - values = self.values - mask = np.asarray(isna(values)) - - result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis) + result = quantile_compat(self.values, qs, interpolation, axis) return make_block(result, placement=self.mgr_locs, ndim=2) @@ -1836,24 +1832,6 @@ def _unstack(self, unstacker, fill_value, new_placement): ] return blocks, mask - def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block: - # asarray needed for Sparse, see GH#24600 - mask = np.asarray(isna(self.values)) - mask = np.atleast_2d(mask) - - values, fill_value = self.values._values_for_factorize() - - values = np.atleast_2d(values) - - result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis) - - if not is_sparse(self.dtype): - # shape[0] should be 1 as long as EAs are 1D - assert result.shape == (1, len(qs)), result.shape - result = type(self.values)._from_factorized(result[0], self.values) - - return make_block(result, placement=self.mgr_locs, ndim=2) - class HybridMixin: """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b3a60d7d1ef45..dcd58c703619f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -521,7 +521,6 @@ def quantile( *, qs: Float64Index, axis: int = 0, - transposed: bool = False, interpolation="linear", ) -> BlockManager: """ @@ -534,8 +533,6 @@ def quantile( axis: reduction axis, default 0 consolidate: bool, default True. Join together blocks having same dtype - transposed: bool, default False - we are holding transposed data interpolation : type of interpolation, default 'linear' qs : list of the quantiles to be computed @@ -557,13 +554,6 @@ def quantile( for blk in self.blocks ] - if transposed: - new_axes = new_axes[::-1] - blocks = [ - b.make_block(b.values.T, placement=np.arange(b.shape[1])) - for b in blocks - ] - return type(self)(blocks, new_axes) def isna(self, func) -> BlockManager: diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index 0b4ce0dfa80fc..fa91eb928e35c 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( Categorical, @@ -13,9 +11,6 @@ ) import pandas._testing as tm -# TODO(ArrayManager) quantile is needed for describe() -pytestmark = td.skip_array_manager_not_yet_implemented - class TestDataFrameDescribe: def test_describe_bool_in_mixed_frame(self): diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 46d3e335539fb..9d56bb6314534 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( DataFrame, @@ -11,8 +9,6 @@ ) import pandas._testing as tm -pytestmark = td.skip_array_manager_not_yet_implemented - class TestDataFrameQuantile: @pytest.mark.parametrize( @@ -526,12 +522,13 @@ def test_quantile_empty_no_columns(self): expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) - def test_quantile_item_cache(self): + def test_quantile_item_cache(self, using_array_manager): # previous behavior incorrect retained an invalid _item_cache entry df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) df["D"] = df["A"] * 2 ser = df["A"] - assert len(df._mgr.blocks) == 2 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 df.quantile(numeric_only=False) ser.values[0] = 99 @@ -610,12 +607,18 @@ def test_quantile_ea_with_na(self, index, frame_or_series): expected = frame_or_series(expected) tm.assert_equal(result, expected) + # TODO: filtering can be removed after GH#39763 is fixed + @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning") def test_quantile_ea_all_na(self, index, frame_or_series): obj = frame_or_series(index).copy() obj.iloc[:] = index._na_value + # TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed + obj[:] = obj.astype(index.dtype) + assert np.all(obj.dtypes == index.dtype) + # result should be invariant to shuffling indexer = np.arange(len(index), dtype=np.intp) np.random.shuffle(indexer) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 8e6d67d03ae9d..b7df1c8382daa 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -10,7 +10,6 @@ import pytest from pandas.errors import PerformanceWarning -import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_integer_dtype @@ -46,7 +45,6 @@ def test_agg_regression1(tsframe): tm.assert_frame_equal(result, expected) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile/describe def test_agg_must_agg(df): grouped = df.groupby("A")["C"] diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 79ec0af267234..eb54887cea277 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -318,7 +318,6 @@ def test_groupby_as_index_apply(df): tm.assert_index_equal(res, ind) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_apply_concat_preserve_names(three_group): grouped = three_group.groupby(["A", "B"]) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index a7247c2c04761..1c250998c344f 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -83,7 +83,6 @@ def get_stats(group): assert result.index.names[0] == "C" -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_basic(): cats = Categorical( @@ -540,7 +539,6 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): assert False, msg -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_datetime(): # GH9049: ensure backward compatibility levels = pd.date_range("2014-01-01", periods=4) @@ -606,7 +604,6 @@ def test_categorical_index(): tm.assert_frame_equal(result, expected) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_describe_categorical_columns(): # GH 11558 cats = CategoricalIndex( @@ -621,7 +618,6 @@ def test_describe_categorical_columns(): tm.assert_categorical_equal(result.stack().columns.values, cats.values) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_unstack_categorical(): # GH11558 (example is taken from the original issue) df = DataFrame( diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 598465a951e0f..cab5417e81445 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -367,7 +367,6 @@ def test_mad(self, gb, gni): result = gni.mad() tm.assert_frame_equal(result, expected) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_describe(self, df, gb, gni): # describe expected_index = Index([1, 3], name="A") @@ -924,13 +923,11 @@ def test_is_monotonic_decreasing(in_vals, out_vals): # -------------------------------- -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_apply_describe_bug(mframe): grouped = mframe.groupby(level="first") grouped.describe() # it works! -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_series_describe_multikey(): ts = tm.makeTimeSeries() grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) @@ -940,7 +937,6 @@ def test_series_describe_multikey(): tm.assert_series_equal(result["min"], grouped.min(), check_names=False) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_series_describe_single(): ts = tm.makeTimeSeries() grouped = ts.groupby(lambda x: x.month) @@ -955,7 +951,6 @@ def test_series_index_name(df): assert result.index.name == "A" -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_frame_describe_multikey(tsframe): grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() @@ -978,7 +973,6 @@ def test_frame_describe_multikey(tsframe): tm.assert_frame_equal(result, expected) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_frame_describe_tupleindex(): # GH 14848 - regression from 0.19.0 to 0.19.1 @@ -998,7 +992,6 @@ def test_frame_describe_tupleindex(): df2.groupby("key").describe() -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_frame_describe_unstacked_format(): # GH 4792 prices = { @@ -1025,7 +1018,6 @@ def test_frame_describe_unstacked_format(): tm.assert_frame_equal(result, expected) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile @pytest.mark.filterwarnings( "ignore:" "indexing past lexsort depth may impact performance:" diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8cbb9d2443cb2..afde1daca74c1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -7,7 +7,6 @@ from pandas.compat import IS64 from pandas.errors import PerformanceWarning -import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -211,7 +210,6 @@ def f(grp): tm.assert_series_equal(result, e) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_pass_args_kwargs(ts, tsframe): def f(x, q=None, axis=0): return np.percentile(x, q, axis=axis) @@ -366,7 +364,6 @@ def f3(x): df2.groupby("a").apply(f3) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_attr_wrapper(ts): grouped = ts.groupby(lambda x: x.weekday()) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 2924348e98b56..9c9d1aa881890 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( DataFrame, @@ -10,9 +8,6 @@ ) import pandas._testing as tm -# TODO(ArrayManager) quantile -pytestmark = td.skip_array_manager_not_yet_implemented - @pytest.mark.parametrize( "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 77b549e675a8d..cf6b2d372657d 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -6,8 +6,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd from pandas import ( Categorical, @@ -293,7 +291,6 @@ def test_numpy_minmax_timedelta64(self): with pytest.raises(ValueError, match=errmsg): np.argmax(td, out=0) - @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile def test_timedelta_ops(self): # GH#4984 # make sure ops return Timedelta diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 733a8c0aa58ec..bf3e6d822ab19 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -3,8 +3,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas import ( DataFrame, NaT, @@ -247,7 +245,6 @@ def test_resampler_is_iterable(series): tm.assert_series_equal(rv, gv) -@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile @all_ts def test_resample_quantile(series): # GH 15023 diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py index 1113efc972e76..bdb308ddbfd58 100644 --- a/pandas/tests/series/methods/test_describe.py +++ b/pandas/tests/series/methods/test_describe.py @@ -1,7 +1,5 @@ import numpy as np -import pandas.util._test_decorators as td - from pandas import ( Period, Series, @@ -11,9 +9,6 @@ ) import pandas._testing as tm -# TODO(ArrayManager) quantile is needed for describe() -pytestmark = td.skip_array_manager_not_yet_implemented - class TestSeriesDescribe: def test_describe(self): diff --git a/pandas/tests/series/methods/test_quantile.py b/pandas/tests/series/methods/test_quantile.py index 9001f95fe4299..461c81bc3b44f 100644 --- a/pandas/tests/series/methods/test_quantile.py +++ b/pandas/tests/series/methods/test_quantile.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas.core.dtypes.common import is_integer import pandas as pd @@ -13,8 +11,6 @@ import pandas._testing as tm from pandas.core.indexes.datetimes import Timestamp -pytestmark = td.skip_array_manager_not_yet_implemented - class TestSeriesQuantile: def test_quantile(self, datetime_series):