
[ArrayManager] Test DataFrame reductions + implement ignore_failures #39719

Merged
9 commits merged on Feb 25, 2021
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
@@ -157,3 +157,5 @@ jobs:
run: |
source activate pandas-dev
pytest pandas/tests/frame/methods --array-manager
+pytest pandas/tests/frame/test_reductions.py --array-manager
+pytest pandas/tests/reductions/ --array-manager
84 changes: 63 additions & 21 deletions pandas/core/internals/array_manager.py
@@ -7,7 +7,7 @@

import numpy as np

-from pandas._libs import algos as libalgos, lib
+from pandas._libs import NaT, algos as libalgos, lib
from pandas._typing import ArrayLike, DtypeObj, Hashable
from pandas.util._validators import validate_bool_kwarg

@@ -17,6 +17,8 @@
     is_dtype_equal,
     is_extension_array_dtype,
     is_numeric_dtype,
+    is_object_dtype,
+    is_timedelta64_ns_dtype,
 )
from pandas.core.dtypes.dtypes import ExtensionDtype, PandasDtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
@@ -25,7 +27,11 @@
import pandas.core.algorithms as algos
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.sparse import SparseDtype
-from pandas.core.construction import extract_array
+from pandas.core.construction import (
+    ensure_wrapped_if_datetimelike,
+    extract_array,
+    sanitize_array,
+)
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.api import Index, ensure_index
from pandas.core.internals.base import DataManager
@@ -173,18 +179,48 @@ def _verify_integrity(self) -> None:
     def reduce(
         self: T, func: Callable, ignore_failures: bool = False
     ) -> Tuple[T, np.ndarray]:
-        # TODO this still fails because `func` assumes to work on 2D arrays
-        # TODO implement ignore_failures
-        assert self.ndim == 2
+        """
+        Apply reduction function column-wise, returning a single-row ArrayManager.

-        res_arrays = []
-        for arr in self.arrays:
-            res = func(arr, axis=0)
-            res_arrays.append(np.array([res]))
+        Parameters
+        ----------
+        func : reduction function
+        ignore_failures : bool, default False
+            Whether to drop columns where func raises TypeError.

-        index = Index([None])  # placeholder
-        new_mgr = type(self)(res_arrays, [index, self.items])
-        indexer = np.arange(self.shape[0])
+        Returns
+        -------
+        ArrayManager
+        np.ndarray
+            Indexer of column indices that are retained.
+        """
+        result_arrays: List[np.ndarray] = []
+        result_indices: List[int] = []
+        for i, arr in enumerate(self.arrays):
+            try:
+                res = func(arr, axis=0)
+            except TypeError:
+                if not ignore_failures:
+                    raise
+            else:
+                # TODO NaT doesn't preserve dtype, so we need to ensure to create
+                # a timedelta result array if original was timedelta
+                # what if datetime results in timedelta? (eg std)
+                if res is NaT and is_timedelta64_ns_dtype(arr.dtype):
+                    result_arrays.append(np.array(["NaT"], dtype="timedelta64[ns]"))
+                else:
+                    result_arrays.append(sanitize_array([res], None))
Comment on lines +237 to +243

@jorisvandenbossche (Member, Author) commented on Feb 10, 2021:

This is an ugly special case ... But as far as I can see, it is the consequence of storing Datetime/TimedeltaArray as the 1D array for those dtypes in ArrayManager, instead of the numpy ndarray version.

A reviewer (Member) replied:

res = arr.reshape(-1, 1)._reduce(op, axis=0, ...)?

@jorisvandenbossche (Member, Author) replied:

Hmm, yes. That would need the Manager to get the actual op name, and not the function object.

Now, just passing the op name in addition is of course not difficult. But I could also do a precursor that moves creating the function from the op name (and defining blk_func) into the Manager as well. It's moving more things into the internals, but I think that's actually a cleaner separation of concerns: you could say that the DataFrame shouldn't need to decide which function object the Manager uses, it just needs to specify the op name. (This already happens for EAs, which simply ignore the function object.)

A reviewer (Member) replied:

> res is NaT and is_timedelta64_ns_dtype(arr.dtype)

I think this will be OK, but what about if arr.dtype is dt64 and the reduction is std? Then we still want td64nat, right?

@jorisvandenbossche (Member, Author) replied:

Yes, I know, that case is not covered (see my comment "what if datetime results in timedelta? (eg std)"). I can make the TODO more explicit.

(Note that this is a bug that already exists on master for ArrayManager as well; it's not caused by the changes in this PR, it's just not yet addressed by it.)

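A note for readers following this thread: the special case exists because a scalar NaT result cannot simply be re-wrapped into an array without losing the original dtype. A minimal sketch of that behaviour (plain NumPy/pandas, not part of this diff):

```python
import numpy as np
import pandas as pd

# e.g. what min()/max() return for an all-NaT timedelta column
res = pd.NaT

# Naively wrapping the scalar produces an object array ...
print(np.array([res]).dtype)  # object

# ... so the result column has to be built with an explicit timedelta dtype,
# as the special case above does.
print(np.array(["NaT"], dtype="timedelta64[ns]").dtype)  # timedelta64[ns]
```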
+                result_indices.append(i)

+        index = Index._simple_new(np.array([None], dtype=object))  # placeholder
+        if ignore_failures:
+            indexer = np.array(result_indices)
+            columns = self.items[result_indices]
+        else:
+            indexer = np.arange(self.shape[0])
+            columns = self.items

+        new_mgr = type(self)(result_arrays, [index, columns])
         return new_mgr, indexer
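For context on how `ignore_failures` surfaces at the DataFrame level: reductions without `numeric_only` rely on it to drop columns whose dtype does not support the op. A rough sketch of the observable behaviour, assuming the reduction semantics pandas had at the time of this PR (where e.g. `DataFrame.mean()` silently drops columns that raise `TypeError`):

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})

# mean() raises TypeError for the object column "b"; with ignore_failures=True
# the manager keeps only the columns where the reduction succeeded and returns
# an indexer of the retained column positions.
result = df.mean()
print(result)  # Series with index ["a"] only
```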

def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
@@ -473,15 +509,19 @@ def is_single_block(self) -> bool:

     def get_bool_data(self, copy: bool = False) -> ArrayManager:
         """
-        Parameters
-        ----------
-        copy : bool, default False
-            Whether to copy the blocks
+        Select columns that are bool-dtype and object-dtype columns that are all-bool.
         """
-        mask = np.array([is_bool_dtype(t) for t in self.get_dtypes()], dtype="object")
-        arrays = [self.arrays[i] for i in np.nonzero(mask)[0]]
+        arrays = []
+        indices = []
+        for i, arr in enumerate(self.arrays):
+            if is_bool_dtype(arr.dtype) or (
+                is_object_dtype(arr.dtype) and lib.is_bool_array(arr)
+            ):
+                arrays.append(arr)
+                indices.append(i)

         # TODO copy?
-        new_axes = [self._axes[0], self._axes[1][mask]]
+        new_axes = [self._axes[0], self._axes[1][indices]]
         return type(self)(arrays, new_axes)
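The rewritten `get_bool_data` now also picks up object-dtype columns whose values are all booleans, via the internal `lib.is_bool_array` helper. A small sketch of the per-column check (the column arrays below are illustrative, and the imports are pandas-internal, so subject to change):

```python
import numpy as np
from pandas._libs import lib
from pandas.api.types import is_bool_dtype, is_object_dtype

cols = {
    "plain_bool": np.array([True, False, True]),
    "object_bool": np.array([True, False, True], dtype=object),
    "strings": np.array(["a", "b", "c"], dtype=object),
}

for name, arr in cols.items():
    # same condition as in the diff above
    keep = is_bool_dtype(arr.dtype) or (
        is_object_dtype(arr.dtype) and lib.is_bool_array(arr)
    )
    print(name, keep)  # plain_bool True, object_bool True, strings False
```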

def get_numeric_data(self, copy: bool = False) -> ArrayManager:
@@ -668,9 +708,11 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
         if isinstance(value, np.ndarray) and value.ndim == 2:
             value = value[0, :]

+        # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
+        # but we should avoid that and pass directly the proper array
+        value = ensure_wrapped_if_datetimelike(value)
+
         assert isinstance(value, (np.ndarray, ExtensionArray))
-        # value = np.asarray(value)
-        # assert isinstance(value, np.ndarray)
         assert len(value) == len(self._axes[0])
         self.arrays[loc] = value
         return
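To illustrate what the added `ensure_wrapped_if_datetimelike` call does in `iset`: a raw datetime64/timedelta64 ndarray gets wrapped into the corresponding pandas array class that the ArrayManager stores, while other arrays pass through untouched. A minimal sketch (again using a pandas-internal helper, behaviour as I understand it):

```python
import numpy as np
from pandas.core.construction import ensure_wrapped_if_datetimelike

raw = np.array(["2021-01-01", "2021-02-25"], dtype="datetime64[ns]")
wrapped = ensure_wrapped_if_datetimelike(raw)
print(type(wrapped).__name__)  # DatetimeArray

ints = np.array([1, 2, 3])
print(ensure_wrapped_if_datetimelike(ints) is ints)  # True: passed through unchanged
```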
3 changes: 3 additions & 0 deletions pandas/tests/reductions/test_reductions.py
@@ -3,6 +3,8 @@
import numpy as np
import pytest

+import pandas.util._test_decorators as td
+
import pandas as pd
from pandas import (
Categorical,
@@ -288,6 +290,7 @@ def test_numpy_minmax_timedelta64(self):
with pytest.raises(ValueError, match=errmsg):
np.argmax(td, out=0)

+    @td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile
def test_timedelta_ops(self):
# GH#4984
# make sure ops return Timedelta