From 946b1cf16c42bcf76b33e3b8ebba2eaa80619020 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Sat, 4 Dec 2021 09:54:05 +0100
Subject: [PATCH] PERF: avoid calling .values to know the result dtype in eval()

---
Reviewer notes (not part of the commit message):
  * ops.py: the `type` property now asks `self._value._mgr.as_array_dtype()`
    first and only falls back to `.values.dtype`, then `.dtype`, then
    `type(self._value)`, each time the previous lookup raises AttributeError.
  * array_manager.py: new `as_array_dtype()` returns `np.dtype(float)` when
    `self.arrays` is empty, otherwise `interleaved_dtype(...)` over the
    per-array dtypes.
  * managers.py: new `as_array_dtype()` returns `np.dtype(float)` when
    `self.blocks` is empty, the dtype of the only block when
    `self.is_single_block`, otherwise `interleaved_dtype(...)` over the
    per-block dtypes.
  * NOTE(review): the outer `except AttributeError` in ops.py would also hide
    an AttributeError raised inside `as_array_dtype()` itself -- confirm that
    silently falling back to `.values.dtype` is acceptable in that case.

 pandas/core/computation/ops.py         | 15 +++++++++------
 pandas/core/internals/array_manager.py |  9 +++++++++
 pandas/core/internals/managers.py      | 16 ++++++++++++++++
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py
index 8758565cf9f2a..72b2d2444a14e 100644
--- a/pandas/core/computation/ops.py
+++ b/pandas/core/computation/ops.py
@@ -146,15 +146,18 @@ def is_scalar(self) -> bool:
     @property
     def type(self):
         try:
-            # potentially very slow for large, mixed dtype frames
-            return self._value.values.dtype
+            # .values for dataframe would be slow
+            return self._value._mgr.as_array_dtype()
         except AttributeError:
             try:
-                # ndarray
-                return self._value.dtype
+                return self._value.values.dtype
             except AttributeError:
-                # scalar
-                return type(self._value)
+                try:
+                    # ndarray
+                    return self._value.dtype
+                except AttributeError:
+                    # scalar
+                    return type(self._value)
 
     return_type = type
 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 598974979fefb..75d6476deb676 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -1145,6 +1145,15 @@ def as_array(
 
         return result
 
+    def as_array_dtype(self):
+        """
+        The dtype of the np.ndarray when you would convert self to a numpy array
+        (i.e. calling ``mgr.as_array()`` or ``df.values``).
+        """
+        if len(self.arrays) == 0:
+            return np.dtype(float)
+        return interleaved_dtype([arr.dtype for arr in self.arrays])
+
 
 class SingleArrayManager(BaseArrayManager, SingleDataManager):
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 28b6ce3f25ced..e157a20ed0c00 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1549,6 +1549,22 @@ def as_array(
 
         return arr.transpose()
 
+    def as_array_dtype(self):
+        """
+        The dtype of the np.ndarray when you would convert self to a numpy array
+        (i.e. calling ``mgr.as_array()`` or ``df.values``).
+        """
+        if len(self.blocks) == 0:
+            return np.dtype(float)
+
+        if self.is_single_block:
+            return self.blocks[0].dtype
+
+        dtype = interleaved_dtype(  # type: ignore[assignment]
+            [blk.dtype for blk in self.blocks]
+        )
+        return dtype
+
     def _interleave(
         self,
         dtype: np.dtype | None = None,