From ce4eef3750052cec62ca0fe6536521dec523cd64 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 9 May 2017 06:10:24 -0400 Subject: [PATCH] PERF: fix clean_index_list perf (#16295) closes #16285 --- asv_bench/benchmarks/indexing.py | 3 +++ doc/source/whatsnew/v0.20.2.txt | 1 + pandas/_libs/lib.pyx | 22 +++++++++++++--------- pandas/core/indexes/base.py | 2 +- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 8947a0fdd796c..31af56b3715a5 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -19,6 +19,9 @@ def time_getitem_list_like(self): def time_getitem_array(self): self.s[np.arange(10000)] + def time_getitem_lists(self): + self.s[np.arange(10000).tolist()] + def time_iloc_array(self): self.s.iloc[np.arange(10000)] diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt index d89422631ed04..2a7b37c95230c 100644 --- a/doc/source/whatsnew/v0.20.2.txt +++ b/doc/source/whatsnew/v0.20.2.txt @@ -26,6 +26,7 @@ Enhancements Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ +- Performance regression fix when indexing with a list-like (:issue:`16285`) .. _whatsnew_0202.bug_fixes: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 31402c38c770d..f6e574b66a828 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -950,7 +950,6 @@ def clean_index_list(list obj): Utility used in pandas.core.index._ensure_index """ cdef: - ndarray[object] converted Py_ssize_t i, n = len(obj) object v bint all_arrays = 1 @@ -964,15 +963,20 @@ def clean_index_list(list obj): if all_arrays: return obj, all_arrays - converted = np.empty(n, dtype=object) - for i in range(n): - v = obj[i] - if PyList_Check(v) or np.PyArray_Check(v) or hasattr(v, '_data'): - converted[i] = tuple(v) - else: - converted[i] = v + # don't force numpy coerce with nan's + inferred = infer_dtype(obj) + if inferred in ['string', 'bytes', 'unicode', + 'mixed', 'mixed-integer']: + return np.asarray(obj, dtype=object), 0 + elif inferred in ['integer']: + + # TODO: we infer an integer but it *could* be a unint64 + try: + return np.asarray(obj, dtype='int64'), 0 + except OverflowError: + return np.asarray(obj, dtype='object'), 0 - return maybe_convert_objects(converted), 0 + return np.asarray(obj), 0 ctypedef fused pandas_string: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 82f3bf3b15462..9b29f1b04ff73 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3960,7 +3960,7 @@ def _ensure_index(index_like, copy=False): if isinstance(index_like, list): if type(index_like) != list: index_like = list(index_like) - # 2200 ? + converted, all_arrays = lib.clean_index_list(index_like) if len(converted) > 0 and all_arrays: