Skip to content

Commit

Permalink
PERF: fix clean_index_list perf (#16295)
Browse files Browse the repository at this point in the history
closes #16285
  • Loading branch information
jreback authored May 9, 2017
1 parent 0091810 commit ce4eef3
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 10 deletions.
3 changes: 3 additions & 0 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ def time_getitem_list_like(self):
def time_getitem_array(self):
self.s[np.arange(10000)]

def time_getitem_lists(self):
self.s[np.arange(10000).tolist()]

def time_iloc_array(self):
self.s.iloc[np.arange(10000)]

Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Enhancements
Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~

- Fixed a performance regression when indexing with a list-like (:issue:`16285`)


.. _whatsnew_0202.bug_fixes:
Expand Down
22 changes: 13 additions & 9 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,6 @@ def clean_index_list(list obj):
Utility used in pandas.core.index._ensure_index
"""
cdef:
ndarray[object] converted
Py_ssize_t i, n = len(obj)
object v
bint all_arrays = 1
Expand All @@ -964,15 +963,20 @@ def clean_index_list(list obj):
if all_arrays:
return obj, all_arrays

converted = np.empty(n, dtype=object)
for i in range(n):
v = obj[i]
if PyList_Check(v) or np.PyArray_Check(v) or hasattr(v, '_data'):
converted[i] = tuple(v)
else:
converted[i] = v
# don't force numpy coercion when NaNs are present
inferred = infer_dtype(obj)
if inferred in ['string', 'bytes', 'unicode',
'mixed', 'mixed-integer']:
return np.asarray(obj, dtype=object), 0
elif inferred in ['integer']:

# TODO: we infer an integer but it *could* be a uint64
try:
return np.asarray(obj, dtype='int64'), 0
except OverflowError:
return np.asarray(obj, dtype='object'), 0

return maybe_convert_objects(converted), 0
return np.asarray(obj), 0


ctypedef fused pandas_string:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3960,7 +3960,7 @@ def _ensure_index(index_like, copy=False):
if isinstance(index_like, list):
if type(index_like) != list:
index_like = list(index_like)
# 2200 ?

converted, all_arrays = lib.clean_index_list(index_like)

if len(converted) > 0 and all_arrays:
Expand Down

0 comments on commit ce4eef3

Please sign in to comment.