Skip to content

Commit

Permalink
REF/PERF: don't use hashtable in IndexEngine.__contains__ (pandas-dev#…
Browse files: browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and phofl committed Feb 14, 2022
1 parent ebd2049 commit 52db085
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 24 deletions.
13 changes: 8 additions & 5 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,12 @@ cdef class IndexEngine:
self._np_type = values.dtype.type

def __contains__(self, val: object) -> bool:
# We assume before we get here:
# - val is hashable
self._ensure_mapping_populated()
return val in self.mapping
hash(val)
try:
self.get_loc(val)
except KeyError:
return False
return True

cpdef get_loc(self, object val):
# -> Py_ssize_t | slice | ndarray[bool]
Expand All @@ -141,7 +143,7 @@ cdef class IndexEngine:
if is_definitely_invalid_key(val):
raise TypeError(f"'{val}' is an invalid key")

self._check_type(val)
val = self._check_type(val)

if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
Expand Down Expand Up @@ -270,6 +272,7 @@ cdef class IndexEngine:

cdef _check_type(self, object val):
hash(val)
return val

@property
def is_mapping_populated(self) -> bool:
Expand Down
5 changes: 5 additions & 0 deletions pandas/_libs/index_class_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ cdef class {{name}}Engine(IndexEngine):
cdef _check_type(self, object val):
{{if name not in {'Float64', 'Float32'} }}
if not util.is_integer_object(val):
if util.is_float_object(val):
# Make sure Int64Index.get_loc(2.0) works
if val.is_integer():
return int(val)
raise KeyError(val)
{{if name.startswith("U")}}
if val < 0:
Expand All @@ -46,6 +50,7 @@ cdef class {{name}}Engine(IndexEngine):
# in particular catch bool and avoid casting True -> 1.0
raise KeyError(val)
{{endif}}
return val


{{endfor}}
2 changes: 0 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6422,8 +6422,6 @@ def _maybe_cast_indexer(self, key):
If we have a float key and are not a floating index, then try to cast
to an int if equivalent.
"""
if not self.is_floating():
return com.cast_scalar_indexer(key)
return key

def _maybe_cast_listlike_indexer(self, target) -> Index:
Expand Down
17 changes: 0 additions & 17 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

from pandas.core.dtypes.common import (
is_dtype_equal,
is_float,
is_float_dtype,
is_integer_dtype,
is_numeric_dtype,
Expand Down Expand Up @@ -213,22 +212,6 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
# dtype for Int64Index, UInt64Index etc. Needed for backwards compat.
return cls._default_dtype

def __contains__(self, key) -> bool:
"""
Check if key is a float and has a decimal. If it has, return False.
"""
if not is_integer_dtype(self.dtype):
return super().__contains__(key)

hash(key)
try:
if is_float(key) and int(key) != key:
# otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
return False
return key in self._engine
except (OverflowError, TypeError, ValueError):
return False

# ----------------------------------------------------------------
# Indexing Methods

Expand Down

0 comments on commit 52db085

Please sign in to comment.