From 8ba9b6fbc7f48b49e8f9166c7ddf973c6b3f2206 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 16 May 2017 10:10:50 +0200 Subject: [PATCH] use infer_dtype_from_scalar --- pandas/core/dtypes/cast.py | 4 ++-- pandas/core/util/hashing.py | 18 +++--------------- pandas/tests/util/test_hashing.py | 4 ++-- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 19d3792f73de7f..0089cc94fe6f4e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -333,7 +333,7 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def infer_dtype_from_scalar(val, pandas_dtype=False): +def infer_dtype_from_scalar(val, pandas_dtype=False, use_datetimetz=True): """ interpret the dtype from a scalar @@ -368,7 +368,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False): elif isinstance(val, (np.datetime64, datetime)): val = tslib.Timestamp(val) - if val is tslib.NaT or val.tz is None: + if val is tslib.NaT or val.tz is None or not use_datetimetz: dtype = np.dtype('M8[ns]') else: if pandas_dtype: diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index cec8ef35c3aaaf..87231ab0b77547 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -5,7 +5,6 @@ import numpy as np from pandas._libs import hashing -from pandas.compat import string_and_binary_types, text_type from pandas.core.dtypes.generic import ( ABCMultiIndex, ABCIndexClass, @@ -14,6 +13,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_list_like) from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.cast import infer_dtype_from_scalar # 16 byte long hashing key @@ -317,20 +317,8 @@ def _hash_scalar(val, encoding='utf8', hash_key=None): # this is to be consistent with the _hash_categorical implementation return np.array([np.iinfo(np.uint64).max], dtype='u8') - if isinstance(val, string_and_binary_types + (text_type,)): - vals = np.array([val], dtype=object) - else: - vals = np.array([val]) - - if vals.dtype == np.object_: - from pandas import Timestamp, Timedelta, Period, Interval - if isinstance(val, (Timestamp, Timedelta)): - vals = np.array([val.value]) - elif isinstance(val, (Period, Interval)): - pass - else: - from pandas import Index - vals = Index(vals).values + dtype, val = infer_dtype_from_scalar(val, use_datetimetz=False) + vals = np.array([val], dtype=dtype) return hash_array(vals, hash_key=hash_key, encoding=encoding, categorize=False) diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index f7de891e0e7d97..c0efe65371651f 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -81,13 +81,13 @@ def test_hash_tuples(self): def test_hash_tuple(self): # test equivalence between hash_tuples and hash_tuple - for tup in [(1, 'one'), (1, np.nan)]: + for tup in [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A')]: result = hash_tuple(tup) expected = hash_tuples([tup])[0] assert result == expected def test_hash_scalar(self): - for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"), + for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz='Europe/Brussels'), pd.Period('2012-01-01', freq='D'), pd.Timedelta('1 days'), pd.Interval(0, 1), np.nan, pd.NaT, None]: