Skip to content

Commit

Permalink
BUG/TST: assure conversions of datetimelikes for object, numeric dtypes
Browse files Browse the repository at this point in the history
closes #19176
  • Loading branch information
jreback committed Jan 13, 2018
1 parent 8347ff8 commit 29fbc64
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 42 deletions.
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,11 @@ Conversion
- Bug in localization of a naive datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`17415`)
- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)



- Bug in ``.astype()`` to non-ns timedelta units where the result would hold the incorrect dtype (:issue:`19176`, :issue:`19223`)


Indexing
^^^^^^^^

Expand Down
26 changes: 24 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ from np_datetime cimport (check_dts_bounds,

from util cimport (is_string_object,
is_datetime64_object,
is_integer_object, is_float_object)
is_integer_object, is_float_object, is_array)

from timedeltas cimport cast_from_unit
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
Expand All @@ -45,6 +45,8 @@ from nattype cimport NPY_NAT, checknull_with_nat
# Constants

cdef int64_t DAY_NS = 86400000000000LL
NS_DTYPE = np.dtype('M8[ns]')
TD_DTYPE = np.dtype('m8[ns]')

UTC = pytz.UTC

Expand Down Expand Up @@ -73,13 +75,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
return ival


def ensure_datetime64ns(ndarray arr):
def ensure_datetime64ns(ndarray arr, copy=True):
"""
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
Parameters
----------
arr : ndarray
copy : boolean, default True
Returns
-------
Expand All @@ -104,6 +107,8 @@ def ensure_datetime64ns(ndarray arr):

unit = get_datetime64_unit(arr.flat[0])
if unit == PANDAS_FR_ns:
if copy :
arr = arr.copy()
result = arr
else:
for i in range(n):
Expand All @@ -117,6 +122,23 @@ def ensure_datetime64ns(ndarray arr):
return result


def ensure_timedelta64ns(ndarray arr, copy=True):
    """
    Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'

    Parameters
    ----------
    arr : ndarray
        Array to cast; the conversion is delegated to ``arr.astype``.
    copy : boolean, default True
        Forwarded unchanged as the ``copy`` argument of ``arr.astype``.

    Returns
    -------
    result : ndarray with dtype timedelta64[ns]
    """
    # TD_DTYPE is the module-level constant np.dtype('m8[ns]')
    return arr.astype(TD_DTYPE, copy=copy)


def datetime_to_datetime64(ndarray[object] values):
"""
Convert ndarray of datetime-like objects to int64 array representing
Expand Down
52 changes: 18 additions & 34 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from pandas._libs import tslib, lib
from pandas._libs.tslib import iNaT
from pandas.compat import string_types, text_type, PY3
from pandas.compat import string_types, text_type
from .common import (_ensure_object, is_bool, is_integer, is_float,
is_complex, is_datetimetz, is_categorical_dtype,
is_datetimelike,
Expand All @@ -24,7 +24,7 @@
pandas_dtype,
_ensure_int8, _ensure_int16,
_ensure_int32, _ensure_int64,
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
_NS_DTYPE, _TD_DTYPE,
_POSSIBLY_CAST_DTYPES)
from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
Expand Down Expand Up @@ -656,33 +656,29 @@ def astype_nansafe(arr, dtype, copy=True):
return tslib.ints_to_pydatetime(arr.view(np.int64))
elif dtype == np.int64:
return arr.view(dtype)
elif dtype != _NS_DTYPE:
raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))
return arr.astype(_NS_DTYPE)

# allow frequency conversions
if dtype.kind == 'M':
return arr.astype(dtype)

raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))

elif is_timedelta64_dtype(arr):
if dtype == np.int64:
return arr.view(dtype)
elif dtype == object:
return tslib.ints_to_pytimedelta(arr.view(np.int64))

# in py3, timedelta64[ns] are int64
elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
(not PY3 and dtype != _TD_DTYPE)):

# allow frequency conversions
if dtype.kind == 'm':
mask = isna(arr)
result = arr.astype(dtype).astype(np.float64)
result[mask] = np.nan
return result
# allow frequency conversions
if dtype.kind == 'm':
return arr.astype(dtype)

raise TypeError("cannot astype a timedelta from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))
raise TypeError("cannot astype a timedelta from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))

return arr.astype(_TD_DTYPE)
elif (np.issubdtype(arr.dtype, np.floating) and
np.issubdtype(dtype, np.integer)):

Expand All @@ -704,19 +700,7 @@ def astype_nansafe(arr, dtype, copy=True):

if copy:

if arr.dtype == dtype:
return arr.copy()

# we handle datetimelikes with pandas machinery
# to be robust to the input type
elif is_datetime64_dtype(dtype):
from pandas import to_datetime
return to_datetime(arr).values
elif is_timedelta64_dtype(dtype):
from pandas import to_timedelta
return to_timedelta(arr).values

return arr.astype(dtype)
return arr.astype(dtype, copy=True)
return arr.view(dtype)


Expand Down
8 changes: 6 additions & 2 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pandas.compat import (string_types, text_type, binary_type,
PY3, PY36)
from pandas._libs import algos, lib
from pandas._libs.tslibs import conversion
from .dtypes import (CategoricalDtype, CategoricalDtypeType,
DatetimeTZDtype, DatetimeTZDtypeType,
PeriodDtype, PeriodDtypeType,
Expand All @@ -21,8 +22,8 @@
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
'int32', 'uint32', 'int64', 'uint64']])

_NS_DTYPE = np.dtype('M8[ns]')
_TD_DTYPE = np.dtype('m8[ns]')
_NS_DTYPE = conversion.NS_DTYPE
_TD_DTYPE = conversion.TD_DTYPE
_INT64_DTYPE = np.dtype(np.int64)

# oh the troubles to reduce import time
Expand All @@ -31,6 +32,9 @@
_ensure_float64 = algos.ensure_float64
_ensure_float32 = algos.ensure_float32

_ensure_datetime64ns = conversion.ensure_datetime64ns
_ensure_timedelta64ns = conversion.ensure_timedelta64ns


def _ensure_float(arr):
"""
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1954,6 +1954,13 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
_can_hold_na = True
is_numeric = False

def __init__(self, values, placement, fastpath=False, **kwargs):
    """
    Build the block, coercing ``values`` to timedelta64[ns] first.

    Values whose dtype is not ``_TD_DTYPE`` are passed through
    ``conversion.ensure_timedelta64ns`` before the parent initializer
    is invoked.
    """
    already_ns = values.dtype == _TD_DTYPE
    if not already_ns:
        values = conversion.ensure_timedelta64ns(values)

    # NOTE(review): the incoming ``fastpath`` argument is not forwarded;
    # the parent initializer is always called with fastpath=True.
    parent = super(TimeDeltaBlock, self)
    parent.__init__(values, fastpath=True, placement=placement, **kwargs)

@property
def _box_func(self):
return lambda x: tslib.Timedelta(x, unit='ns')
Expand Down
55 changes: 55 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,61 @@ def test_astype_categoricaldtype_class_raises(self, cls):
with tm.assert_raises_regex(TypeError, xpr):
df['A'].astype(cls)

@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
    # gh-19223
    # numeric (int64 / float64) frames cast to every datetime64 and
    # timedelta64 unit must match the same cast done on the raw ndarray
    target = "{}[{}]".format(dtype, unit)
    values = np.array([[1, 2, 3]], dtype=arr_dtype)

    expected = DataFrame(values.astype(target))
    result = DataFrame(values).astype(target)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_datetime_unit(self, unit):
    # gh-19223
    # a frame built from a datetime64[unit] array, cast to that same
    # dtype, must match the same cast done on the raw ndarray
    target = "M8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    expected = DataFrame(values.astype(target))
    result = DataFrame(values).astype(target)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_timedelta_unit(self, unit):
    # gh-19223
    # a frame built from a timedelta64[unit] array, cast to that same
    # dtype, must match the same cast done on the raw ndarray
    target = "m8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    expected = DataFrame(values.astype(target))
    result = DataFrame(values).astype(target)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_incorrect_datetimelike(self, unit):
    # trying to astype a m to a M, or vice-versa
    # gh-19176
    dtype = "M8[{}]".format(unit)
    other = "m8[{}]".format(unit)

    # Build each frame outside the ``raises`` context: only the ``astype``
    # call is expected to raise, and a TypeError coming from array or
    # DataFrame construction must fail the test rather than satisfy it.
    df = DataFrame(np.array([[1, 2, 3]], dtype=dtype))
    with pytest.raises(TypeError):
        df.astype(other)

    df = DataFrame(np.array([[1, 2, 3]], dtype=other))
    with pytest.raises(TypeError):
        df.astype(dtype)

def test_timedeltas(self):
df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3,
freq='D')),
Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,10 +552,6 @@ def test_constructor_dtype_datetime64(self):
s.iloc[0] = np.nan
assert s.dtype == 'M8[ns]'

# invalid astypes
for t in ['s', 'D', 'us', 'ms']:
pytest.raises(TypeError, s.astype, 'M8[%s]' % t)

# GH3414 related
pytest.raises(TypeError, lambda x: Series(
Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
Expand Down Expand Up @@ -707,6 +703,20 @@ def test_constructor_with_datetime_tz(self):
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
assert_series_equal(s, expected)

@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
    # gh-19223
    # numeric (int64 / float64) Series cast to every datetime64 and
    # timedelta64 unit must match the same cast done on the raw ndarray
    target = "{}[{}]".format(dtype, unit)
    values = np.array([1, 2, 3], dtype=arr_dtype)

    expected = Series(values.astype(target))
    result = Series(values).astype(target)

    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('arg',
['2013-01-01 00:00:00', pd.NaT, np.nan, None])
def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
Expand Down

0 comments on commit 29fbc64

Please sign in to comment.