Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG/TST: assure conversions of datetimelikes for object, numeric dtypes #19224

Merged
merged 1 commit into from
Jan 13, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,11 @@ Conversion
- Bug in localization of a naive, datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`17415`)
- :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`)



- Bug in ``.astype()`` to non-ns timedelta units where the result would hold the incorrect dtype (:issue:`19176`, :issue:`19223`, :issue:`12425`)


Indexing
^^^^^^^^

Expand Down
26 changes: 24 additions & 2 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ from np_datetime cimport (check_dts_bounds,

from util cimport (is_string_object,
is_datetime64_object,
is_integer_object, is_float_object)
is_integer_object, is_float_object, is_array)

from timedeltas cimport cast_from_unit
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
Expand All @@ -45,6 +45,8 @@ from nattype cimport NPY_NAT, checknull_with_nat
# Constants

cdef int64_t DAY_NS = 86400000000000LL
NS_DTYPE = np.dtype('M8[ns]')
TD_DTYPE = np.dtype('m8[ns]')

UTC = pytz.UTC

Expand Down Expand Up @@ -73,13 +75,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
return ival


def ensure_datetime64ns(ndarray arr):
def ensure_datetime64ns(ndarray arr, copy=True):
"""
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
Parameters
----------
arr : ndarray
copy : boolean, default True
Returns
-------
Expand All @@ -104,6 +107,8 @@ def ensure_datetime64ns(ndarray arr):

unit = get_datetime64_unit(arr.flat[0])
if unit == PANDAS_FR_ns:
if copy:
arr = arr.copy()
result = arr
else:
for i in range(n):
Expand All @@ -117,6 +122,23 @@ def ensure_datetime64ns(ndarray arr):
return result


def ensure_timedelta64ns(ndarray arr, copy=True):
    """
    Cast an array to dtype 'timedelta64[ns]'.

    Parameters
    ----------
    arr : ndarray
    copy : boolean, default True
        Forwarded unchanged to ``arr.astype``.

    Returns
    -------
    result : ndarray with dtype timedelta64[ns]
    """
    # The conversion is delegated entirely to numpy's astype.
    result = arr.astype(TD_DTYPE, copy=copy)
    return result


def datetime_to_datetime64(ndarray[object] values):
"""
Convert ndarray of datetime-like objects to int64 array representing
Expand Down
42 changes: 18 additions & 24 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,33 +656,39 @@ def astype_nansafe(arr, dtype, copy=True):
return tslib.ints_to_pydatetime(arr.view(np.int64))
elif dtype == np.int64:
return arr.view(dtype)
elif dtype != _NS_DTYPE:
raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))
return arr.astype(_NS_DTYPE)

# allow frequency conversions
if dtype.kind == 'M':
return arr.astype(dtype)

raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))

elif is_timedelta64_dtype(arr):
if dtype == np.int64:
return arr.view(dtype)
elif dtype == object:
return tslib.ints_to_pytimedelta(arr.view(np.int64))

# in py3, timedelta64[ns] are int64
elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
(not PY3 and dtype != _TD_DTYPE)):
if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
(not PY3 and dtype != _TD_DTYPE)):

# allow frequency conversions
# we return a float here!
if dtype.kind == 'm':
mask = isna(arr)
result = arr.astype(dtype).astype(np.float64)
result[mask] = np.nan
return result
elif dtype == _TD_DTYPE:
return arr.astype(_TD_DTYPE, copy=copy)

raise TypeError("cannot astype a timedelta from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))
raise TypeError("cannot astype a timedelta from [{from_dtype}] "
"to [{to_dtype}]".format(from_dtype=arr.dtype,
to_dtype=dtype))

return arr.astype(_TD_DTYPE)
elif (np.issubdtype(arr.dtype, np.floating) and
np.issubdtype(dtype, np.integer)):

Expand All @@ -704,19 +710,7 @@ def astype_nansafe(arr, dtype, copy=True):

if copy:

if arr.dtype == dtype:
return arr.copy()

# we handle datetimelikes with pandas machinery
# to be robust to the input type
elif is_datetime64_dtype(dtype):
from pandas import to_datetime
return to_datetime(arr).values
elif is_timedelta64_dtype(dtype):
from pandas import to_timedelta
return to_timedelta(arr).values

return arr.astype(dtype)
return arr.astype(dtype, copy=True)
return arr.view(dtype)


Expand Down
8 changes: 6 additions & 2 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pandas.compat import (string_types, text_type, binary_type,
PY3, PY36)
from pandas._libs import algos, lib
from pandas._libs.tslibs import conversion
from .dtypes import (CategoricalDtype, CategoricalDtypeType,
DatetimeTZDtype, DatetimeTZDtypeType,
PeriodDtype, PeriodDtypeType,
Expand All @@ -21,8 +22,8 @@
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
'int32', 'uint32', 'int64', 'uint64']])

_NS_DTYPE = np.dtype('M8[ns]')
_TD_DTYPE = np.dtype('m8[ns]')
_NS_DTYPE = conversion.NS_DTYPE
_TD_DTYPE = conversion.TD_DTYPE
_INT64_DTYPE = np.dtype(np.int64)

# oh the troubles to reduce import time
Expand All @@ -31,6 +32,9 @@
_ensure_float64 = algos.ensure_float64
_ensure_float32 = algos.ensure_float32

_ensure_datetime64ns = conversion.ensure_datetime64ns
_ensure_timedelta64ns = conversion.ensure_timedelta64ns


def _ensure_float(arr):
"""
Expand Down
9 changes: 8 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
values = astype_nansafe(values.ravel(), dtype, copy=True)
values = values.reshape(self.shape)

newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
newb = make_block(values, placement=self.mgr_locs,
klass=klass)
except:
if errors == 'raise':
Expand Down Expand Up @@ -1954,6 +1954,13 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
_can_hold_na = True
is_numeric = False

def __init__(self, values, placement, fastpath=False, **kwargs):
    # Anything that is not already timedelta64[ns] is coerced to it
    # before being handed to the parent block.
    already_ns = values.dtype == _TD_DTYPE
    if not already_ns:
        values = conversion.ensure_timedelta64ns(values)

    # NOTE: the ``fastpath`` argument is accepted for signature
    # compatibility, but the parent constructor is always invoked
    # with fastpath=True.
    parent = super(TimeDeltaBlock, self)
    parent.__init__(values, fastpath=True, placement=placement, **kwargs)

@property
def _box_func(self):
return lambda x: tslib.Timedelta(x, unit='ns')
Expand Down
65 changes: 65 additions & 0 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,71 @@ def test_astype_categoricaldtype_class_raises(self, cls):
with tm.assert_raises_regex(TypeError, xpr):
df['A'].astype(cls)

@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
    # gh-19223 / gh-12425
    # Numeric (int64 / float64) frames cast to each datetime64 / timedelta64
    # unit are compared against a frame built from numpy's own astype result.
    target = "{}[{}]".format(dtype, unit)
    values = np.array([[1, 2, 3]], dtype=arr_dtype)

    frame = DataFrame(values)
    result = frame.astype(target)

    converted = values.astype(target)
    expected = DataFrame(converted)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_datetime_unit(self, unit):
    # gh-19223
    # Start from a datetime64 array of the given unit and astype the frame
    # to that same unit; compare with a frame built from numpy's astype.
    target = "M8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    frame = DataFrame(values)
    result = frame.astype(target)

    converted = values.astype(target)
    expected = DataFrame(converted)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns'])
def test_astype_to_timedelta_unit_ns(self, unit):
    # gh-19223
    # Preserve the timedelta conversion: astype to m8[ns] from an m8[ns]
    # frame is compared with a frame built from numpy's astype result.
    target = "m8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    frame = DataFrame(values)
    result = frame.astype(target)

    converted = values.astype(target)
    expected = DataFrame(converted)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_timedelta_unit(self, unit):
    # gh-19223
    # For non-ns timedelta units the expected frame is built from the
    # frame's values cast to the unit and then to float.
    target = "m8[{}]".format(unit)
    values = np.array([[1, 2, 3]], dtype=target)

    frame = DataFrame(values)
    result = frame.astype(target)

    as_float = frame.values.astype(target).astype(float)
    expected = DataFrame(as_float)

    tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_astype_to_incorrect_datetimelike(self, unit):
    # gh-19224
    # Casting a datetime64 frame to timedelta64 (and the reverse) must raise
    # TypeError.
    dtype = "M8[{}]".format(unit)
    other = "m8[{}]".format(unit)

    # First datetime -> timedelta, then timedelta -> datetime.
    for source, target in [(dtype, other), (other, dtype)]:
        df = DataFrame(np.array([[1, 2, 3]], dtype=source))
        with pytest.raises(TypeError):
            df.astype(target)

def test_timedeltas(self):
df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3,
freq='D')),
Expand Down
24 changes: 11 additions & 13 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,25 +523,23 @@ def test_other_datetime_unit(self):
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)

def test_other_timedelta_unit(self):
@pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
def test_other_timedelta_unit(self, unit):
# GH 13389
df1 = pd.DataFrame({'entity_id': [101, 102]})
s = pd.Series([None, None], index=[101, 102], name='days')

for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
'timedelta64[ns]']:
dtype = "m8[{}]".format(unit)
df2 = s.astype(dtype).to_frame('days')
assert df2['days'].dtype == 'm8[ns]'

df2 = s.astype(dtype).to_frame('days')
assert df2['days'].dtype == dtype

result = df1.merge(df2, left_on='entity_id', right_index=True)
result = df1.merge(df2, left_on='entity_id', right_index=True)

exp = pd.DataFrame({'entity_id': [101, 102],
'days': np.array(['nat', 'nat'],
dtype=dtype)},
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)
exp = pd.DataFrame({'entity_id': [101, 102],
'days': np.array(['nat', 'nat'],
dtype=dtype)},
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)

def test_overlapping_columns_error_message(self):
df = DataFrame({'key': [1, 2, 3],
Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,10 +552,6 @@ def test_constructor_dtype_datetime64(self):
s.iloc[0] = np.nan
assert s.dtype == 'M8[ns]'

# invalid astypes
for t in ['s', 'D', 'us', 'ms']:
pytest.raises(TypeError, s.astype, 'M8[%s]' % t)

# GH3414 related
pytest.raises(TypeError, lambda x: Series(
Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
Expand Down Expand Up @@ -707,6 +703,20 @@ def test_constructor_with_datetime_tz(self):
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
assert_series_equal(s, expected)

@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
    # gh-19223
    # A numeric Series cast to each datetime64 / timedelta64 unit is compared
    # with a Series built from numpy's own astype result.
    target = "{}[{}]".format(dtype, unit)
    values = np.array([1, 2, 3], dtype=arr_dtype)

    series = Series(values)
    result = series.astype(target)

    converted = values.astype(target)
    expected = Series(converted)

    tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('arg',
['2013-01-01 00:00:00', pd.NaT, np.nan, None])
def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):
Expand Down
32 changes: 13 additions & 19 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -1649,32 +1649,26 @@ def test_invalid_ops(self):
pytest.raises(Exception, self.objSeries.__sub__,
np.array(1, dtype=np.int64))

def test_timedelta64_conversions(self):
@pytest.mark.parametrize("m", [1, 3, 10])
@pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
def test_timedelta64_conversions(self, m, unit):

startdate = Series(date_range('2013-01-01', '2013-01-03'))
enddate = Series(date_range('2013-03-01', '2013-03-03'))

s1 = enddate - startdate
s1[2] = np.nan

for m in [1, 3, 10]:
for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']:

# op
expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
result = s1 / np.timedelta64(m, unit)
assert_series_equal(result, expected)

if m == 1 and unit != 'ns':

# astype
result = s1.astype("timedelta64[{0}]".format(unit))
assert_series_equal(result, expected)
# op
expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
result = s1 / np.timedelta64(m, unit)
assert_series_equal(result, expected)

# reverse op
expected = s1.apply(
lambda x: Timedelta(np.timedelta64(m, unit)) / x)
result = np.timedelta64(m, unit) / s1
assert_series_equal(result, expected)
# reverse op
expected = s1.apply(
lambda x: Timedelta(np.timedelta64(m, unit)) / x)
result = np.timedelta64(m, unit) / s1
assert_series_equal(result, expected)

# astype
s = Series(date_range('20130101', periods=3))
Expand Down