Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: passing timedelta-like to DatetimeIndex constructor, xref #23675 #23937

Merged
merged 1 commit into from
Nov 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,7 @@ Deprecations
`use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
- :func:`pandas.read_excel` has deprecated accepting ``usecols`` as an integer. Please pass in a list of ints from 0 to ``usecols`` inclusive instead (:issue:`23527`)
- Constructing a :class:`TimedeltaIndex` from ``datetime64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23539`)
- Constructing a :class:`DatetimeIndex` from ``timedelta64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23675`)
- The ``keep_tz=False`` option (the default) of the ``keep_tz`` keyword of
:meth:`DatetimeIndex.to_series` is deprecated (:issue:`17832`).
- Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`)
Expand Down
82 changes: 80 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_int64_dtype,
is_object_dtype)
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype,
is_timedelta64_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -1421,6 +1422,83 @@ def to_julian_date(self):
DatetimeArrayMixin._add_datetimelike_methods()


# -------------------------------------------------------------------
# Constructor Helpers

def maybe_infer_tz(tz, inferred_tz):
    """
    Reconcile a user-supplied timezone with one inferred from the data.

    Parameters
    ----------
    tz : tzinfo or None
        Timezone explicitly requested by the caller, if any.
    inferred_tz : tzinfo or None
        Timezone found on the data, if any.

    Returns
    -------
    tz : tzinfo or None
        ``inferred_tz`` when no timezone was requested, otherwise ``tz``.

    Raises
    ------
    TypeError : if both timezones are present but do not match
    """
    if tz is None:
        # Nothing was requested: whatever the data carries (possibly None)
        # is the answer.
        return inferred_tz

    if inferred_tz is None or timezones.tz_compare(tz, inferred_tz):
        # Either the data is naive or both sides agree; the requested
        # timezone stands.
        return tz

    raise TypeError('data is already tz-aware {inferred_tz}, unable to '
                    'set specified tz: {tz}'
                    .format(inferred_tz=inferred_tz, tz=tz))


def maybe_convert_dtype(data, copy):
    """
    Convert data based on dtype conventions, issuing deprecation warnings
    or errors where appropriate.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
        Whether the caller still intends to copy ``data``.

    Returns
    -------
    data : np.ndarray or pd.Index
        The (possibly converted) data.
    copy : bool
        Set to False when the conversion performed here already produced
        a new object; otherwise returned unchanged.

    Raises
    ------
    TypeError : PeriodDtype data is passed
    """
    if is_float_dtype(data):
        # Note: we must cast to datetime64[ns] here in order to treat these
        #  as wall-times instead of UTC timestamps.
        data = data.astype(_NS_DTYPE)
        copy = False
        # TODO: deprecate this behavior to instead treat symmetrically
        #  with integer dtypes.  See discussion in GH#23675

    elif is_timedelta64_dtype(data):
        warnings.warn("Passing timedelta64-dtype data is deprecated, will "
                      "raise a TypeError in a future version",
                      FutureWarning, stacklevel=3)
        if data.dtype != np.dtype('m8[ns]'):
            # Normalize the unit before reinterpreting: viewing e.g.
            #  timedelta64[s] directly as datetime64[ns] would silently
            #  treat a count of seconds as a count of nanoseconds.
            data = data.astype('m8[ns]')
        # Reinterpret the nanosecond offsets as nanoseconds since the epoch
        data = data.view(_NS_DTYPE)

    elif is_period_dtype(data):
        # Note: without explicitly raising here, PeriodIndex
        #  test_setops.test_join_does_not_recur fails
        raise TypeError("Passing PeriodDtype data is invalid.  "
                        "Use `data.to_timestamp()` instead")

    elif is_extension_type(data) and not is_datetime64tz_dtype(data):
        # Includes categorical
        # TODO: We have no tests for these
        data = np.array(data, dtype=np.object_)
        copy = False

    return data, copy


def _generate_regular_range(cls, start, end, periods, freq):
"""
Generate a range of dates with the spans between dates described by
Expand Down
50 changes: 25 additions & 25 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,17 @@

from pandas.core.dtypes.common import (
_INT64_DTYPE, _NS_DTYPE, ensure_int64, is_datetime64_dtype,
is_datetime64_ns_dtype, is_datetimetz, is_dtype_equal, is_float,
is_integer, is_integer_dtype, is_list_like, is_period_dtype, is_scalar,
is_string_like, pandas_dtype)
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_datetimetz,
is_dtype_equal, is_float, is_integer, is_integer_dtype, is_list_like,
is_period_dtype, is_scalar, is_string_like, pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.datetimes import (
DatetimeArrayMixin as DatetimeArray, _to_m8)
DatetimeArrayMixin as DatetimeArray, _to_m8, maybe_convert_dtype,
maybe_infer_tz)
from pandas.core.base import _shared_docs
import pandas.core.common as com
from pandas.core.indexes.base import Index, _index_shared_docs
Expand Down Expand Up @@ -246,50 +247,49 @@ def __new__(cls, data=None,
name = data.name

freq, freq_infer = dtl.maybe_infer_freq(freq)
if freq is None and hasattr(data, "freq"):
# i.e. DatetimeArray/Index
freq = data.freq
verify_integrity = False

# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArray)):
# other iterable of some kind
if not isinstance(data, (list, tuple)):
if not hasattr(data, "dtype"):
# e.g. list, tuple
if np.ndim(data) == 0:
# i.e. generator
data = list(data)
data = np.asarray(data, dtype='O')
data = np.asarray(data)
copy = False
elif isinstance(data, ABCSeries):
data = data._values

# data must be Index or np.ndarray here
# By this point we are assured to have either a numpy array or Index
data, copy = maybe_convert_dtype(data, copy)

if not (is_datetime64_dtype(data) or is_datetimetz(data) or
is_integer_dtype(data) or lib.infer_dtype(data) == 'integer'):
data = tools.to_datetime(data, dayfirst=dayfirst,
yearfirst=yearfirst)

if isinstance(data, DatetimeArray):
if tz is None:
tz = data.tz
elif data.tz is None:
data = data.tz_localize(tz, ambiguous=ambiguous)
else:
# the tz's must match
if not timezones.tz_compare(tz, data.tz):
msg = ('data is already tz-aware {0}, unable to '
'set specified tz: {1}')
raise TypeError(msg.format(data.tz, tz))

if is_datetime64tz_dtype(data):
tz = maybe_infer_tz(tz, data.tz)
subarr = data._data

if freq is None:
freq = data.freq
verify_integrity = False
elif issubclass(data.dtype.type, np.datetime64):
elif is_datetime64_dtype(data):
# tz-naive DatetimeArray/Index or ndarray[datetime64]
data = getattr(data, "_data", data)
if data.dtype != _NS_DTYPE:
data = conversion.ensure_datetime64ns(data)

if tz is not None:
# Convert tz-naive to UTC
tz = timezones.maybe_get_tz(tz)
data = conversion.tz_localize_to_utc(data.view('i8'), tz,
ambiguous=ambiguous)
subarr = data.view(_NS_DTYPE)

else:
# must be integer dtype otherwise
# assume this data are epoch timestamps
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,8 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
- ndarray of Timestamps if box=False
"""
from pandas import DatetimeIndex
from pandas.core.arrays.datetimes import maybe_convert_dtype

if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')

Expand Down Expand Up @@ -208,6 +210,11 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
raise TypeError('arg must be a string, datetime, list, tuple, '
'1-d array, or Series')

# warn if passing timedelta64, raise for PeriodDtype
# NB: this must come after unit transformation
orig_arg = arg
arg, _ = maybe_convert_dtype(arg, copy=False)

arg = ensure_object(arg)
require_iso8601 = False

Expand All @@ -231,7 +238,10 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
# shortcut formatting here
if format == '%Y%m%d':
try:
result = _attempt_YYYYMMDD(arg, errors=errors)
# pass orig_arg as float-dtype may have been converted to
# datetime64[ns]
orig_arg = ensure_object(orig_arg)
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
raise ValueError("cannot convert the input to "
"'%Y%m%d' date format")
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/indexes/datetimes/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,51 @@
from pandas import (
DatetimeIndex, Index, Timestamp, date_range, datetime, offsets,
to_datetime)
from pandas.core.arrays import period_array
import pandas.util.testing as tm


class TestDatetimeIndex(object):

def test_dti_with_period_data_raises(self):
    # GH#23675
    # Period data must be rejected both as a PeriodIndex and as a period
    # array, by the DatetimeIndex constructor as well as by to_datetime.
    periods = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q')
    msg = "PeriodDtype data is invalid"

    for wrap in (None, period_array):
        for constructor in (DatetimeIndex, to_datetime):
            with pytest.raises(TypeError, match=msg):
                constructor(periods if wrap is None else wrap(periods))

def test_dti_with_timedelta64_data_deprecation(self):
    # GH#23675
    # timedelta64 input is still accepted but must emit a FutureWarning,
    # whether it arrives as a raw ndarray or as a TimedeltaIndex.
    values = np.array([0], dtype='m8[ns]')
    epoch = Timestamp('1970-01-01')

    # (constructor, check_stacklevel): the stacklevel is only verified for
    # the DatetimeIndex constructor, not for to_datetime.
    cases = [(DatetimeIndex, True), (to_datetime, False)]

    for wrap in (None, pd.TimedeltaIndex):
        for constructor, check in cases:
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=check):
                arg = values if wrap is None else wrap(values)
                result = constructor(arg)

            assert result[0] == epoch

def test_construction_caching(self):

df = pd.DataFrame({'dt': pd.date_range('20130101', periods=3),
Expand Down