From afc2d304598bf4319614abbf4c6b67f6873d5010 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 6 Nov 2018 21:10:27 -0800 Subject: [PATCH 01/14] simplify+fix+test TimedeltaIndex constructor --- pandas/core/arrays/datetimes.py | 4 +- pandas/core/arrays/timedeltas.py | 4 +- pandas/core/indexes/datetimes.py | 31 +++---- pandas/core/indexes/timedeltas.py | 94 +++++++++++++++------ pandas/tests/arithmetic/test_timedelta64.py | 4 +- pandas/tests/indexes/timedeltas/test_ops.py | 3 +- 6 files changed, 87 insertions(+), 53 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e7edd54c4177b..38cbc821e2d3a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -221,9 +221,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): result = cls._simple_new(values, freq=freq, tz=tz) if freq_infer: - inferred = result.inferred_freq - if inferred: - result.freq = to_offset(inferred) + result.freq = to_offset(result.inferred_freq) # NB: Among other things not yet ported from the DatetimeIndex # constructor, this does not call _deepcopy_if_needed diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9653121879c0d..a244c7f48fe70 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -137,9 +137,7 @@ def __new__(cls, values, freq=None): result = cls._simple_new(values, freq=freq) if freq_infer: - inferred = result.inferred_freq - if inferred: - result.freq = to_offset(inferred) + result.freq = to_offset(result.inferred_freq) return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3a2f9986760d3..70c8ee61a68c0 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -239,6 +239,21 @@ def __new__(cls, data=None, dayfirst=False, yearfirst=False, dtype=None, copy=False, name=None, verify_integrity=True): + if data is None: + # TODO: Remove this block and associated kwargs; GH#20535 + result = cls._generate_range(start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) + result.name = name + return result + + if is_scalar(data): + raise ValueError("{cls}() must be called with a " + "collection of some kind, {data} was passed" + .format(cls=cls.__name__, data=repr(data))) + + # - Cases checked above all return/raise before reaching here - # + # This allows to later ensure that the 'copy' parameter is honored: if isinstance(data, Index): ref_to_data = data._data @@ -253,20 +268,8 @@ def __new__(cls, data=None, # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) - if data is None: - # TODO: Remove this block and associated kwargs; GH#20535 - result = cls._generate_range(start, end, periods, - freq=freq, tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) - result.name = name - return result - if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): - if is_scalar(data): - raise ValueError('DatetimeIndex() must be called with a ' - 'collection of some kind, %s was passed' - % repr(data)) # other iterable of some kind if not isinstance(data, (list, tuple)): data = list(data) @@ -328,9 +331,7 @@ def __new__(cls, data=None, cls._validate_frequency(subarr, freq, ambiguous=ambiguous) if freq_infer: - inferred = subarr.inferred_freq - if inferred: - subarr.freq = to_offset(inferred) + subarr.freq = to_offset(subarr.inferred_freq) return subarr._deepcopy_if_needed(ref_to_data, copy) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5b077a6984114..a3cf6485798ac 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -8,11 +8,16 @@ is_float, is_list_like, is_scalar, + is_integer_dtype, + is_float_dtype, + is_object_dtype, + is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype, ensure_int64) from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays.timedeltas import ( TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) @@ -35,7 +40,7 @@ from pandas.core.tools.timedeltas import ( to_timedelta, _coerce_scalar_to_timedelta_type) from pandas._libs import (lib, index as libindex, - join as libjoin, Timedelta, NaT) + join as libjoin, Timedelta, NaT, iNaT) from pandas._libs.tslibs.timedeltas import array_to_timedelta64 @@ -139,12 +144,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, periods=None, closed=None, dtype=None, copy=False, name=None, verify_integrity=True): - if isinstance(data, TimedeltaIndex) and freq is None and name is None: - if copy: - return data.copy() - else: - return data._shallow_copy() - freq, freq_infer = dtl.maybe_infer_freq(freq) if data is None: @@ -154,32 +153,73 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, result.name = name return result - if unit is not None: - data = to_timedelta(data, unit=unit, box=False) - if is_scalar(data): - raise ValueError('TimedeltaIndex() must be called with a ' + raise ValueError('{cls}() must be called with a ' 'collection of some kind, {data} was passed' - .format(data=repr(data))) + .format(cls=cls.__name__, data=repr(data))) + + if isinstance(data, TimedeltaIndex) and freq is None and name is None: + if copy: + return data.copy() + else: + return data._shallow_copy() - # convert if not already - if getattr(data, 'dtype', None) != _TD_DTYPE: + # - Cases checked above all return/raise before reaching here - # + + if unit is not None: data = to_timedelta(data, unit=unit, box=False) - elif copy: - data = np.array(data, copy=True) - - data = np.array(data, copy=False) - if data.dtype == np.object_: - data = array_to_timedelta64(data) - if data.dtype != _TD_DTYPE: - if is_timedelta64_dtype(data): + + if not hasattr(data, 'dtype'): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e.g generator + data = list(data) + data = np.array(data, copy=False) + elif isinstance(data, ABCSeries): + data = data._values + elif isinstance(data, (cls, TimedeltaArrayMixin)): + data = data._data + + if is_object_dtype(data) or is_string_dtype(data): + # no need to make a copy, need to convert if string-dtyped + data = np.array(data, dtype=np.object_, copy=False) + data = array_to_timedelta64(data).view(_TD_DTYPE) + copy = False + + elif is_integer_dtype(data): + # treat as nanoseconds + # if something other than int64, convert + data = ensure_int64(data) + if copy: + # TODO: can we avoid branching here? `astype(data, copy=False)` + # appears to be making a copy + data = data.astype(_TD_DTYPE) + copy = False + else: + data = data.view(_TD_DTYPE) + + elif is_float_dtype(data): + # We allow it if and only if it can be converted lossessly + mask = np.isnan(data) + casted = data.astype(np.int64) + if not (casted[~mask] == data[~mask]).all(): + raise TypeError("floating-dtype data cannot be losslessly " + "converted to {cls}".format(cls=cls.__name__)) + data = casted.view(_TD_DTYPE) + data[mask] = iNaT + + elif is_timedelta64_dtype(data): + if data.dtype != _TD_DTYPE: # non-nano unit # TODO: watch out for overflows data = data.astype(_TD_DTYPE) - else: - data = ensure_int64(data).view(_TD_DTYPE) - assert data.dtype == 'm8[ns]', data.dtype + else: + raise TypeError("dtype {dtype} is invalid for constructing {cls}" + .format(dtype=data.dtype, cls=cls.__name__)) + + data = np.array(data, copy=copy) + assert data.dtype == 'm8[ns]', data subarr = cls._simple_new(data, name=name, freq=freq) # check that we are matching freqs @@ -188,9 +228,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, cls._validate_frequency(subarr, freq) if freq_infer: - inferred = subarr.inferred_freq - if inferred: - subarr.freq = to_offset(inferred) + subarr.freq = to_offset(subarr.inferred_freq) return subarr diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 902d0716aed8d..5966348b44c20 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -1054,11 +1054,11 @@ def test_tdi_mul_float_series(self, box_df_fail): idx = tm.box_expected(idx, box) rng5f = np.arange(5, dtype='float64') - expected = TimedeltaIndex(rng5f * (rng5f + 0.1)) + expected = TimedeltaIndex(rng5f * (rng5f + 1.0)) box2 = pd.Series if box is pd.Index else box expected = tm.box_expected(expected, box2) - result = idx * Series(rng5f + 0.1) + result = idx * Series(rng5f + 1.0) tm.assert_equal(result, expected) # TODO: Put Series/DataFrame in others? diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index a8cfdd0add178..8659d84e97212 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -54,8 +54,7 @@ def test_minmax(self): assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): - dr = pd.date_range(start='2016-01-15', end='2016-01-20') - td = TimedeltaIndex(np.asarray(dr)) + td = timedelta_range('16815 days', '16820 days', freq='D') assert np.min(td) == Timedelta('16815 days') assert np.max(td) == Timedelta('16820 days') From e4b06caafd41e659a53da0c006a6561f1cdbb66b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 6 Nov 2018 21:16:00 -0800 Subject: [PATCH 02/14] tests for datetime64 data being invalid, floats being valid iff non-lossy --- .../indexes/timedeltas/test_construction.py | 23 +++++++++++++++++++ .../tests/scalar/timedelta/test_arithmetic.py | 6 ++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index a5cfad98b31c1..be6c4a587a8f6 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -10,6 +10,29 @@ class TestTimedeltaIndex(object): + def test_dt64_data_invalid(self): + dti = pd.date_range('2016-01-01', periods=3) + with pytest.raises(TypeError): + TimedeltaIndex(dti) + + with pytest.raises(TypeError): + TimedeltaIndex(np.asarray(dti)) + + def test_float64_lossy_invalid(self): + # passing floats that would be truncated is unsupported + with pytest.raises(TypeError): + TimedeltaIndex([2.3, 9.0]) + + # but non-lossy floats are OK + tdi = TimedeltaIndex([2.0, 9.0]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) + + # NaNs get converted to NaT + tdi = TimedeltaIndex([2.0, np.nan]) + expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT]) + tm.assert_index_equal(tdi, expected) + def test_construction_base_constructor(self): arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 65709b0eebaf7..79fa49b564ad6 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -506,6 +506,9 @@ def test_td_rfloordiv_numeric_series(self): # TODO: GH-19761. Change to TypeError. ser // td + # ---------------------------------------------------------------- + # Timedelta.__mod__, __rmod__ + def test_mod_timedeltalike(self): # GH#19365 td = Timedelta(hours=37) @@ -545,9 +548,6 @@ def test_mod_offset(self): assert isinstance(result, Timedelta) assert result == Timedelta(hours=2) - # ---------------------------------------------------------------- - # Timedelta.__mod__, __rmod__ - def test_mod_numeric(self): # GH#19365 td = Timedelta(hours=37) From 1ff432b1f6927812c5fe0bfbb98b2bf594c8fd14 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 6 Nov 2018 21:33:04 -0800 Subject: [PATCH 03/14] comments and whitespace --- pandas/core/indexes/timedeltas.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index a3cf6485798ac..5e83fc14efb04 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -169,6 +169,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, if unit is not None: data = to_timedelta(data, unit=unit, box=False) + # Unwrap whatever we have into a np.ndarray if not hasattr(data, 'dtype'): # e.g. list, tuple if np.ndim(data) == 0: @@ -180,12 +181,12 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, elif isinstance(data, (cls, TimedeltaArrayMixin)): data = data._data + # Convert whatever we have into timedelta64[ns] dtype if is_object_dtype(data) or is_string_dtype(data): # no need to make a copy, need to convert if string-dtyped data = np.array(data, dtype=np.object_, copy=False) data = array_to_timedelta64(data).view(_TD_DTYPE) copy = False - elif is_integer_dtype(data): # treat as nanoseconds # if something other than int64, convert @@ -197,7 +198,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, copy = False else: data = data.view(_TD_DTYPE) - elif is_float_dtype(data): # We allow it if and only if it can be converted lossessly mask = np.isnan(data) @@ -207,13 +207,13 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, "converted to {cls}".format(cls=cls.__name__)) data = casted.view(_TD_DTYPE) data[mask] = iNaT - + copy = False elif is_timedelta64_dtype(data): if data.dtype != _TD_DTYPE: # non-nano unit # TODO: watch out for overflows data = data.astype(_TD_DTYPE) - + copy = False else: raise TypeError("dtype {dtype} is invalid for constructing {cls}" .format(dtype=data.dtype, cls=cls.__name__)) From 645e99cd880c7507fb21ecf1ae77d98e41627e41 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 6 Nov 2018 21:34:46 -0800 Subject: [PATCH 04/14] GH references --- pandas/tests/indexes/timedeltas/test_construction.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index be6c4a587a8f6..c219ecf5871c5 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -11,6 +11,7 @@ class TestTimedeltaIndex(object): def test_dt64_data_invalid(self): + # GH#23539 dti = pd.date_range('2016-01-01', periods=3) with pytest.raises(TypeError): TimedeltaIndex(dti) @@ -19,7 +20,7 @@ def test_dt64_data_invalid(self): TimedeltaIndex(np.asarray(dti)) def test_float64_lossy_invalid(self): - # passing floats that would be truncated is unsupported + # GH#23539 passing floats that would be truncated is unsupported with pytest.raises(TypeError): TimedeltaIndex([2.3, 9.0]) From 9c897469c3b1993f25e10f0b9962b575e7bb7969 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Nov 2018 09:25:02 -0800 Subject: [PATCH 05/14] deprecate instead of raising for datetime64 dtypes --- pandas/core/indexes/timedeltas.py | 13 +++++++++++-- .../tests/indexes/timedeltas/test_construction.py | 12 +++++++++--- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5e83fc14efb04..03eb9dd24ea56 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,5 +1,6 @@ """ implement the TimedeltaIndex """ from datetime import datetime +import warnings import numpy as np from pandas.core.dtypes.common import ( @@ -12,6 +13,7 @@ is_float_dtype, is_object_dtype, is_string_dtype, + is_datetime64_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype, @@ -173,7 +175,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, if not hasattr(data, 'dtype'): # e.g. list, tuple if np.ndim(data) == 0: - # i.e.g generator + # i.e. generator data = list(data) data = np.array(data, copy=False) elif isinstance(data, ABCSeries): @@ -199,7 +201,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, else: data = data.view(_TD_DTYPE) elif is_float_dtype(data): - # We allow it if and only if it can be converted lossessly + # We allow it if and only if it can be converted losslessly mask = np.isnan(data) casted = data.astype(np.int64) if not (casted[~mask] == data[~mask]).all(): @@ -214,6 +216,13 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, # TODO: watch out for overflows data = data.astype(_TD_DTYPE) copy = False + elif is_datetime64_dtype(data): + # GH#23539 + warnings.warn("Passing datetime64-dtype data to {cls} is " + "deprecated, will raise a TypeError in a future " + "version".format(cls=cls.__name__), + FutureWarning, stacklevel=2) + data = ensure_int64(data).view(_TD_DTYPE) else: raise TypeError("dtype {dtype} is invalid for constructing {cls}" .format(dtype=data.dtype, cls=cls.__name__)) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index c219ecf5871c5..fab691f3a817d 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -12,16 +12,22 @@ class TestTimedeltaIndex(object): def test_dt64_data_invalid(self): # GH#23539 + # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64] + # does not yet, but will in the future dti = pd.date_range('2016-01-01', periods=3) - with pytest.raises(TypeError): + + with tm.assert_raises_regex(TypeError, "is invalid for constructing"): + TimedeltaIndex(dti.tz_localize('Europe/Brussels')) + + with tm.assert_produces_warning(FutureWarning): TimedeltaIndex(dti) - with pytest.raises(TypeError): + with tm.assert_produces_warning(FutureWarning): TimedeltaIndex(np.asarray(dti)) def test_float64_lossy_invalid(self): # GH#23539 passing floats that would be truncated is unsupported - with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "data cannot be losslessly"): TimedeltaIndex([2.3, 9.0]) # but non-lossy floats are OK From ef3f2774d8af27746d0d9f8526403d1d707cc444 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Nov 2018 18:25:51 -0800 Subject: [PATCH 06/14] implement sequence_to_td64ns, deprecate datetime64 data, add and test fastpath for inference/validation --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/arrays/timedeltas.py | 177 +++++++++++++++++- pandas/core/indexes/timedeltas.py | 88 ++------- pandas/core/tools/timedeltas.py | 51 +++-- .../indexes/timedeltas/test_construction.py | 27 ++- 5 files changed, 238 insertions(+), 106 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 149d618c4a621..b85935bf15e3f 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -964,6 +964,7 @@ Deprecations - The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`) - Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of `use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`) +- Constructing a :class:`TimedeltaIndex` from data with ``datetime64``-dtyped data is deprecated, will raise ``TypeError`` in a future version (:issue:`23539`) .. _whatsnew_0240.deprecations.datetimelike_int_ops: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a244c7f48fe70..268afff4f266b 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -1,18 +1,28 @@ # -*- coding: utf-8 -*- from datetime import timedelta +import warnings import numpy as np from pandas._libs import tslibs -from pandas._libs.tslibs import Timedelta, Timestamp, NaT +from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT from pandas._libs.tslibs.fields import get_timedelta_field -from pandas._libs.tslibs.timedeltas import array_to_timedelta64 +from pandas._libs.tslibs.timedeltas import ( + array_to_timedelta64, parse_timedelta_unit) from pandas import compat from pandas.core.dtypes.common import ( - _TD_DTYPE, is_list_like) -from pandas.core.dtypes.generic import ABCSeries + _TD_DTYPE, + is_object_dtype, + is_string_dtype, + is_float_dtype, + is_integer_dtype, + is_timedelta64_dtype, + is_datetime64_dtype, + is_list_like, + ensure_int64) +from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex from pandas.core.dtypes.missing import isna import pandas.core.common as com @@ -393,6 +403,165 @@ def f(x): # --------------------------------------------------------------------- # Constructor Helpers +def sequence_to_td64ns(data, copy=False, unit='ns', errors='raise'): + """ + Parameters + ---------- + array : list-like + copy : bool, default False + unit : str, default "ns" + errors : {"raise", "coerce", "ignore"}, default "raise" + + Returns + ------- + ndarray[timedelta64[ns]] + inferred_freq : Tick or None + + Raises + ------ + ValueError : data cannot be converted to timedelta64[ns] + + Notes + ----- + Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause + errors to be ignored; they are caught and subsequently ignored at a + higher level. + """ + inferred_freq = None + unit = parse_timedelta_unit(unit) + + # Unwrap whatever we have into a np.ndarray + if not hasattr(data, 'dtype'): + # e.g. list, tuple + if np.ndim(data) == 0: + # i.e. generator + data = list(data) + data = np.array(data, copy=False) + elif isinstance(data, ABCSeries): + data = data._values + elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArrayMixin)): + inferred_freq = data.freq + data = data._data + + # Convert whatever we have into timedelta64[ns] dtype + if is_object_dtype(data) or is_string_dtype(data): + # no need to make a copy, need to convert if string-dtyped + data = objects_to_td64ns(data, unit=unit, errors=errors) + copy = False + + elif is_integer_dtype(data): + # treat as nanoseconds + data, copy_made = ints_to_td64ns(data, unit=unit) + copy = copy and not copy_made + + elif is_float_dtype(data): + # We allow it if and only if it can be converted losslessly + mask = np.isnan(data) + casted = data.astype(np.int64) + if not (casted[~mask] == data[~mask]).all(): + raise TypeError("floating-dtype data cannot be losslessly " + "converted to timedelta64[ns]") + copy = False + data, copy_made = ints_to_td64ns(data, unit=unit) + copy = copy and not copy_made + data[mask] = iNaT + + elif is_timedelta64_dtype(data): + if data.dtype != _TD_DTYPE: + # non-nano unit + # TODO: watch out for overflows + data = data.astype(_TD_DTYPE) + copy = False + + elif is_datetime64_dtype(data): + # GH#23539 + warnings.warn("Passing datetime64-dtype data to TimedeltaIndex is " + "deprecated, will raise a TypeError in a future " + "version", + FutureWarning, stacklevel=3) + data = ensure_int64(data).view(_TD_DTYPE) + + else: + raise TypeError("dtype {dtype} cannot be converted to timedelta64[ns]" + .format(dtype=data.dtype)) + + data = np.array(data, copy=copy) + assert data.dtype == 'm8[ns]', data + return data, inferred_freq + + +def ints_to_td64ns(data, unit="ns"): + """ + Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating + the integers as multiples of the given timedelta unit. + + Parameters + ---------- + data : np.ndarray with integer-dtype + unit : str, default "ns" + + Returns + ------- + ndarray[timedelta64[ns]] + bool : whether a copy was made + """ + copy_made = False + unit = unit if unit is not None else "ns" + + if data.dtype != np.int64: + # converting to int64 makes a copy, so we can avoid + # re-copying later + data = data.astype(np.int64) + copy_made = True + + if unit != "ns": + dtype_str = "timedelta64[{unit}]".format(unit=unit) + data = data.view(dtype_str) + + # TODO: watch out for overflows when converting from lower-resolution + data = data.astype("timedelta64[ns]") + # the astype conversion makes a copy, so we can avoid re-copying later + copy_made = True + + else: + data = data.view("timedelta64[ns]") + + return data, copy_made + + +def objects_to_td64ns(data, unit="ns", errors="raise"): + """ + Convert a object-dtyped or string-dtyped array into an + timedelta64[ns]-dtyped array. + + Parameters + ---------- + data : ndarray or Index + unit : str, default "ns" + errors : {"raise", "coerce", "ignore"}, default "raise" + + Returns + ------- + ndarray[timedelta64[ns]] + + Raises + ------ + ValueError : data cannot be converted to timedelta64[ns] + + Notes + ----- + Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause + errors to be ignored; they are caught and subsequently ignored at a + higher level. + """ + # coerce Index to np.ndarray, converting string-dtype if necessary + values = np.array(data, dtype=np.object_, copy=False) + + result = array_to_timedelta64(values, + unit=unit, errors=errors) + return result.view('timedelta64[ns]') + + def _generate_regular_range(start, end, periods, offset): stride = offset.nanos if periods is None: diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 03eb9dd24ea56..4802f8045eccb 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,6 +1,5 @@ """ implement the TimedeltaIndex """ from datetime import datetime -import warnings import numpy as np from pandas.core.dtypes.common import ( @@ -9,20 +8,15 @@ is_float, is_list_like, is_scalar, - is_integer_dtype, - is_float_dtype, - is_object_dtype, - is_string_dtype, - is_datetime64_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype, ensure_int64) from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays.timedeltas import ( - TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) + TimedeltaArrayMixin, _is_convertible_to_td, _to_m8, + sequence_to_td64ns) from pandas.core.arrays import datetimelike as dtl from pandas.core.indexes.base import Index @@ -40,10 +34,9 @@ TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op, wrap_array_method, wrap_field_accessor) from pandas.core.tools.timedeltas import ( - to_timedelta, _coerce_scalar_to_timedelta_type) + _coerce_scalar_to_timedelta_type) from pandas._libs import (lib, index as libindex, - join as libjoin, Timedelta, NaT, iNaT) -from pandas._libs.tslibs.timedeltas import array_to_timedelta64 + join as libjoin, Timedelta, NaT) class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, @@ -168,67 +161,18 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, # - Cases checked above all return/raise before reaching here - # - if unit is not None: - data = to_timedelta(data, unit=unit, box=False) - - # Unwrap whatever we have into a np.ndarray - if not hasattr(data, 'dtype'): - # e.g. list, tuple - if np.ndim(data) == 0: - # i.e. generator - data = list(data) - data = np.array(data, copy=False) - elif isinstance(data, ABCSeries): - data = data._values - elif isinstance(data, (cls, TimedeltaArrayMixin)): - data = data._data - - # Convert whatever we have into timedelta64[ns] dtype - if is_object_dtype(data) or is_string_dtype(data): - # no need to make a copy, need to convert if string-dtyped - data = np.array(data, dtype=np.object_, copy=False) - data = array_to_timedelta64(data).view(_TD_DTYPE) - copy = False - elif is_integer_dtype(data): - # treat as nanoseconds - # if something other than int64, convert - data = ensure_int64(data) - if copy: - # TODO: can we avoid branching here? `astype(data, copy=False)` - # appears to be making a copy - data = data.astype(_TD_DTYPE) - copy = False - else: - data = data.view(_TD_DTYPE) - elif is_float_dtype(data): - # We allow it if and only if it can be converted losslessly - mask = np.isnan(data) - casted = data.astype(np.int64) - if not (casted[~mask] == data[~mask]).all(): - raise TypeError("floating-dtype data cannot be losslessly " - "converted to {cls}".format(cls=cls.__name__)) - data = casted.view(_TD_DTYPE) - data[mask] = iNaT - copy = False - elif is_timedelta64_dtype(data): - if data.dtype != _TD_DTYPE: - # non-nano unit - # TODO: watch out for overflows - data = data.astype(_TD_DTYPE) - copy = False - elif is_datetime64_dtype(data): - # GH#23539 - warnings.warn("Passing datetime64-dtype data to {cls} is " - "deprecated, will raise a TypeError in a future " - "version".format(cls=cls.__name__), - FutureWarning, stacklevel=2) - data = ensure_int64(data).view(_TD_DTYPE) - else: - raise TypeError("dtype {dtype} is invalid for constructing {cls}" - .format(dtype=data.dtype, cls=cls.__name__)) - - data = np.array(data, copy=copy) - assert data.dtype == 'm8[ns]', data + data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) + if inferred_freq is not None: + if freq is not None and freq != inferred_freq: + raise ValueError('Inferred frequency {inferred} from passed ' + 'values does not conform to passed frequency ' + '{passed}' + .format(inferred=inferred_freq, + passed=freq.freqstr)) + elif freq_infer: + freq = inferred_freq + freq_infer = False + verify_integrity = False subarr = cls._simple_new(data, name=name, freq=freq) # check that we are matching freqs diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 220b14a9cb7c6..90556b4a833e2 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -6,16 +6,13 @@ import pandas as pd from pandas._libs import tslibs from pandas._libs.tslibs.timedeltas import (convert_to_timedelta64, - array_to_timedelta64, parse_timedelta_unit) -from pandas.core.dtypes.common import ( - ensure_object, - is_integer_dtype, - is_timedelta64_dtype, - is_list_like) +from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass +from pandas.core.arrays.timedeltas import sequence_to_td64ns + def to_timedelta(arg, unit='ns', box=True, errors='raise'): """ @@ -129,31 +126,27 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): """Convert a list of objects to a timedelta index object.""" if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'): + # This is needed only to ensure that in the case where we end up + # returning arg (errors == "ignore"), and where the input is a + # generator, we return a useful list-like instead of a + # used-up generator arg = np.array(list(arg), dtype='O') - # these are shortcut-able - if is_timedelta64_dtype(arg): - value = arg.astype('timedelta64[ns]') - elif is_integer_dtype(arg): - value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype( - 'timedelta64[ns]', copy=False) - else: - try: - value = array_to_timedelta64(ensure_object(arg), - unit=unit, errors=errors) - value = value.astype('timedelta64[ns]', copy=False) - except ValueError: - if errors == 'ignore': - return arg - else: - # This else-block accounts for the cases when errors='raise' - # and errors='coerce'. If errors == 'raise', these errors - # should be raised. If errors == 'coerce', we shouldn't - # expect any errors to be raised, since all parsing errors - # cause coercion to pd.NaT. However, if an error / bug is - # introduced that causes an Exception to be raised, we would - # like to surface it. - raise + try: + value = sequence_to_td64ns(arg, unit=unit, + errors=errors, copy=False)[0] + except ValueError: + if errors == 'ignore': + return arg + else: + # This else-block accounts for the cases when errors='raise' + # and errors='coerce'. If errors == 'raise', these errors + # should be raised. If errors == 'coerce', we shouldn't + # expect any errors to be raised, since all parsing errors + # cause coercion to pd.NaT. However, if an error / bug is + # introduced that causes an Exception to be raised, we would + # like to surface it. + raise if box: from pandas import TimedeltaIndex diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index fab691f3a817d..3b38018505695 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -10,13 +10,38 @@ class TestTimedeltaIndex(object): + def test_infer_from_tdi(self): + # GH#23539 + # fast-path for inferring a frequency if the passed data already + # has one + tdi = pd.timedelta_range('1 second', periods=10**7, freq='1s') + + result = pd.TimedeltaIndex(tdi, freq='infer') + assert result.freq == tdi.freq + + # check that inferred_freq was not called by checking that the + # value has not been cached + assert "inferred_freq" not in getattr(result, "_cache", {}) + + def test_infer_from_tdi_mismatch(self): + # GH#23539 + # fast-path for invalidating a frequency if the passed data already + # has one and it does not match the `freq` input + tdi = pd.timedelta_range('1 second', periods=100, freq='1s') + + msg = ("Inferred frequency .* from passed values does " + "not conform to passed frequency") + with tm.assert_raises_regex(ValueError, msg): + TimedeltaIndex(tdi, freq='D') + def test_dt64_data_invalid(self): # GH#23539 # passing tz-aware DatetimeIndex raises, naive or ndarray[datetime64] # does not yet, but will in the future dti = pd.date_range('2016-01-01', periods=3) - with tm.assert_raises_regex(TypeError, "is invalid for constructing"): + msg = "cannot be converted to timedelta64" + with tm.assert_raises_regex(TypeError, msg): TimedeltaIndex(dti.tz_localize('Europe/Brussels')) with tm.assert_produces_warning(FutureWarning): From 3f76c0214772fa00ca751008fcb5db7bdcd42e13 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 7 Nov 2018 18:36:40 -0800 Subject: [PATCH 07/14] catch warnings --- pandas/tests/indexes/timedeltas/test_arithmetic.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index a03698c9ea0de..82337ac37fbee 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -453,10 +453,16 @@ def test_timedelta_ops_with_missing_values(self): # setup s1 = pd.to_timedelta(Series(['00:00:01'])) s2 = pd.to_timedelta(Series(['00:00:02'])) - sn = pd.to_timedelta(Series([pd.NaT])) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # Passing datetime64-dtype data to TimedeltaIndex is deprecated + sn = pd.to_timedelta(Series([pd.NaT])) + df1 = pd.DataFrame(['00:00:01']).apply(pd.to_timedelta) df2 = pd.DataFrame(['00:00:02']).apply(pd.to_timedelta) - dfn = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # Passing datetime64-dtype data to TimedeltaIndex is deprecated + dfn = pd.DataFrame([pd.NaT]).apply(pd.to_timedelta) + scalar1 = pd.to_timedelta('00:00:01') scalar2 = pd.to_timedelta('00:00:02') timedelta_NaT = pd.to_timedelta('NaT') From d73bee625eb80b673cb4650e07cc931853e8db5c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 8 Nov 2018 09:10:17 -0800 Subject: [PATCH 08/14] revert float changes, and tests --- pandas/core/arrays/timedeltas.py | 16 +++++++-------- .../indexes/timedeltas/test_construction.py | 20 +++++++++++++------ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 268afff4f266b..b9c681885e4de 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -450,21 +450,19 @@ def sequence_to_td64ns(data, copy=False, unit='ns', errors='raise'): copy = False elif is_integer_dtype(data): - # treat as nanoseconds + # treat as multiples of the given unit data, copy_made = ints_to_td64ns(data, unit=unit) copy = copy and not copy_made elif is_float_dtype(data): - # We allow it if and only if it can be converted losslessly + # treat as multiples of the given unit. If after converting to nanos, + # there are fractional components left, these are truncated + # (i.e. NOT rounded) mask = np.isnan(data) - casted = data.astype(np.int64) - if not (casted[~mask] == data[~mask]).all(): - raise TypeError("floating-dtype data cannot be losslessly " - "converted to timedelta64[ns]") - copy = False - data, copy_made = ints_to_td64ns(data, unit=unit) - copy = copy and not copy_made + coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns') + data = (coeff * data).astype(np.int64).view('timedelta64[ns]') data[mask] = iNaT + copy = False elif is_timedelta64_dtype(data): if data.dtype != _TD_DTYPE: diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 3b38018505695..215217169258c 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -5,7 +5,7 @@ import pandas as pd import pandas.util.testing as tm -from pandas import TimedeltaIndex, timedelta_range, to_timedelta +from pandas import TimedeltaIndex, timedelta_range, to_timedelta, Timedelta class TestTimedeltaIndex(object): @@ -50,12 +50,14 @@ def test_dt64_data_invalid(self): with tm.assert_produces_warning(FutureWarning): TimedeltaIndex(np.asarray(dti)) - def test_float64_lossy_invalid(self): - # GH#23539 passing floats that would be truncated is unsupported - with tm.assert_raises_regex(TypeError, "data cannot be losslessly"): - TimedeltaIndex([2.3, 9.0]) + def test_float64_ns_rounded(self): + # GH#23539 without specifying a unit, floats are regarded as nanos, + # and fractional portions are truncated + tdi = TimedeltaIndex([2.3, 9.7]) + expected = TimedeltaIndex([2, 9]) + tm.assert_index_equal(tdi, expected) - # but non-lossy floats are OK + # integral floats are non-lossy tdi = TimedeltaIndex([2.0, 9.0]) expected = TimedeltaIndex([2, 9]) tm.assert_index_equal(tdi, expected) @@ -65,6 +67,12 @@ def test_float64_lossy_invalid(self): expected = TimedeltaIndex([pd.Timedelta(nanoseconds=2), pd.NaT]) tm.assert_index_equal(tdi, expected) + def test_float64_unit_conversion(self): + # GH#23539 + tdi = TimedeltaIndex([1.5, 2.25], unit='D') + expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) + tm.assert_index_equal(tdi, expected) + def test_construction_base_constructor(self): arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')] tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr)) From ccc7fcf348790fa1dbe7c0f828ca0791fc8dd3b4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 10 Nov 2018 08:40:06 -0800 Subject: [PATCH 09/14] change ValueError-->TypeError --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/indexes/datetimes.py | 6 +++--- pandas/core/indexes/timedeltas.py | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 87853c380d17a..dd948c3151482 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -246,6 +246,7 @@ Backwards incompatible API changes - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`) +- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) .. _whatsnew_0240.api_breaking.deps: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e27362d4f7581..dbe975ce6f216 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -248,9 +248,9 @@ def __new__(cls, data=None, return result if is_scalar(data): - raise ValueError("{cls}() must be called with a " - "collection of some kind, {data} was passed" - .format(cls=cls.__name__, data=repr(data))) + raise TypeError("{cls}() must be called with a " + "collection of some kind, {data} was passed" + .format(cls=cls.__name__, data=repr(data))) # - Cases checked above all return/raise before reaching here - # diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 4802f8045eccb..35e17c7400892 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -149,9 +149,9 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, return result if is_scalar(data): - raise ValueError('{cls}() must be called with a ' - 'collection of some kind, {data} was passed' - .format(cls=cls.__name__, data=repr(data))) + raise TypeError('{cls}() must be called with a ' + 'collection of some kind, {data} was passed' + .format(cls=cls.__name__, data=repr(data))) if isinstance(data, TimedeltaIndex) and freq is None and name is None: if copy: From 6fda27e123b077bf8ecc19bf20059b671ff06967 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 10 Nov 2018 08:41:00 -0800 Subject: [PATCH 10/14] double quotes --- pandas/core/arrays/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index b0e1f2733e583..1f78e0c00bf00 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -405,7 +405,7 @@ def f(x): # --------------------------------------------------------------------- # Constructor Helpers -def sequence_to_td64ns(data, copy=False, unit='ns', errors='raise'): +def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"): """ Parameters ---------- From c63796a3874b18e082350c887b9d54bf7fc6e9ac Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 10 Nov 2018 08:48:57 -0800 Subject: [PATCH 11/14] test that no copy is made with int64 data --- pandas/tests/indexes/timedeltas/test_construction.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 215217169258c..2f3a1fce22061 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -10,6 +10,13 @@ class TestTimedeltaIndex(object): + def test_int64_nocopy(self): + # GH#23539 check that a copy isn't made when we pass int64 data + # and copy=False + arr = np.arange(10, dtype=np.int64) + tdi = TimedeltaIndex(arr, copy=False) + assert tdi._data.base is arr + def test_infer_from_tdi(self): # GH#23539 # fast-path for inferring a frequency if the passed data already From e9b5da636ee38e0cda6561c2e954fd268c229a1f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 10 Nov 2018 12:24:58 -0800 Subject: [PATCH 12/14] update tests to TypeError --- pandas/tests/indexes/datetimes/test_construction.py | 3 ++- pandas/tests/indexes/timedeltas/test_construction.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index 7a251a8ecfb28..239847915e8d0 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -320,7 +320,8 @@ def test_constructor_coverage(self): pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', end='1/10/2000') - pytest.raises(ValueError, DatetimeIndex, '1/1/2000') + with pytest.raises(TypeError): + DatetimeIndex('1/1/2000') # generator expression gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10)) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 2f3a1fce22061..0f5925c3fef71 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -133,7 +133,8 @@ def test_constructor_coverage(self): pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') - pytest.raises(ValueError, TimedeltaIndex, '1 days') + with pytest.raises(TypeError): + TimedeltaIndex('1 days') # generator expression gen = (timedelta(i) for i in range(10)) From da6b286a3b3ea7d96e562ef4d335000ad1195c11 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 11 Nov 2018 09:18:54 -0800 Subject: [PATCH 13/14] dtype=object instead of 'O' --- pandas/core/tools/timedeltas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 90556b4a833e2..fad136b3b5a45 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -130,7 +130,7 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): # returning arg (errors == "ignore"), and where the input is a # generator, we return a useful list-like instead of a # used-up generator - arg = np.array(list(arg), dtype='O') + arg = np.array(list(arg), dtype=object) try: value = sequence_to_td64ns(arg, unit=unit, From 898444f17553dbb7ed012fbdfd49f1b2864f9466 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 11 Nov 2018 10:58:09 -0800 Subject: [PATCH 14/14] use pytest.raises instead of tm.assert_raises_regex --- pandas/tests/indexes/timedeltas/test_construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index 5a5f5590a4fce..074c8904b55b1 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -38,7 +38,7 @@ def test_infer_from_tdi_mismatch(self): msg = ("Inferred frequency .* from passed values does " "not conform to passed frequency") - with tm.assert_raises_regex(ValueError, msg): + with pytest.raises(ValueError, match=msg): TimedeltaIndex(tdi, freq='D') def test_dt64_data_invalid(self): @@ -48,7 +48,7 @@ def test_dt64_data_invalid(self): dti = pd.date_range('2016-01-01', periods=3) msg = "cannot be converted to timedelta64" - with tm.assert_raises_regex(TypeError, msg): + with pytest.raises(TypeError, match=msg): TimedeltaIndex(dti.tz_localize('Europe/Brussels')) with tm.assert_produces_warning(FutureWarning):