From 7caf43c5911d990d7024b29c2e8d9f66fdbd237b Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 18 May 2021 16:11:57 -0700 Subject: [PATCH 1/2] DEPR: Series/DataFrame with tzaware data and tznaive dtype --- pandas/core/dtypes/cast.py | 52 +++++++++++++++++++++++- pandas/tests/frame/test_constructors.py | 46 ++++++++++++++++++++- pandas/tests/series/test_constructors.py | 20 ++++++++- 3 files changed, 112 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 783474c53f304..7b1b116d89ed7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -219,6 +219,8 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: elif isinstance(value, Timestamp): if value.tz is None: value = value.to_datetime64() + elif not isinstance(dtype, DatetimeTZDtype): + raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype") elif isinstance(value, Timedelta): value = value.to_timedelta64() @@ -1611,9 +1613,21 @@ def maybe_cast_to_datetime( # didn't specify one if dta.tz is not None: + warnings.warn( + "Data is timezone-aware. Converting " + "timezone-aware data to timezone-naive by " + "passing dtype='datetime64[ns]' to " + "DataFrame or Series is deprecated and will " + "raise in a future version. 
Use " + "`pd.Series(values).dt.tz_localize(None)` " + "instead.", + FutureWarning, + stacklevel=8, + ) # equiv: dta.view(dtype) # Note: NOT equivalent to dta.astype(dtype) dta = dta.tz_localize(None) + value = dta elif is_datetime64tz: dtype = cast(DatetimeTZDtype, dtype) @@ -1806,7 +1820,7 @@ def construct_2d_arraylike_from_scalar( shape = (length, width) if dtype.kind in ["m", "M"]: - value = maybe_unbox_datetimelike(value, dtype) + value = maybe_unbox_datetimelike_tz_deprecation(value, dtype, stacklevel=4) elif dtype == object: if isinstance(value, (np.timedelta64, np.datetime64)): # calling np.array below would cast to pytimedelta/pydatetime @@ -1869,7 +1883,7 @@ def construct_1d_arraylike_from_scalar( if not isna(value): value = ensure_str(value) elif dtype.kind in ["M", "m"]: - value = maybe_unbox_datetimelike(value, dtype) + value = maybe_unbox_datetimelike_tz_deprecation(value, dtype) subarr = np.empty(length, dtype=dtype) subarr.fill(value) @@ -1877,6 +1891,40 @@ def construct_1d_arraylike_from_scalar( return subarr +def maybe_unbox_datetimelike_tz_deprecation( + value: Scalar, dtype: DtypeObj, stacklevel: int = 5 +): + """ + Wrap maybe_unbox_datetimelike with a check for a timezone-aware Timestamp + along with a timezone-naive datetime64 dtype, which is deprecated. + """ + # Caller is responsible for checking dtype.kind in ["m", "M"] + try: + value = maybe_unbox_datetimelike(value, dtype) + except TypeError: + if ( + isinstance(value, Timestamp) + and value.tz is not None + and isinstance(dtype, np.dtype) + ): + warnings.warn( + "Data is timezone-aware. Converting " + "timezone-aware data to timezone-naive by " + "passing dtype='datetime64[ns]' to " + "DataFrame or Series is deprecated and will " + "raise in a future version. 
Use " + "`pd.Series(values).dt.tz_localize(None)` " + "instead.", + FutureWarning, + stacklevel=stacklevel, + ) + new_value = value.tz_localize(None) + return maybe_unbox_datetimelike(new_value, dtype) + else: + raise + return value + + def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray: """ Transform any list-like object in a 1-dimensional numpy array of object diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 03376bdce26f8..6246c09ae47a5 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2438,10 +2438,39 @@ def test_construction_preserves_tzaware_dtypes(self, tz): def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): # GH#25843 tz = tz_aware_fixture - result = DataFrame({"d": [Timestamp("2019", tz=tz)]}, dtype="datetime64[ns]") - expected = DataFrame({"d": [Timestamp("2019")]}) + ts = Timestamp("2019", tz=tz) + ts_naive = Timestamp("2019") + + with tm.assert_produces_warning(FutureWarning): + result = DataFrame({0: [ts]}, dtype="datetime64[ns]") + + expected = DataFrame({0: [ts_naive]}) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = DataFrame({0: ts}, index=[0], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = DataFrame([ts], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]") tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df = 
DataFrame([Series([ts])], dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df = DataFrame([[ts]], columns=[0], dtype="datetime64[ns]") + tm.assert_equal(df, expected) + def test_from_dict(self): # 8260 @@ -2681,3 +2710,16 @@ def test_from_out_of_bounds_timedelta(self, constructor, cls): result = constructor(scalar) assert type(get1(result)) is cls + + def test_tzaware_data_tznaive_dtype(self, constructor): + tz = "US/Eastern" + ts = Timestamp("2019", tz=tz) + ts_naive = Timestamp("2019") + + with tm.assert_produces_warning( + FutureWarning, match="Data is timezone-aware", check_stacklevel=False + ): + result = constructor(ts, dtype="M8[ns]") + + assert np.all(result.dtypes == "M8[ns]") + assert np.all(result == ts_naive) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 67649e6e37b35..2162d64f3c0a5 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1538,8 +1538,24 @@ def test_constructor_tz_mixed_data(self): def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): # GH#25843 tz = tz_aware_fixture - result = Series([Timestamp("2019", tz=tz)], dtype="datetime64[ns]") - expected = Series([Timestamp("2019")]) + ts = Timestamp("2019", tz=tz) + ts_naive = Timestamp("2019") + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series([ts], dtype="datetime64[ns]") + expected = Series([ts_naive]) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = Series(np.array([ts], dtype=object), dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = Series({0: ts}, dtype="datetime64[ns]") + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning): + result = 
Series(ts, index=[0], dtype="datetime64[ns]") tm.assert_series_equal(result, expected) def test_constructor_datetime64(self): From 15c9a4a3f9ebe553ba629debc15acf5f74516b2f Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 May 2021 08:51:39 -0700 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/tests/frame/test_constructors.py | 2 +- pandas/tests/series/test_constructors.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 070e47d73cfae..ae82bdf5395d1 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -675,6 +675,7 @@ Deprecations - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) - Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`) - Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`) +- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a83754051a3d9..a5dc37ef32735 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2447,7 +2447,7 @@ def test_construction_preserves_tzaware_dtypes(self, tz): tm.assert_series_equal(result, expected) def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): - # GH#25843 + # GH#25843, GH#41555, GH#33401 tz = tz_aware_fixture ts = Timestamp("2019", tz=tz) ts_naive = Timestamp("2019") diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e87c76c9437c2..41c0cbf58e438 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1536,7 +1536,7 @@ def test_constructor_tz_mixed_data(self): tm.assert_series_equal(result, expected) def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): - # GH#25843 + # GH#25843, GH#41555, GH#33401 tz = tz_aware_fixture ts = Timestamp("2019", tz=tz) ts_naive = Timestamp("2019")