Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: Series/DataFrame with tzaware data and tznaive dtype #41555

Merged
merged 3 commits into from
May 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ Deprecations
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
- Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)

.. ---------------------------------------------------------------------------

Expand Down
52 changes: 50 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
elif isinstance(value, Timestamp):
if value.tz is None:
value = value.to_datetime64()
elif not isinstance(dtype, DatetimeTZDtype):
raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype")
jreback marked this conversation as resolved.
Show resolved Hide resolved
elif isinstance(value, Timedelta):
value = value.to_timedelta64()

Expand Down Expand Up @@ -1616,9 +1618,21 @@ def maybe_cast_to_datetime(
# didn't specify one

if dta.tz is not None:
warnings.warn(
"Data is timezone-aware. Converting "
"timezone-aware data to timezone-naive by "
"passing dtype='datetime64[ns]' to "
"DataFrame or Series is deprecated and will "
"raise in a future version. Use "
"`pd.Series(values).dt.tz_localize(None)` "
"instead.",
FutureWarning,
stacklevel=8,
)
# equiv: dta.view(dtype)
# Note: NOT equivalent to dta.astype(dtype)
dta = dta.tz_localize(None)

value = dta
elif is_datetime64tz:
dtype = cast(DatetimeTZDtype, dtype)
Expand Down Expand Up @@ -1810,7 +1824,7 @@ def construct_2d_arraylike_from_scalar(
shape = (length, width)

if dtype.kind in ["m", "M"]:
value = maybe_unbox_datetimelike(value, dtype)
value = maybe_unbox_datetimelike_tz_deprecation(value, dtype, stacklevel=4)
elif dtype == object:
if isinstance(value, (np.timedelta64, np.datetime64)):
# calling np.array below would cast to pytimedelta/pydatetime
Expand Down Expand Up @@ -1873,14 +1887,48 @@ def construct_1d_arraylike_from_scalar(
if not isna(value):
value = ensure_str(value)
elif dtype.kind in ["M", "m"]:
value = maybe_unbox_datetimelike(value, dtype)
value = maybe_unbox_datetimelike_tz_deprecation(value, dtype)

subarr = np.empty(length, dtype=dtype)
subarr.fill(value)

return subarr


def maybe_unbox_datetimelike_tz_deprecation(
    value: Scalar, dtype: DtypeObj, stacklevel: int = 5
):
    """
    Wrap maybe_unbox_datetimelike with a check for a timezone-aware scalar
    along with a timezone-naive datetime64 dtype, which is deprecated.

    Parameters
    ----------
    value : Scalar
        The scalar to unbox.
    dtype : DtypeObj
        Target dtype. The caller is responsible for checking
        ``dtype.kind in ["m", "M"]``.
    stacklevel : int, default 5
        Forwarded to ``warnings.warn`` so the deprecation warning is
        attributed to the user-facing constructor call.

    Returns
    -------
    Scalar
        The unboxed value. In the deprecated case the timezone is dropped
        with ``tz_localize(None)`` before unboxing.

    Raises
    ------
    TypeError
        Re-raised from ``maybe_unbox_datetimelike`` whenever the failure is
        not the deprecated tz-aware value / numpy dtype combination.
    """
    from datetime import datetime

    # Caller is responsible for checking dtype.kind in ["m", "M"]

    # Review follow-up ("does this apply to a datetime as well?"): a tz-aware
    # stdlib datetime is boxed to Timestamp so that it goes through the same
    # deprecation path as a tz-aware Timestamp. Naive datetimes and
    # non-numpy dtypes are deliberately left untouched.
    if (
        isinstance(dtype, np.dtype)
        and dtype.kind == "M"
        and isinstance(value, datetime)
        and not isinstance(value, Timestamp)
        and value.tzinfo is not None
    ):
        value = Timestamp(value)

    try:
        value = maybe_unbox_datetimelike(value, dtype)
    except TypeError:
        is_deprecated_case = (
            isinstance(value, Timestamp)
            and value.tz is not None
            and isinstance(dtype, np.dtype)
        )
        if not is_deprecated_case:
            raise

        warnings.warn(
            "Data is timezone-aware. Converting "
            "timezone-aware data to timezone-naive by "
            "passing dtype='datetime64[ns]' to "
            "DataFrame or Series is deprecated and will "
            "raise in a future version. Use "
            "`pd.Series(values).dt.tz_localize(None)` "
            "instead.",
            FutureWarning,
            stacklevel=stacklevel,
        )
        # Drop the timezone (keeping the wall time), then unbox the now
        # tz-naive Timestamp.
        new_value = value.tz_localize(None)
        return maybe_unbox_datetimelike(new_value, dtype)
    return value


def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
"""
Transform any list-like object in a 1-dimensional numpy array of object
Expand Down
68 changes: 55 additions & 13 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2404,6 +2404,17 @@ def test_from_series_with_name_with_columns(self):
expected = DataFrame(columns=["bar"])
tm.assert_frame_equal(result, expected)

def test_nested_list_columns(self):
# GH 14467
result = DataFrame(
[[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]]
)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6]],
columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]),
)
tm.assert_frame_equal(result, expected)


class TestDataFrameConstructorWithDatetimeTZ:
@pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
Expand Down Expand Up @@ -2436,12 +2447,41 @@ def test_construction_preserves_tzaware_dtypes(self, tz):
tm.assert_series_equal(result, expected)

def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
    # GH#25843, GH#41555, GH#33401
    # Each construction path below receives tz-aware data together with a
    # tz-naive dtype; each must emit the deprecation warning and produce
    # the same tz-naive frame.
    tz = tz_aware_fixture
    ts = Timestamp("2019", tz=tz)
    ts_naive = Timestamp("2019")

    # dict of list
    with tm.assert_produces_warning(FutureWarning):
        result = DataFrame({0: [ts]}, dtype="datetime64[ns]")

    expected = DataFrame({0: [ts_naive]})
    tm.assert_frame_equal(result, expected)

    # dict of scalar
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame({0: ts}, index=[0], dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # flat list
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame([ts], dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # object ndarray
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # bare scalar broadcast over index/columns
    with tm.assert_produces_warning(FutureWarning):
        result = DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # list of Series; previously bound to ``df`` while the stale ``result``
    # was asserted, so this path was never actually checked
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame([Series([ts])], dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # nested list
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame([[ts]], columns=[0], dtype="datetime64[ns]")
    tm.assert_equal(result, expected)

def test_from_dict(self):

# 8260
Expand Down Expand Up @@ -2682,13 +2722,15 @@ def test_from_out_of_bounds_timedelta(self, constructor, cls):

assert type(get1(result)) is cls

def test_nested_list_columns(self):
# GH 14467
result = DataFrame(
[[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]]
)
expected = DataFrame(
[[1, 2, 3], [4, 5, 6]],
columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]),
)
tm.assert_frame_equal(result, expected)
def test_tzaware_data_tznaive_dtype(self, constructor):
    # A tz-aware scalar combined with a tz-naive dtype must warn, and the
    # constructed object must hold the tz-naive equivalent.
    aware = Timestamp("2019", tz="US/Eastern")
    naive = Timestamp("2019")

    expect_warning = tm.assert_produces_warning(
        FutureWarning, match="Data is timezone-aware", check_stacklevel=False
    )
    with expect_warning:
        result = constructor(aware, dtype="M8[ns]")

    assert np.all(result.dtypes == "M8[ns]")
    assert np.all(result == naive)
22 changes: 19 additions & 3 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1536,10 +1536,26 @@ def test_constructor_tz_mixed_data(self):
tm.assert_series_equal(result, expected)

def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
    # GH#25843, GH#41555, GH#33401
    # Each Series construction path below receives tz-aware data together
    # with a tz-naive dtype; each must emit the deprecation warning and
    # produce the same tz-naive Series.
    tz = tz_aware_fixture
    ts = Timestamp("2019", tz=tz)
    ts_naive = Timestamp("2019")

    # list
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = Series([ts], dtype="datetime64[ns]")
    expected = Series([ts_naive])
    tm.assert_series_equal(result, expected)

    # object ndarray
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = Series(np.array([ts], dtype=object), dtype="datetime64[ns]")
    tm.assert_series_equal(result, expected)

    # dict
    with tm.assert_produces_warning(FutureWarning):
        result = Series({0: ts}, dtype="datetime64[ns]")
    tm.assert_series_equal(result, expected)

    # bare scalar broadcast over the index
    with tm.assert_produces_warning(FutureWarning):
        result = Series(ts, index=[0], dtype="datetime64[ns]")
    tm.assert_series_equal(result, expected)

def test_constructor_datetime64(self):
Expand Down