diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index ee1323b71f146..79ecf8620c70c 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -346,7 +346,7 @@ def astype(self, dtype, copy=True):
         elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
             if is_extension_array_dtype(dtype):
                 arr_cls = dtype.construct_array_type()
-                return arr_cls._from_sequence(self, dtype=dtype)
+                return arr_cls._from_sequence(self, dtype=dtype, copy=copy)
             else:
                 return self._format_native_types()
         elif is_integer_dtype(dtype):
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index b072ac3cec52e..b76051e4dce80 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -589,10 +589,15 @@ def astype(self, dtype, copy=True):
 
         if is_datetime64_ns_dtype(dtype) and not is_dtype_equal(dtype, self.dtype):
             # GH#18951: datetime64_ns dtype but not equal means different tz
+            # FIXME: this doesn't match DatetimeBlock.astype, xref GH#33401
             new_tz = getattr(dtype, "tz", None)
-            if getattr(self.dtype, "tz", None) is None:
+            if self.tz is None:
                 return self.tz_localize(new_tz)
-            result = self.tz_convert(new_tz)
+            elif new_tz is None:
+                result = self.tz_convert("UTC").tz_localize(None)
+            else:
+                result = self.tz_convert(new_tz)
+
             if copy:
                 result = result.copy()
             if new_tz is None:
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index abcc60a15c641..3302ae0d6b8cf 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -30,7 +30,6 @@
     conversion,
     iNaT,
     ints_to_pydatetime,
-    ints_to_pytimedelta,
 )
 from pandas._libs.tslibs.timezones import tz_compare
 from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar
@@ -987,15 +986,21 @@ def astype_nansafe(
     elif not isinstance(dtype, np.dtype):
         raise ValueError("dtype must be np.dtype or ExtensionDtype")
 
+    if arr.dtype.kind in ["m", "M"] and (
+        issubclass(dtype.type, str) or dtype == object
+    ):
+        from pandas.core.construction import ensure_wrapped_if_datetimelike
+
+        arr = ensure_wrapped_if_datetimelike(arr)
+        return arr.astype(dtype, copy=copy)
+
     if issubclass(dtype.type, str):
         return lib.ensure_string_array(
             arr.ravel(), skipna=skipna, convert_na_value=False
         ).reshape(arr.shape)
 
     elif is_datetime64_dtype(arr):
-        if is_object_dtype(dtype):
-            return ints_to_pydatetime(arr.view(np.int64))
-        elif dtype == np.int64:
+        if dtype == np.int64:
             if isna(arr).any():
                 raise ValueError("Cannot convert NaT values to integer")
             return arr.view(dtype)
@@ -1007,9 +1012,7 @@ def astype_nansafe(
         raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")
 
     elif is_timedelta64_dtype(arr):
-        if is_object_dtype(dtype):
-            return ints_to_pytimedelta(arr.view(np.int64))
-        elif dtype == np.int64:
+        if dtype == np.int64:
             if isna(arr).any():
                 raise ValueError("Cannot convert NaT values to integer")
             return arr.view(dtype)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 9b0c3caa0b407..446186f8414ee 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5852,6 +5852,7 @@ def astype(
         elif is_extension_array_dtype(dtype) and self.ndim > 1:
             # GH 18099/22869: columnwise conversion to extension dtype
             # GH 24704: use iloc to handle duplicate column names
+            # TODO(EA2D): special case not needed with 2D EAs
             results = [
                 self.iloc[:, i].astype(dtype, copy=copy)
                 for i in range(len(self.columns))
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 59301391a7dad..0bd60931f9a7e 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -75,7 +75,7 @@
 )
 from pandas.core.base import PandasObject
 import pandas.core.common as com
-from pandas.core.construction import array as pd_array, extract_array
+from pandas.core.construction import extract_array
 from pandas.core.indexers import (
     check_setitem_lengths,
     is_empty_indexer,
@@ -676,24 +676,6 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
             values = values.astype(dtype, copy=copy)
 
         else:
-            if issubclass(dtype.type, str):
-                if values.dtype.kind in ["m", "M"]:
-                    # use native type formatting for datetime/tz/timedelta
-                    arr = pd_array(values)
-                    # Note: in the case where dtype is an np.dtype, i.e. not
-                    # StringDtype, this matches arr.astype(dtype), xref GH#36153
-                    values = arr._format_native_types(na_rep="NaT")
-
-            elif is_object_dtype(dtype):
-                if values.dtype.kind in ["m", "M"]:
-                    # Wrap in Timedelta/Timestamp
-                    arr = pd_array(values)
-                    values = arr.astype(object)
-                else:
-                    values = values.astype(object)
-                # We still need to go through astype_nansafe for
-                # e.g. dtype = Sparse[object, 0]
-
             values = astype_nansafe(values, dtype, copy=True)
 
         return values
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 7cc032e61e989..5365929213503 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -126,7 +126,12 @@ def test_string_methods(input, method, dtype, request):
 def test_astype_roundtrip(dtype, request):
     if dtype == "arrow_string":
         reason = "ValueError: Could not convert object to NumPy datetime"
-        mark = pytest.mark.xfail(reason=reason)
+        mark = pytest.mark.xfail(reason=reason, raises=ValueError)
+        request.node.add_marker(mark)
+    else:
+        mark = pytest.mark.xfail(
+            reason="GH#36153 casting from StringArray to dt64 fails", raises=ValueError
+        )
         request.node.add_marker(mark)
 
     ser = pd.Series(pd.date_range("2000", periods=12))
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 54559400e3510..3c65551aafd0f 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -611,3 +611,31 @@ def test_astype_tz_object_conversion(self, tz):
         # do real test: object dtype to a specified tz, different from construction tz.
         result = result.astype({"tz": "datetime64[ns, Europe/London]"})
         tm.assert_frame_equal(result, expected)
+
+    def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture, request):
+        tz = tz_naive_fixture
+        if tz is None:
+            mark = pytest.mark.xfail(
+                reason="GH#36153 uses ndarray formatting instead of DTA formatting"
+            )
+            request.node.add_marker(mark)
+
+        dti = date_range("2016-01-01", periods=3, tz=tz)
+        dta = dti._data
+        dta[0] = NaT
+
+        obj = frame_or_series(dta)
+        result = obj.astype("string")
+
+        # Check that Series/DataFrame.astype matches DatetimeArray.astype
+        expected = frame_or_series(dta.astype("string"))
+        tm.assert_equal(result, expected)
+
+        item = result.iloc[0]
+        if frame_or_series is DataFrame:
+            item = item.iloc[0]
+        assert item is pd.NA
+
+        # For non-NA values, we should match what we get for non-EA str
+        alt = obj.astype(str)
+        assert np.all(alt.iloc[1:] == result.iloc[1:])