From dc90ce6a5ad0e1a7edd7c799a6cacf03ee0913d0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 13 Dec 2020 10:29:57 -0800
Subject: [PATCH 1/3] REF: roll DatetimeBlock.astype into Block._astype

---
 pandas/core/arrays/datetimes.py | 9 +++++++--
 pandas/core/generic.py          | 1 +
 pandas/core/internals/blocks.py | 2 +-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index 8c94a1a080dca..359c17d61239a 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -589,10 +589,15 @@ def astype(self, dtype, copy=True):
 
         if is_datetime64_ns_dtype(dtype) and not is_dtype_equal(dtype, self.dtype):
             # GH#18951: datetime64_ns dtype but not equal means different tz
+            # FIXME: this doesn't match DatetimeBlock.astype, xref GH#33401
             new_tz = getattr(dtype, "tz", None)
-            if getattr(self.dtype, "tz", None) is None:
+            if self.tz is None:
                 return self.tz_localize(new_tz)
-            result = self.tz_convert(new_tz)
+            elif new_tz is None:
+                result = self.tz_convert("UTC").tz_localize(None)
+            else:
+                result = self.tz_convert(new_tz)
+
             if copy:
                 result = result.copy()
             if new_tz is None:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index f9aa5ca9e8ea9..e9228dde9d546 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -5852,6 +5852,7 @@ def astype(
         elif is_extension_array_dtype(dtype) and self.ndim > 1:
             # GH 18099/22869: columnwise conversion to extension dtype
             # GH 24704: use iloc to handle duplicate column names
+            # TODO(EA2D): special case not needed with 2D EAs
             results = [
                 self.iloc[:, i].astype(dtype, copy=copy)
                 for i in range(len(self.columns))
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index b9558daf05ad2..1da2b1482f686 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -654,7 +654,7 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
 
             return Categorical(values, dtype=dtype)
 
-        elif is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
+        if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
             # if we are passed a datetime64[ns, tz]
             if copy:
                 # this should be the only copy

From 59cb93e698ea1ee4fb4cc891c8bbef627c05f7df Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 15 Dec 2020 08:46:03 -0800
Subject: [PATCH 2/3] Make Series[dt64].astype(string) match
 DTA.astype(string) and astype_nansafe

---
 pandas/core/arrays/datetimelike.py         |  2 +-
 pandas/core/dtypes/cast.py                 |  8 +++++++
 pandas/core/internals/blocks.py            | 12 ++++++----
 pandas/tests/arrays/string_/test_string.py |  7 +++++-
 pandas/tests/frame/methods/test_astype.py  | 28 ++++++++++++++++++++++
 5 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index be9864731842d..3e21a7a242b46 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -346,7 +346,7 @@ def astype(self, dtype, copy=True):
         elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
             if is_extension_array_dtype(dtype):
                 arr_cls = dtype.construct_array_type()
-                return arr_cls._from_sequence(self, dtype=dtype)
+                return arr_cls._from_sequence(self, dtype=dtype, copy=copy)
             else:
                 return self._format_native_types()
         elif is_integer_dtype(dtype):
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 63445d0e1598d..7bc864279e0a5 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -980,6 +980,14 @@ def astype_nansafe(
     elif not isinstance(dtype, np.dtype):
         raise ValueError("dtype must be np.dtype or ExtensionDtype")
 
+    if arr.dtype.kind in ["m", "M"] and (
+        issubclass(dtype.type, str) or dtype == object
+    ):
+        from pandas.core.construction import ensure_wrapped_if_datetimelike
+
+        arr = ensure_wrapped_if_datetimelike(arr)
+        return arr.astype(dtype, copy=copy)
+
     if issubclass(dtype.type, str):
         return lib.ensure_string_array(
             arr.ravel(), skipna=skipna, convert_na_value=False
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 1da2b1482f686..9339a9197bae8 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -672,14 +672,17 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
         if isinstance(values, ExtensionArray):
             values = values.astype(dtype, copy=copy)
 
+        elif isinstance(dtype, ExtensionDtype):
+            # same thing we do in astype_nansafe
+            cls = dtype.construct_array_type()
+            return cls._from_sequence(values, dtype=dtype, copy=copy)
+
         else:
             if issubclass(dtype.type, str):
                 if values.dtype.kind in ["m", "M"]:
                     # use native type formatting for datetime/tz/timedelta
                     arr = pd_array(values)
-                    # Note: in the case where dtype is an np.dtype, i.e. not
-                    #  StringDtype, this matches arr.astype(dtype), xref GH#36153
-                    values = arr._format_native_types(na_rep="NaT")
+                    return arr.astype(dtype)
 
             elif is_object_dtype(dtype):
                 if values.dtype.kind in ["m", "M"]:
@@ -688,8 +691,7 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
                     values = arr.astype(object)
                 else:
                     values = values.astype(object)
-                # We still need to go through astype_nansafe for
-                #  e.g. dtype = Sparse[object, 0]
+                return values
 
             # astype_nansafe works with 1-d only
             vals1d = values.ravel()
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 7cc032e61e989..5365929213503 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -126,7 +126,12 @@ def test_string_methods(input, method, dtype, request):
 def test_astype_roundtrip(dtype, request):
     if dtype == "arrow_string":
         reason = "ValueError: Could not convert object to NumPy datetime"
-        mark = pytest.mark.xfail(reason=reason)
+        mark = pytest.mark.xfail(reason=reason, raises=ValueError)
+        request.node.add_marker(mark)
+    else:
+        mark = pytest.mark.xfail(
+            reason="GH#36153 casting from StringArray to dt64 fails", raises=ValueError
+        )
         request.node.add_marker(mark)
 
     ser = pd.Series(pd.date_range("2000", periods=12))
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 54559400e3510..3c65551aafd0f 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -611,3 +611,31 @@ def test_astype_tz_object_conversion(self, tz):
         # do real test: object dtype to a specified tz, different from construction tz.
         result = result.astype({"tz": "datetime64[ns, Europe/London]"})
         tm.assert_frame_equal(result, expected)
+
+    def test_astype_dt64_to_string(self, frame_or_series, tz_naive_fixture, request):
+        tz = tz_naive_fixture
+        if tz is None:
+            mark = pytest.mark.xfail(
+                reason="GH#36153 uses ndarray formatting instead of DTA formatting"
+            )
+            request.node.add_marker(mark)
+
+        dti = date_range("2016-01-01", periods=3, tz=tz)
+        dta = dti._data
+        dta[0] = NaT
+
+        obj = frame_or_series(dta)
+        result = obj.astype("string")
+
+        # Check that Series/DataFrame.astype matches DatetimeArray.astype
+        expected = frame_or_series(dta.astype("string"))
+        tm.assert_equal(result, expected)
+
+        item = result.iloc[0]
+        if frame_or_series is DataFrame:
+            item = item.iloc[0]
+        assert item is pd.NA
+
+        # For non-NA values, we should match what we get for non-EA str
+        alt = obj.astype(str)
+        assert np.all(alt.iloc[1:] == result.iloc[1:])

From 63c8fdf93b1c5c1f812392410205ba92e440e298 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 15 Dec 2020 10:07:21 -0800
Subject: [PATCH 3/3] REF: make Block._astype call astype_nansafe more

---
 pandas/core/dtypes/cast.py      |  9 ++-------
 pandas/core/internals/blocks.py | 24 ++----------------------
 2 files changed, 4 insertions(+), 29 deletions(-)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 7bc864279e0a5..1b89b68f80432 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -30,7 +30,6 @@
     conversion,
     iNaT,
     ints_to_pydatetime,
-    ints_to_pytimedelta,
 )
 from pandas._libs.tslibs.timezones import tz_compare
 from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar
@@ -994,9 +993,7 @@ def astype_nansafe(
         ).reshape(arr.shape)
 
     elif is_datetime64_dtype(arr):
-        if is_object_dtype(dtype):
-            return ints_to_pydatetime(arr.view(np.int64))
-        elif dtype == np.int64:
+        if dtype == np.int64:
             if isna(arr).any():
                 raise ValueError("Cannot convert NaT values to integer")
             return arr.view(dtype)
@@ -1008,9 +1005,7 @@ def astype_nansafe(
         raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")
 
     elif is_timedelta64_dtype(arr):
-        if is_object_dtype(dtype):
-            return ints_to_pytimedelta(arr.view(np.int64))
-        elif dtype == np.int64:
+        if dtype == np.int64:
             if isna(arr).any():
                 raise ValueError("Cannot convert NaT values to integer")
             return arr.view(dtype)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 9339a9197bae8..f1c2fa847ffc2 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -68,7 +68,7 @@
 )
 from pandas.core.base import PandasObject
 import pandas.core.common as com
-from pandas.core.construction import array as pd_array, extract_array
+from pandas.core.construction import extract_array
 from pandas.core.indexers import (
     check_setitem_lengths,
     is_empty_indexer,
@@ -654,7 +654,7 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
 
             return Categorical(values, dtype=dtype)
 
-        if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
+        elif is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
             # if we are passed a datetime64[ns, tz]
             if copy:
                 # this should be the only copy
@@ -672,27 +672,7 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike:
         if isinstance(values, ExtensionArray):
             values = values.astype(dtype, copy=copy)
 
-        elif isinstance(dtype, ExtensionDtype):
-            # same thing we do in astype_nansafe
-            cls = dtype.construct_array_type()
-            return cls._from_sequence(values, dtype=dtype, copy=copy)
-
         else:
-            if issubclass(dtype.type, str):
-                if values.dtype.kind in ["m", "M"]:
-                    # use native type formatting for datetime/tz/timedelta
-                    arr = pd_array(values)
-                    return arr.astype(dtype)
-
-            elif is_object_dtype(dtype):
-                if values.dtype.kind in ["m", "M"]:
-                    # Wrap in Timedelta/Timestamp
-                    arr = pd_array(values)
-                    values = arr.astype(object)
-                else:
-                    values = values.astype(object)
-                return values
-
             # astype_nansafe works with 1-d only
             vals1d = values.ravel()
             values = astype_nansafe(vals1d, dtype, copy=True)