From 2dfbe0ceccda6445f9572856680f77a75a61d468 Mon Sep 17 00:00:00 2001
From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com>
Date: Tue, 27 Sep 2022 00:51:45 +0200
Subject: [PATCH] Backport PR #48782 on branch 1.5.x (REGR: describe raising
 when result contains NA) (#48793)

Backport PR #48782: REGR: describe raising when result contains NA

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
---
 doc/source/whatsnew/v1.5.1.rst               |  1 +
 pandas/core/describe.py                      | 11 ++++++++++-
 pandas/tests/frame/methods/test_describe.py  | 12 ++++++++++++
 pandas/tests/series/methods/test_describe.py | 12 ++++++++++--
 4 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst
index da0bd746e3da5..6798c7074228c 100644
--- a/doc/source/whatsnew/v1.5.1.rst
+++ b/doc/source/whatsnew/v1.5.1.rst
@@ -72,6 +72,7 @@ Fixed regressions
 - Fixed Regression in :meth:`Series.__setitem__` casting ``None`` to ``NaN`` for object dtype (:issue:`48665`)
 - Fixed Regression in :meth:`DataFrame.loc` when setting values as a :class:`DataFrame` with all ``True`` indexer (:issue:`48701`)
 - Regression in :func:`.read_csv` causing an ``EmptyDataError`` when using an UTF-8 file handle that was already read from (:issue:`48646`)
+- Fixed regression in :meth:`DataFrame.describe` raising ``TypeError`` when result contains ``NA`` (:issue:`48778`)
 - Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`)
 - Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
 -
diff --git a/pandas/core/describe.py b/pandas/core/describe.py
index d6546b06ec711..e6f567b123b59 100644
--- a/pandas/core/describe.py
+++ b/pandas/core/describe.py
@@ -24,6 +24,7 @@
 
 from pandas._libs.tslibs import Timestamp
 from pandas._typing import (
+    DtypeObj,
     NDFrameT,
     npt,
 )
@@ -34,10 +35,12 @@
     is_bool_dtype,
     is_complex_dtype,
     is_datetime64_any_dtype,
+    is_extension_array_dtype,
     is_numeric_dtype,
     is_timedelta64_dtype,
 )
 
+import pandas as pd
 from pandas.core.reshape.concat import concat
 
 from pandas.io.formats.format import format_percentiles
@@ -242,7 +245,13 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
         + [series.max()]
     )
     # GH#48340 - always return float on non-complex numeric data
-    dtype = float if is_numeric_dtype(series) and not is_complex_dtype(series) else None
+    dtype: DtypeObj | None
+    if is_extension_array_dtype(series):
+        dtype = pd.Float64Dtype()
+    elif is_numeric_dtype(series) and not is_complex_dtype(series):
+        dtype = np.dtype("float")
+    else:
+        dtype = None
     return Series(d, index=stat_index, name=series.name, dtype=dtype)
 
 
diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
index 3a1228ee5c4a5..24d327a101143 100644
--- a/pandas/tests/frame/methods/test_describe.py
+++ b/pandas/tests/frame/methods/test_describe.py
@@ -397,3 +397,15 @@ def test_describe_with_duplicate_columns(self):
         ser = df.iloc[:, 0].describe()
         expected = pd.concat([ser, ser, ser], keys=df.columns, axis=1)
         tm.assert_frame_equal(result, expected)
+
+    def test_ea_with_na(self, any_numeric_ea_dtype):
+        # GH#48778 - describe on numeric EA dtypes returns Float64 and keeps NA
+
+        df = DataFrame({"a": [1, pd.NA, pd.NA], "b": pd.NA}, dtype=any_numeric_ea_dtype)
+        result = df.describe()
+        expected = DataFrame(
+            {"a": [1.0, 1.0, pd.NA] + [1.0] * 5, "b": [0.0] + [pd.NA] * 7},
+            index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
+            dtype="Float64",
+        )
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_describe.py b/pandas/tests/series/methods/test_describe.py
index d7650e2768781..a7cedd580b2d0 100644
--- a/pandas/tests/series/methods/test_describe.py
+++ b/pandas/tests/series/methods/test_describe.py
@@ -1,6 +1,9 @@
 import numpy as np
 
-from pandas.core.dtypes.common import is_complex_dtype
+from pandas.core.dtypes.common import (
+    is_complex_dtype,
+    is_extension_array_dtype,
+)
 
 from pandas import (
     Period,
@@ -154,6 +157,11 @@ def test_datetime_is_numeric_includes_datetime(self):
 
     def test_numeric_result_dtype(self, any_numeric_dtype):
         # GH#48340 - describe should always return float on non-complex numeric input
+        if is_extension_array_dtype(any_numeric_dtype):
+            dtype = "Float64"
+        else:
+            dtype = "complex128" if is_complex_dtype(any_numeric_dtype) else None
+
         ser = Series([0, 1], dtype=any_numeric_dtype)
         result = ser.describe()
         expected = Series(
@@ -168,6 +176,6 @@ def test_numeric_result_dtype(self, any_numeric_dtype):
                 1.0,
             ],
             index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
-            dtype="complex128" if is_complex_dtype(ser) else None,
+            dtype=dtype,
         )
         tm.assert_series_equal(result, expected)