pandas-dev · jreback · Sep 30, 2020 · May 15, 2020 · May 22, 2020 · May 22, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -109,6 +109,54 @@ Beginning with this version, the default is now to use the more accurate parser
 ``floating_precision="legacy"`` to use the legacy parser. The change to using the higher precision
 parser by default should have no impact on performance. (:issue:`17154`)
 
+.. _whatsnew_120.floating:
+
+Experimental nullable data types for float data
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We've added :class:`Float32Dtype` / :class:`Float64Dtype` and :class:`~arrays.FloatingArray`,
+an extension data type dedicated to floating point data that can hold the
+``pd.NA`` missing value indicator (:issue:`32265`, :issue:`34307`).
+
+While the default float data type already supports missing values using ``np.nan``,
+this new data type uses ``pd.NA`` (and its corresponding behaviour) as the missing
+value indicator, in line with the already existing nullable :ref:`integer <integer_na>`
+and :ref:`boolean <boolean>` data types.
+
+One example where the behaviour of ``np.nan`` and ``pd.NA`` is different is
+comparison operations:
+
+.. ipython:: python
+
+  # the default numpy float64 dtype
+  s1 = pd.Series([1.5, None])
+  s1
+  s1 > 1
+
+.. ipython:: python
+
+  # the new nullable float64 dtype
+  s2 = pd.Series([1.5, None], dtype="Float64")
+  s2
+  s2 > 1
+
+See the :ref:`missing_data.NA` doc section for more details on the behaviour
+when using the ``pd.NA`` missing value indicator.
+
+As shown above, the dtype can be specified using the "Float64" or "Float32"
+string (capitalized to distinguish it from the default "float64" data type).
+Alternatively, you can also use the dtype object:
+
+.. ipython:: python
+
+   pd.Series([1.5, None], dtype=pd.Float32Dtype())
+
+.. warning::
+
+   Experimental: the new floating data types are currently experimental, and their
+   behaviour or API may still change without warning. Especially the behaviour
+   regarding NaN (distinct from NA missing values) is subject to change.
+
 .. _whatsnew_120.enhancements.other:
 
 Other enhancements

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -58,6 +58,8 @@
     UInt16Dtype,
     UInt32Dtype,
     UInt64Dtype,
+    Float32Dtype,
+    Float64Dtype,
     CategoricalDtype,
     PeriodDtype,
     IntervalDtype,

diff --git a/pandas/_testing.py b/pandas/_testing.py
@@ -84,6 +84,7 @@
 ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
 
 FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"]
+FLOAT_EA_DTYPES: List[Dtype] = ["Float32", "Float64"]
 COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"]
 STRING_DTYPES: List[Dtype] = [str, "str", "U"]
 

diff --git a/pandas/arrays/__init__.py b/pandas/arrays/__init__.py
@@ -7,6 +7,7 @@
     BooleanArray,
     Categorical,
     DatetimeArray,
+    FloatingArray,
     IntegerArray,
     IntervalArray,
     PandasArray,
@@ -20,6 +21,7 @@
     "BooleanArray",
     "Categorical",
     "DatetimeArray",
+    "FloatingArray",
     "IntegerArray",
     "IntervalArray",
     "PandasArray",

diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -984,6 +984,17 @@ def float_dtype(request):
     return request.param
 
 
+@pytest.fixture(params=tm.FLOAT_EA_DTYPES)
+def float_ea_dtype(request):
+    """
+    Parameterized fixture for nullable float extension dtypes.
+
+    * 'Float32'
+    * 'Float64'
+    """
+    return request.param
+
+
 @pytest.fixture(params=tm.COMPLEX_DTYPES)
 def complex_dtype(request):
     """

diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -14,6 +14,7 @@
 from pandas.core.algorithms import factorize, unique, value_counts
 from pandas.core.arrays import Categorical
 from pandas.core.arrays.boolean import BooleanDtype
+from pandas.core.arrays.floating import Float32Dtype, Float64Dtype
 from pandas.core.arrays.integer import (
     Int8Dtype,
     Int16Dtype,

diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -6,8 +6,10 @@
 from pandas.core.arrays.boolean import BooleanArray
 from pandas.core.arrays.categorical import Categorical
 from pandas.core.arrays.datetimes import DatetimeArray
+from pandas.core.arrays.floating import FloatingArray
 from pandas.core.arrays.integer import IntegerArray, integer_array
 from pandas.core.arrays.interval import IntervalArray
+from pandas.core.arrays.masked import BaseMaskedArray
 from pandas.core.arrays.numpy_ import PandasArray, PandasDtype
 from pandas.core.arrays.period import PeriodArray, period_array
 from pandas.core.arrays.sparse import SparseArray
@@ -18,9 +20,11 @@
     "ExtensionArray",
     "ExtensionOpsMixin",
     "ExtensionScalarOpsMixin",
+    "BaseMaskedArray",
     "BooleanArray",
     "Categorical",
     "DatetimeArray",
+    "FloatingArray",
     "IntegerArray",
     "integer_array",
     "IntervalArray",

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
@@ -59,7 +59,7 @@ class BooleanDtype(BaseMaskedDtype):
     name = "boolean"
 
     @property
-    def type(self) -> Type[np.bool_]:
+    def type(self) -> Type:
         return np.bool_
 
     @property
@@ -606,10 +606,9 @@ def logical_method(self, other):
     def _create_comparison_method(cls, op):
         @ops.unpack_zerodim_and_defer(op.__name__)
         def cmp_method(self, other):
-            from pandas.arrays import IntegerArray
+            from pandas.arrays import FloatingArray, IntegerArray
 
-            if isinstance(other, IntegerArray):
-                # Rely on pandas to unbox and dispatch to us.
+            if isinstance(other, (IntegerArray, FloatingArray)):
                 return NotImplemented
 
             mask = None