From 1f93779575610596dba79789f750ef37ce46802e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Jan 2022 17:16:12 -0600 Subject: [PATCH 01/13] ENH: add NDArrayBackedExtensionArray to public API --- pandas/api/extensions/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index ea5f1ba926899..7b8444ba91876 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -19,6 +19,7 @@ ExtensionArray, ExtensionScalarOpsMixin, ) +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray __all__ = [ "no_default", @@ -30,4 +31,5 @@ "take", "ExtensionArray", "ExtensionScalarOpsMixin", + "NDArrayBackedExtensionArray", ] From 522b548bdd2204f2ef0f0f2db412a67bf64a254c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 21 Jan 2022 17:17:26 -0600 Subject: [PATCH 02/13] add whatsnew --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c688ced673514..5d8c32992ec87 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -34,7 +34,7 @@ Other enhancements - :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. (:issue:`40839`) - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) - :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) -- +- :class:`~pandas.api.extensions.NDArrayBackedExtensionArray` is now exposed in the public API. (:issue:`45544`) .. --------------------------------------------------------------------------- .. 
_whatsnew_150.notable_bug_fixes: From 945f8404b124f9966b797d9b67d73b7bb01944d2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 10:09:04 -0600 Subject: [PATCH 03/13] add NDArrayBackedExtensionArray to pandas.core.arrays.__init__ --- pandas/api/extensions/__init__.py | 2 +- pandas/core/arrays/__init__.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py index 7b8444ba91876..2c3fee93fffe0 100644 --- a/pandas/api/extensions/__init__.py +++ b/pandas/api/extensions/__init__.py @@ -18,8 +18,8 @@ from pandas.core.arrays import ( ExtensionArray, ExtensionScalarOpsMixin, + NDArrayBackedExtensionArray, ) -from pandas.core.arrays._mixins import NDArrayBackedExtensionArray __all__ = [ "no_default", diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index e301e82a0ee75..5ce1480d5506e 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,3 +1,4 @@ +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.base import ( ExtensionArray, ExtensionOpsMixin, @@ -32,6 +33,7 @@ "FloatingArray", "IntegerArray", "IntervalArray", + "NDArrayBackedExtensionArray", "PandasArray", "PeriodArray", "period_array", From 721ae110e3a39bfa0f8ce065b6b914fbe9e7e734 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 10:22:40 -0600 Subject: [PATCH 04/13] add tests for extensions api --- pandas/tests/api/test_api.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 2e306c76d246c..8769cc7cc91da 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -8,6 +8,7 @@ import pandas as pd from pandas import api import pandas._testing as tm +from pandas.api import extensions class Base: @@ -280,6 +281,33 @@ def test_api(self): self.check(api, self.allowed) +class TestExtensions(Base): + # 
top-level classes + classes = [ + "ExtensionDtype", + "ExtensionArray", + "ExtensionScalarOpsMixin", + "NDArrayBackedExtensionArray", + ] + + # top-level functions + funcs = [ + "register_extension_dtype", + "register_dataframe_accessor", + "register_index_accessor", + "register_series_accessor", + "take", + ] + + # misc + misc = ["no_default"] + + def test_api(self): + checkthese = self.classes + self.funcs + self.misc + + self.check(namespace=extensions, expected=checkthese) + + class TestTesting(Base): funcs = [ "assert_frame_equal", From ae68f9ddd8797d20019c6854ecda24159761d901 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 11:07:05 -0600 Subject: [PATCH 05/13] add docs --- doc/source/development/extending.rst | 8 ++++++++ doc/source/reference/extensions.rst | 1 + 2 files changed, 9 insertions(+) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 5347aab2c731a..20bd1604afde7 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -134,6 +134,14 @@ by some other storage type, like Python lists. See the `extension array source`_ for the interface definition. The docstrings and comments contain guidance for properly implementing the interface. +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For ExtensionArrays backed by a single NumPy array, the +:class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you +some effort. It contains a private property ``_ndarray`` with the backing NumPy +array and implements the extension array interface. + .. _extending.extension.operator: :class:`~pandas.api.extensions.ExtensionArray` operator support diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index ce8d8d5c2ca10..cfe7878dbc977 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -24,6 +24,7 @@ objects. 
:template: autosummary/class_without_autosummary.rst api.extensions.ExtensionArray + api.extensions.NDArrayBackedExtensionArray arrays.PandasArray .. We need this autosummary so that methods and attributes are generated. From 38113c818f3a0d45c456515ea8fc1b003228b600 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 14:29:37 -0600 Subject: [PATCH 06/13] add autosummary for methods and attributes --- doc/source/reference/extensions.rst | 32 +++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index cfe7878dbc977..f7a984ce50848 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -63,6 +63,38 @@ objects. api.extensions.ExtensionArray.ndim api.extensions.ExtensionArray.shape api.extensions.ExtensionArray.tolist + api.extensions.NDArrayBackedExtensionArray.dtype + api.extensions.NDArrayBackedExtensionArray.T + api.extensions.NDArrayBackedExtensionArray.nbytes + api.extensions.NDArrayBackedExtensionArray.ndim + api.extensions.NDArrayBackedExtensionArray.shape + api.extensions.NDArrayBackedExtensionArray.size + api.extensions.NDArrayBackedExtensionArray.argmax + api.extensions.NDArrayBackedExtensionArray.argmin + api.extensions.NDArrayBackedExtensionArray.argsort + api.extensions.NDArrayBackedExtensionArray.astype + api.extensions.NDArrayBackedExtensionArray.dropna + api.extensions.NDArrayBackedExtensionArray.equals + api.extensions.NDArrayBackedExtensionArray.factorize + api.extensions.NDArrayBackedExtensionArray.fillna + api.extensions.NDArrayBackedExtensionArray.insert + api.extensions.NDArrayBackedExtensionArray.isin + api.extensions.NDArrayBackedExtensionArray.isna + api.extensions.NDArrayBackedExtensionArray.searchsorted + api.extensions.NDArrayBackedExtensionArray.shift + api.extensions.NDArrayBackedExtensionArray.take + api.extensions.NDArrayBackedExtensionArray.to_numpy + 
api.extensions.NDArrayBackedExtensionArray.tolist + api.extensions.NDArrayBackedExtensionArray.unique + api.extensions.NDArrayBackedExtensionArray.value_counts + api.extensions.NDArrayBackedExtensionArray.view + api.extensions.NDArrayBackedExtensionArray.copy + api.extensions.NDArrayBackedExtensionArray.delete + api.extensions.NDArrayBackedExtensionArray.ravel + api.extensions.NDArrayBackedExtensionArray.repeat + api.extensions.NDArrayBackedExtensionArray.reshape + api.extensions.NDArrayBackedExtensionArray.swapaxes + api.extensions.NDArrayBackedExtensionArray.transpose Additionally, we have some utility methods for ensuring your object behaves correctly. From 18ec784440632a8950262f5bad39ce5298f8b2fd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 24 Jan 2022 15:55:11 -0600 Subject: [PATCH 07/13] remove unreferenced methods from docs --- doc/source/reference/extensions.rst | 12 ------------ pandas/core/arrays/_mixins.py | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index f7a984ce50848..8bad4e9a5e85a 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -64,11 +64,6 @@ objects. api.extensions.ExtensionArray.shape api.extensions.ExtensionArray.tolist api.extensions.NDArrayBackedExtensionArray.dtype - api.extensions.NDArrayBackedExtensionArray.T - api.extensions.NDArrayBackedExtensionArray.nbytes - api.extensions.NDArrayBackedExtensionArray.ndim - api.extensions.NDArrayBackedExtensionArray.shape - api.extensions.NDArrayBackedExtensionArray.size api.extensions.NDArrayBackedExtensionArray.argmax api.extensions.NDArrayBackedExtensionArray.argmin api.extensions.NDArrayBackedExtensionArray.argsort @@ -88,13 +83,6 @@ objects. 
api.extensions.NDArrayBackedExtensionArray.unique api.extensions.NDArrayBackedExtensionArray.value_counts api.extensions.NDArrayBackedExtensionArray.view - api.extensions.NDArrayBackedExtensionArray.copy - api.extensions.NDArrayBackedExtensionArray.delete - api.extensions.NDArrayBackedExtensionArray.ravel - api.extensions.NDArrayBackedExtensionArray.repeat - api.extensions.NDArrayBackedExtensionArray.reshape - api.extensions.NDArrayBackedExtensionArray.swapaxes - api.extensions.NDArrayBackedExtensionArray.transpose Additionally, we have some utility methods for ensuring your object behaves correctly. diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index a40be5a988f26..80dcb8734f65a 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -112,6 +112,13 @@ def _validate_scalar(self, value): # ------------------------------------------------------------------------ def view(self, dtype: Dtype | None = None) -> ArrayLike: + """ + Return a view on the array. + + See also + -------- + pandas.api.extensions.ExtensionArray.view + """ # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. Everything else we pass through to the underlying # ndarray. @@ -152,6 +159,14 @@ def take( fill_value: Any = None, axis: int = 0, ) -> NDArrayBackedExtensionArrayT: + """ + Take elements from an array. 
+ + See also + -------- + pandas.api.extensions.ExtensionArray.take + """ + if allow_fill: fill_value = self._validate_scalar(fill_value) From 2919f6051108ffa6c135c8630a65e34b9d30c940 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 25 Jan 2022 09:33:10 -0600 Subject: [PATCH 08/13] fix docstrings --- pandas/core/arrays/_mixins.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 80dcb8734f65a..9494aa29d3fcc 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -115,7 +115,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: """ Return a view on the array. - See also + Returns + ------- + ExtensionArray or np.ndarray + A view on the :class:`ExtensionArray`'s data. + + See Also -------- pandas.api.extensions.ExtensionArray.view """ @@ -162,7 +167,11 @@ def take( """ Take elements from an array. - See also + Returns + ------- + NDArrayBackedExtensionArray + + See Also -------- pandas.api.extensions.ExtensionArray.take """ From 319ac2b5f93c282042e64e70f666c35cce86c913 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 26 Jan 2022 10:11:10 -0600 Subject: [PATCH 09/13] use doc decorator --- pandas/core/arrays/_mixins.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 9494aa29d3fcc..3c4ecc1418c42 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -111,19 +111,8 @@ def _validate_scalar(self, value): # ------------------------------------------------------------------------ + @doc(ExtensionArray.view) def view(self, dtype: Dtype | None = None) -> ArrayLike: - """ - Return a view on the array. - - Returns - ------- - ExtensionArray or np.ndarray - A view on the :class:`ExtensionArray`'s data. 
- - See Also - -------- - pandas.api.extensions.ExtensionArray.view - """ # We handle datetime64, datetime64tz, timedelta64, and period # dtypes here. Everything else we pass through to the underlying # ndarray. @@ -156,6 +145,7 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike: # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]" return arr.view(dtype=dtype) # type: ignore[arg-type] + @doc(ExtensionArray.take) def take( self: NDArrayBackedExtensionArrayT, indices: TakeIndexer, @@ -164,18 +154,6 @@ def take( fill_value: Any = None, axis: int = 0, ) -> NDArrayBackedExtensionArrayT: - """ - Take elements from an array. - - Returns - ------- - NDArrayBackedExtensionArray - - See Also - -------- - pandas.api.extensions.ExtensionArray.take - """ - if allow_fill: fill_value = self._validate_scalar(fill_value) From 8513863c6c4334570ef27add0d49c9d135928f2a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 26 Jan 2022 10:37:05 -0600 Subject: [PATCH 10/13] add code samples and reference to test suite --- doc/source/development/extending.rst | 35 +++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 20bd1604afde7..5a6cc7b45e2b5 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -140,7 +140,40 @@ and comments contain guidance for properly implementing the interface. For ExtensionArrays backed by a single NumPy array, the :class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you some effort. It contains a private property ``_ndarray`` with the backing NumPy -array and implements the extension array interface. +array and implements the extension array interface. Implement the ``_box_func`` +method to convert from array values to the type you wish to expose to users. 
+Implement the ``_validate_scalar`` method to convert from an object to a value +which can be stored in the NumPy array. + +.. code-block:: python + + class CustomArray(NDArrayBackedExtensionArray): + def __init__(self, values): + backing_array_dtype = "int64" + super().__init__(values=values, dtype=backing_array_dtype) + + def _box_func(self, value): + scalar = CustomObject(value) + return scalar + + def _validate_scalar(self, scalar): + if not isinstance(scalar, CustomObject): + raise TypeError("can't convert scalar of this type") + return scalar.convert_to_int64() + +Optionally, subclass :class:`pandas.tests.extension.base.NDArrayBacked2DTests` +in your test suite to validate your implementation. + +.. code-block:: python + + @pytest.fixture + def data(): + return CustomArray(numpy.arange(-10, 10, 1) + + + class Test2DCompat(base.NDArrayBacked2DTests): + pass + .. _extending.extension.operator: From cc75eda45c173b34380880321293c7e0b1b8ea00 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 6 Apr 2022 16:42:03 -0500 Subject: [PATCH 11/13] add missing methods to extension docs --- doc/source/development/extending.rst | 87 +++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 13 deletions(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 5a6cc7b45e2b5..3276ca090d20e 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -140,29 +140,81 @@ and comments contain guidance for properly implementing the interface. For ExtensionArrays backed by a single NumPy array, the :class:`~pandas.api.extensions.NDArrayBackedExtensionArray` class can save you some effort. It contains a private property ``_ndarray`` with the backing NumPy -array and implements the extension array interface. Implement the ``_box_func`` -method to convert from array values to the type you wish to expose to users. 
-Implement the ``_validate_scalar`` method to convert from an object to a value -which can be stored in the NumPy array. +array and implements the extension array interface. + +Implement the following: + +``_box_func`` + Convert from array values to the type you wish to expose to users. + +``_internal_fill_value`` + Scalar used to denote ``NA`` value inside our ``self._ndarray``, e.g. ``-1`` + for ``Categorical``, ``iNaT`` for ``Period``. + +``_validate_scalar`` + Convert from an object to a value which can be stored in the NumPy array. + +``_validate_setitem_value`` + Convert a value or values for use in setting a value or values in the backing + NumPy array. + +``_validate_searchsorted_value`` + Convert a value for use in searching for a value in the backing NumPy array. .. code-block:: python - class CustomArray(NDArrayBackedExtensionArray): + class DateArray(NDArrayBackedExtensionArray): + _internal_fill_value = numpy.datetime64("NaT") + def __init__(self, values): - backing_array_dtype = "int64" + backing_array_dtype = " Date: Fri, 26 Aug 2022 17:30:21 -0500 Subject: [PATCH 12/13] clarify _validate_searchsorted_value and 2d backing array --- doc/source/development/extending.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/development/extending.rst b/doc/source/development/extending.rst index 17e761daf3f89..b74713e7102dc 100644 --- a/doc/source/development/extending.rst +++ b/doc/source/development/extending.rst @@ -161,6 +161,8 @@ Implement the following: ``_validate_searchsorted_value`` Convert a value for use in searching for a value in the backing NumPy array. + Note: in most cases, the implementation can be identical to that of + ``_validate_setitem_value``. .. code-block:: python @@ -196,7 +198,7 @@ Implement the following: To support 2D arrays, use the ``_from_backing_data`` helper function when a -method is called on multi-dimensional data. +method is called on multi-dimensional data of the same dtype as ``_ndarray``. 
.. code-block:: python From 38018e618428538e1ab79f03d4da194d53653551 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 23 Nov 2022 09:35:51 -0600 Subject: [PATCH 13/13] DOC: make insert docstring have single line summary --- pandas/core/arrays/_mixins.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 186526ac99227..1ceffac10dc11 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -390,8 +390,9 @@ def insert( self: NDArrayBackedExtensionArrayT, loc: int, item ) -> NDArrayBackedExtensionArrayT: """ - Make new ExtensionArray inserting new item at location. Follows - Python list.append semantics for negative values. + Make new ExtensionArray inserting new item at location. + + Follows Python list.append semantics for negative values. Parameters ----------