From 6cacdde5630c593999059833b516e1fec60aaf72 Mon Sep 17 00:00:00 2001 From: "Dr. Irv" Date: Thu, 26 Apr 2018 11:34:45 -0400 Subject: [PATCH] Change _can_hold_na to a class attribute and document that it shouldn't be changed (#20819) --- pandas/core/arrays/base.py | 26 ++++++++++-------------- pandas/tests/extension/base/interface.py | 3 ++- pandas/tests/extension/base/missing.py | 5 +---- pandas/tests/extension/conftest.py | 2 +- 4 files changed, 15 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9958be47267ee..f1a81b5eefddd 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -38,10 +38,9 @@ class ExtensionArray(object): * copy * _concat_same_type - Some additional methods are available to satisfy pandas' internal, private - block API: + An additional method is available to satisfy pandas' internal, + private block API. - * _can_hold_na * _formatting_values Some methods require casting the ExtensionArray to an ndarray of Python @@ -399,7 +398,8 @@ def _values_for_factorize(self): Returns ------- values : ndarray - An array suitable for factoraization. This should maintain order + + An array suitable for factorization. This should maintain order and be a supported dtype (Float64, Int64, UInt64, String, Object). By default, the extension array is cast to object dtype. na_value : object @@ -422,7 +422,7 @@ def factorize(self, na_sentinel=-1): Returns ------- labels : ndarray - An interger NumPy array that's an indexer into the original + An integer NumPy array that's an indexer into the original ExtensionArray. uniques : ExtensionArray An ExtensionArray containing the unique values of `self`. @@ -566,16 +566,12 @@ def _concat_same_type(cls, to_concat): """ raise AbstractMethodError(cls) - @property - def _can_hold_na(self): - # type: () -> bool - """Whether your array can hold missing values. True by default. - - Notes - ----- - Setting this to false will optimize some operations like fillna. - """ - return True + # The _can_hold_na attribute is set to True so that pandas internals + # will use the ExtensionDtype.na_value as the NA value in operations + # such as take(), reindex(), shift(), etc. In addition, those results + # will then be of the ExtensionArray subclass rather than an array + # of objects + _can_hold_na = True @property def _ndarray_values(self): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index 9b60652fbace3..8ef8debbdc666 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -21,7 +21,8 @@ def test_ndim(self, data): assert data.ndim == 1 def test_can_hold_na_valid(self, data): - assert data._can_hold_na in {True, False} + # GH-20761 + assert data._can_hold_na is True def test_memory_usage(self, data): s = pd.Series(data) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index f6cee9af0b722..32cf29818e069 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -9,10 +9,7 @@ class BaseMissingTests(BaseExtensionTests): def test_isna(self, data_missing): - if data_missing._can_hold_na: - expected = np.array([True, False]) - else: - expected = np.array([False, False]) + expected = np.array([True, False]) result = pd.isna(data_missing) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 4cb4ea21d9be3..bbd31c4071b91 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -57,7 +57,7 @@ def na_cmp(): Should return a function of two arguments that returns True if both arguments are (scalar) NA for your type. - By default, uses ``operator.or`` + By default, uses ``operator.is_`` """ return operator.is_