def check_bool_indexer(index: Index, key) -> np.ndarray:
    """
    Validate a boolean indexer against an index and return it as an array.

    A boolean Series whose index is not equal to ``index`` is reindexed to
    ``index`` first. Any other key (including a Series whose index already
    equals ``index``) is made dense if it is sparse and then converted to a
    boolean ndarray.

    This function assumes that is_bool_indexer(key) == True.

    Parameters
    ----------
    index : Index
        Index of the object on which the indexing is done.
    key : list-like
        Boolean indexer to check.

    Returns
    -------
    np.ndarray
        The key converted to a boolean array.

    Raises
    ------
    IndexError
        If the key did not need reindexing and its length differs from the
        length of ``index``.
    IndexingError
        If the key is a Series whose index cannot be aligned to ``index``,
        i.e. reindexing it leaves missing values.
    """
    needs_alignment = (isinstance(key, ABCSeries) and
                       not key.index.equals(index))

    if needs_alignment:
        # Labels of ``index`` that the key does not carry come back as
        # missing values from the reindex; such a key cannot be used.
        aligned = key.reindex(index)
        if isna(aligned._values).any():
            raise IndexingError('Unalignable boolean Series provided as '
                                'indexer (index of the boolean Series and of '
                                'the indexed object do not match).')
        return aligned.astype(bool)._values

    dense_key = key.to_dense() if is_sparse(key) else key
    bool_key = np.asarray(dense_key, dtype=bool)

    # GH26658: reject keys of the wrong length (too few items included)
    n_key, n_index = len(bool_key), len(index)
    if n_key != n_index:
        raise IndexError(
            'Item wrong length {} instead of {}.'.format(n_key, n_index))

    return bool_key
@pytest.mark.parametrize('index', [[True, False],
                                   [True, False, True, False]])
def test_loc_getitem_bool_diff_len(self, index):
    """``.loc`` with a boolean list of the wrong length raises IndexError."""
    # GH26658
    ser = Series([1, 2, 3])
    expected_msg = 'Item wrong length {} instead of {}.'.format(
        len(index), len(ser))

    # Both a too-short and a too-long boolean key must be rejected.
    with pytest.raises(IndexError, match=expected_msg):
        _ = ser.loc[index]