Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise a ValueError when index and data lengths don't match #26911

Merged
merged 16 commits into from
Jun 26, 2019
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,7 @@ Indexing

- Improved exception message when calling :meth:`DataFrame.iloc` with a list of non-numeric objects (:issue:`25753`).
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` where ``KeyError`` was not raised for a ``MultiIndex`` when the key was less than or equal to the number of levels in the :class:`MultiIndex` (:issue:`14885`).
- Bug in ``.iloc`` and ``.loc`` where ``ValueError`` was not raised for a boolean indexer whose length differs from the length of the indexed object (:issue:`26658`).
- Bug in which :meth:`DataFrame.append` produced an erroneous warning indicating that a ``KeyError`` will be thrown in the future when the data to be appended contains new columns (:issue:`22252`).
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise an error (:issue:`26390`).
Expand Down
49 changes: 37 additions & 12 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2407,29 +2407,54 @@ def convert_to_index_sliceable(obj, key):
return None


def check_bool_indexer(ax, key):
# boolean indexing, need to check that the data are aligned, otherwise
# disallowed
def check_bool_indexer(index: Index, key) -> np.ndarray:
"""
Check that key is a valid boolean indexer for an object with the given
index, reindexing or converting it if needed.

This function assumes that is_bool_indexer(key) == True.

Parameters
----------
index : Index
Index of the object on which the indexing is done
key : list-like
Boolean indexer to check

# this function assumes that is_bool_indexer(key) == True
Returns
-------
result : np.ndarray
Resulting key

Raises
------
ValueError
If the key does not have the same length as index

IndexingError
If the index of the key is unalignable to index

"""
result = key
if isinstance(key, ABCSeries) and not key.index.equals(ax):
result = result.reindex(ax)
if isinstance(key, ABCSeries) and not key.index.equals(index):
result = result.reindex(index)
mask = isna(result._values)
if mask.any():
raise IndexingError('Unalignable boolean Series provided as '
'indexer (index of the boolean Series and of '
'the indexed object do not match')
'the indexed object do not match).')
result = result.astype(bool)._values
elif is_sparse(result):
result = result.to_dense()
result = np.asarray(result, dtype=bool)
else:
# is_bool_indexer has already checked for nulls in the case of an
# object array key, so no check needed here
if is_sparse(result):
result = result.to_dense()
result = np.asarray(result, dtype=bool)

# GH26658
if len(result) != len(index):
raise ValueError(
'Item wrong length {} instead of {}.'.format(len(result),
len(index)))

return result


Expand Down
19 changes: 16 additions & 3 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,21 @@ def test_iloc_getitem_bool(self):
b = [True, False, True, False, ]
self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints'])
self.check_result('bool', 'iloc', b, 'ix', b,
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
typs=['labels', 'mixed', 'ts', 'floats'],
fails=IndexError)
self.check_result('bool', 'iloc', b, 'ix', b,
typs=['empty'],
fails=ValueError)

@pytest.mark.parametrize('index', [[True, False],
[True, False, True, False]])
def test_iloc_getitem_bool_diff_len(self, index):
# GH26658
s = Series([1, 2, 3])
with pytest.raises(ValueError,
match=('Item wrong length {} instead of {}.'.format(
len(index), len(s)))):
_ = s.iloc[index]

def test_iloc_getitem_slice(self):

Expand Down Expand Up @@ -614,10 +627,10 @@ def test_iloc_mask(self):
'cannot use an indexable as a mask'),
('locs', ''): 'Unalignable boolean Series provided as indexer '
'(index of the boolean Series and of the indexed '
'object do not match',
'object do not match).',
('locs', '.loc'): 'Unalignable boolean Series provided as indexer '
'(index of the boolean Series and of the '
'indexed object do not match',
'indexed object do not match).',
('locs', '.iloc'): ('iLocation based boolean indexing on an '
'integer type is not available'),
}
Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,17 @@ def test_loc_getitem_bool(self):
typs=['ints', 'uints', 'labels',
'mixed', 'ts', 'floats'])
self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'],
fails=KeyError)
fails=ValueError)

@pytest.mark.parametrize('index', [[True, False],
[True, False, True, False]])
def test_loc_getitem_bool_diff_len(self, index):
# GH26658
s = Series([1, 2, 3])
with pytest.raises(ValueError,
match=('Item wrong length {} instead of {}.'.format(
len(index), len(s)))):
_ = s.loc[index]

def test_loc_getitem_int_slice(self):

Expand Down