Skip to content

Commit

Permalink
Fix no raise dup index when using drop with axis=0 (#19230)
Browse files Browse the repository at this point in the history
  • Loading branch information
aschade authored and jreback committed Jan 18, 2018
1 parent 6e0927e commit d7a2e94
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 17 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ Other API Changes
- :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`)
- The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`)
- ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`)
- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`)
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
- Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`)
- :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`)
Expand Down Expand Up @@ -455,6 +456,8 @@ Indexing
- Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`)
- Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`)
- Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`)
- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`)
-

I/O
^^^
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2806,6 +2806,11 @@ def drop(self, labels=None, axis=0, index=None, columns=None, level=None,
-------
dropped : type of caller
Raises
------
KeyError
If none of the labels are found in the selected axis
Examples
--------
>>> df = pd.DataFrame(np.arange(12).reshape(3,4),
Expand Down Expand Up @@ -2909,6 +2914,9 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'):
else:
indexer = ~axis.isin(labels)

if errors == 'raise' and indexer.all():
raise KeyError('{} not found in axis'.format(labels))

slicer = [slice(None)] * self.ndim
slicer[self._get_axis_number(axis_name)] = indexer

Expand Down
9 changes: 7 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3759,15 +3759,20 @@ def drop(self, labels, errors='raise'):
Returns
-------
dropped : Index
Raises
------
KeyError
If none of the labels are found in the selected axis
"""
arr_dtype = 'object' if self.dtype == 'object' else None
labels = _index_labels_to_array(labels, dtype=arr_dtype)
indexer = self.get_indexer(labels)
mask = indexer == -1
if mask.any():
if errors != 'ignore':
raise ValueError('labels %s not contained in axis' %
labels[mask])
raise KeyError(
'labels %s not contained in axis' % labels[mask])
indexer = indexer[~mask]
return self.delete(indexer)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
for key in keys:
try:
values = values.drop(key)
except (TypeError, ValueError):
except (TypeError, ValueError, KeyError):
pass
values = list(values)

Expand Down
35 changes: 29 additions & 6 deletions pandas/tests/frame/test_axis_select_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def test_drop_names(self):
assert obj.columns.name == 'second'
assert list(df.columns) == ['d', 'e', 'f']

pytest.raises(ValueError, df.drop, ['g'])
pytest.raises(ValueError, df.drop, ['g'], 1)
pytest.raises(KeyError, df.drop, ['g'])
pytest.raises(KeyError, df.drop, ['g'], 1)

# errors = 'ignore'
dropped = df.drop(['g'], errors='ignore')
Expand Down Expand Up @@ -87,10 +87,10 @@ def test_drop(self):
assert_frame_equal(simple.drop(
[0, 3], axis='index'), simple.loc[[1, 2], :])

pytest.raises(ValueError, simple.drop, 5)
pytest.raises(ValueError, simple.drop, 'C', 1)
pytest.raises(ValueError, simple.drop, [1, 5])
pytest.raises(ValueError, simple.drop, ['A', 'C'], 1)
pytest.raises(KeyError, simple.drop, 5)
pytest.raises(KeyError, simple.drop, 'C', 1)
pytest.raises(KeyError, simple.drop, [1, 5])
pytest.raises(KeyError, simple.drop, ['A', 'C'], 1)

# errors = 'ignore'
assert_frame_equal(simple.drop(5, errors='ignore'), simple)
Expand Down Expand Up @@ -1128,3 +1128,26 @@ def test_reindex_multi(self):
expected = df.reindex([0, 1]).reindex(columns=['a', 'b'])

assert_frame_equal(result, expected)

data = [[1, 2, 3], [1, 2, 3]]

@pytest.mark.parametrize('actual', [
DataFrame(data=data, index=['a', 'a']),
DataFrame(data=data, index=['a', 'b']),
DataFrame(data=data, index=['a', 'b']).set_index([0, 1]),
DataFrame(data=data, index=['a', 'a']).set_index([0, 1])
])
def test_raise_on_drop_duplicate_index(self, actual):

# issue 19186
level = 0 if isinstance(actual.index, MultiIndex) else None
with pytest.raises(KeyError):
actual.drop('c', level=level, axis=0)
with pytest.raises(KeyError):
actual.T.drop('c', level=level, axis=1)
expected_no_err = actual.drop('c', axis=0, level=level,
errors='ignore')
assert_frame_equal(expected_no_err, actual)
expected_no_err = actual.T.drop('c', axis=1, level=level,
errors='ignore')
assert_frame_equal(expected_no_err.T, actual)
8 changes: 4 additions & 4 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1396,8 +1396,8 @@ def test_drop(self):
expected = self.strIndex[lrange(5) + lrange(10, n)]
tm.assert_index_equal(dropped, expected)

pytest.raises(ValueError, self.strIndex.drop, ['foo', 'bar'])
pytest.raises(ValueError, self.strIndex.drop, ['1', 'bar'])
pytest.raises(KeyError, self.strIndex.drop, ['foo', 'bar'])
pytest.raises(KeyError, self.strIndex.drop, ['1', 'bar'])

# errors='ignore'
mixed = drop.tolist() + ['foo']
Expand All @@ -1419,7 +1419,7 @@ def test_drop(self):
tm.assert_index_equal(dropped, expected)

# errors='ignore'
pytest.raises(ValueError, ser.drop, [3, 4])
pytest.raises(KeyError, ser.drop, [3, 4])

dropped = ser.drop(4, errors='ignore')
expected = Index([1, 2, 3])
Expand Down Expand Up @@ -1448,7 +1448,7 @@ def test_drop_tuple(self, values, to_drop):

removed = index.drop(to_drop[1])
for drop_me in to_drop[1], [to_drop[1]]:
pytest.raises(ValueError, removed.drop, drop_me)
pytest.raises(KeyError, removed.drop, drop_me)

def test_tuple_union_bug(self):
import pandas
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/series/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1838,8 +1838,8 @@ def test_drop(self):

# single string/tuple-like
s = Series(range(3), index=list('abc'))
pytest.raises(ValueError, s.drop, 'bc')
pytest.raises(ValueError, s.drop, ('a', ))
pytest.raises(KeyError, s.drop, 'bc')
pytest.raises(KeyError, s.drop, ('a', ))

# errors='ignore'
s = Series(range(3), index=list('abc'))
Expand All @@ -1861,7 +1861,7 @@ def test_drop(self):

# GH 16877
s = Series([2, 3], index=[0, 1])
with tm.assert_raises_regex(ValueError, 'not contained in axis'):
with tm.assert_raises_regex(KeyError, 'not contained in axis'):
s.drop([False, True])

def test_align(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2302,7 +2302,7 @@ def check_drop(drop_val, axis_number, aliases, expected):
expected = Panel({"One": df})
check_drop('Two', 0, ['items'], expected)

pytest.raises(ValueError, panel.drop, 'Three')
pytest.raises(KeyError, panel.drop, 'Three')

# errors = 'ignore'
dropped = panel.drop('Three', errors='ignore')
Expand Down

0 comments on commit d7a2e94

Please sign in to comment.