From d7a2e94914b7707e1c37a4524a9a7c3fef649d01 Mon Sep 17 00:00:00 2001 From: Alexander Michael Schade <3345464+aschade@users.noreply.github.com> Date: Wed, 17 Jan 2018 19:53:07 -0500 Subject: [PATCH] Fix no raise dup index when using drop with axis=0 (#19230) --- doc/source/whatsnew/v0.23.0.txt | 3 ++ pandas/core/generic.py | 8 +++++ pandas/core/indexes/base.py | 9 +++-- pandas/core/reshape/pivot.py | 2 +- .../tests/frame/test_axis_select_reindex.py | 35 +++++++++++++++---- pandas/tests/indexes/test_base.py | 8 ++--- pandas/tests/series/test_indexing.py | 6 ++-- pandas/tests/test_panel.py | 2 +- 8 files changed, 56 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c59222bf91eac..2f3b62febed7d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -307,6 +307,7 @@ Other API Changes - :class:`IntervalIndex` and ``IntervalDtype`` no longer support categorical, object, and string subtypes (:issue:`19016`) - The default ``Timedelta`` constructor now accepts an ``ISO 8601 Duration`` string as an argument (:issue:`19040`) - ``IntervalDtype`` now returns ``True`` when compared against ``'interval'`` regardless of subtype, and ``IntervalDtype.name`` now returns ``'interval'`` regardless of subtype (:issue:`18980`) +- ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - Addition or subtraction of ``NaT`` from :class:`TimedeltaIndex` will return ``TimedeltaIndex`` instead of ``DatetimeIndex`` (:issue:`19124`) - :func:`DatetimeIndex.shift` and :func:`TimedeltaIndex.shift` will now raise ``NullFrequencyError`` (which subclasses ``ValueError``, which was raised in older versions) when the index object frequency is ``None`` (:issue:`19147`) @@ -455,6 +456,8 @@ Indexing - Bug in :func:`MultiIndex.set_labels` which would cause casting (and potentially clipping) of the new labels if the ``level`` argument is not 0 or a list like [0, 1, ... ] (:issue:`19057`) - Bug in ``str.extractall`` when there were no matches empty :class:`Index` was returned instead of appropriate :class:`MultiIndex` (:issue:`19034`) - Bug in :class:`IntervalIndex` where set operations that returned an empty ``IntervalIndex`` had the wrong dtype (:issue:`19101`) +- Bug in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` where no ``KeyError`` is raised when dropping a non-existent element from an axis that contains duplicates (:issue:`19186`) +- I/O ^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cef1e551f948e..7ffef9c8a86d7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2806,6 +2806,11 @@ def drop(self, labels=None, axis=0, index=None, columns=None, level=None, ------- dropped : type of caller + Raises + ------ + KeyError + If none of the labels are found in the selected axis + Examples -------- >>> df = pd.DataFrame(np.arange(12).reshape(3,4), @@ -2909,6 +2914,9 @@ def _drop_axis(self, labels, axis, level=None, errors='raise'): else: indexer = ~axis.isin(labels) + if errors == 'raise' and indexer.all(): + raise KeyError('{} not found in axis'.format(labels)) + slicer = [slice(None)] * self.ndim slicer[self._get_axis_number(axis_name)] = indexer diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d5330768059fd..6d0a415f5b420 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3759,6 +3759,11 @@ def drop(self, labels, errors='raise'): Returns ------- dropped : Index + + Raises + ------ + KeyError + If none of the labels are found in the selected axis """ arr_dtype = 'object' if self.dtype == 'object' else None labels = _index_labels_to_array(labels, dtype=arr_dtype) @@ -3766,8 +3771,8 @@ def drop(self, labels, errors='raise'): mask = indexer == -1 if mask.any(): if errors != 'ignore': - raise ValueError('labels %s not contained in axis' % - labels[mask]) + raise KeyError( + 'labels %s not contained in axis' % labels[mask]) indexer = indexer[~mask] return self.delete(indexer) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 77babf718d78c..0e92fc4edce85 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -75,7 +75,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', for key in keys: try: values = values.drop(key) - except (TypeError, ValueError): + except (TypeError, ValueError, KeyError): pass values = list(values) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 343e235fb741c..28e82f7585850 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -41,8 +41,8 @@ def test_drop_names(self): assert obj.columns.name == 'second' assert list(df.columns) == ['d', 'e', 'f'] - pytest.raises(ValueError, df.drop, ['g']) - pytest.raises(ValueError, df.drop, ['g'], 1) + pytest.raises(KeyError, df.drop, ['g']) + pytest.raises(KeyError, df.drop, ['g'], 1) # errors = 'ignore' dropped = df.drop(['g'], errors='ignore') @@ -87,10 +87,10 @@ def test_drop(self): assert_frame_equal(simple.drop( [0, 3], axis='index'), simple.loc[[1, 2], :]) - pytest.raises(ValueError, simple.drop, 5) - pytest.raises(ValueError, simple.drop, 'C', 1) - pytest.raises(ValueError, simple.drop, [1, 5]) - pytest.raises(ValueError, simple.drop, ['A', 'C'], 1) + pytest.raises(KeyError, simple.drop, 5) + pytest.raises(KeyError, simple.drop, 'C', 1) + pytest.raises(KeyError, simple.drop, [1, 5]) + pytest.raises(KeyError, simple.drop, ['A', 'C'], 1) # errors = 'ignore' assert_frame_equal(simple.drop(5, errors='ignore'), simple) @@ -1128,3 +1128,26 @@ def test_reindex_multi(self): expected = df.reindex([0, 1]).reindex(columns=['a', 'b']) assert_frame_equal(result, expected) + + data = [[1, 2, 3], [1, 2, 3]] + + @pytest.mark.parametrize('actual', [ + DataFrame(data=data, index=['a', 'a']), + DataFrame(data=data, index=['a', 'b']), + DataFrame(data=data, index=['a', 'b']).set_index([0, 1]), + DataFrame(data=data, index=['a', 'a']).set_index([0, 1]) + ]) + def test_raise_on_drop_duplicate_index(self, actual): + + # issue 19186 + level = 0 if isinstance(actual.index, MultiIndex) else None + with pytest.raises(KeyError): + actual.drop('c', level=level, axis=0) + with pytest.raises(KeyError): + actual.T.drop('c', level=level, axis=1) + expected_no_err = actual.drop('c', axis=0, level=level, + errors='ignore') + assert_frame_equal(expected_no_err, actual) + expected_no_err = actual.T.drop('c', axis=1, level=level, + errors='ignore') + assert_frame_equal(expected_no_err.T, actual) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c4e8682934369..508c3a73f48c7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1396,8 +1396,8 @@ def test_drop(self): expected = self.strIndex[lrange(5) + lrange(10, n)] tm.assert_index_equal(dropped, expected) - pytest.raises(ValueError, self.strIndex.drop, ['foo', 'bar']) - pytest.raises(ValueError, self.strIndex.drop, ['1', 'bar']) + pytest.raises(KeyError, self.strIndex.drop, ['foo', 'bar']) + pytest.raises(KeyError, self.strIndex.drop, ['1', 'bar']) # errors='ignore' mixed = drop.tolist() + ['foo'] @@ -1419,7 +1419,7 @@ def test_drop(self): tm.assert_index_equal(dropped, expected) # errors='ignore' - pytest.raises(ValueError, ser.drop, [3, 4]) + pytest.raises(KeyError, ser.drop, [3, 4]) dropped = ser.drop(4, errors='ignore') expected = Index([1, 2, 3]) @@ -1448,7 +1448,7 @@ def test_drop_tuple(self, values, to_drop): removed = index.drop(to_drop[1]) for drop_me in to_drop[1], [to_drop[1]]: - pytest.raises(ValueError, removed.drop, drop_me) + pytest.raises(KeyError, removed.drop, drop_me) def test_tuple_union_bug(self): import pandas diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 29b4363ec70b9..bafc6d268c266 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1838,8 +1838,8 @@ def test_drop(self): # single string/tuple-like s = Series(range(3), index=list('abc')) - pytest.raises(ValueError, s.drop, 'bc') - pytest.raises(ValueError, s.drop, ('a', )) + pytest.raises(KeyError, s.drop, 'bc') + pytest.raises(KeyError, s.drop, ('a', )) # errors='ignore' s = Series(range(3), index=list('abc')) @@ -1861,7 +1861,7 @@ def test_drop(self): # GH 16877 s = Series([2, 3], index=[0, 1]) - with tm.assert_raises_regex(ValueError, 'not contained in axis'): + with tm.assert_raises_regex(KeyError, 'not contained in axis'): s.drop([False, True]) def test_align(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 770560134d8d6..1955fc301be9b 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -2302,7 +2302,7 @@ def check_drop(drop_val, axis_number, aliases, expected): expected = Panel({"One": df}) check_drop('Two', 0, ['items'], expected) - pytest.raises(ValueError, panel.drop, 'Three') + pytest.raises(KeyError, panel.drop, 'Three') # errors = 'ignore' dropped = panel.drop('Three', errors='ignore')