Skip to content

Commit

Permalink
ENH: Add ignore_index to Series.drop_duplicates (#50844)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Jan 19, 2023
1 parent c1ce0a7 commit b5abe5d
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ Other enhancements
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
- :meth:`Series.drop_duplicates` has gained the ``ignore_index`` keyword to reset the index of the result (:issue:`48304`)
- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
- Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
- Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
Expand Down
27 changes: 23 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2131,22 +2131,32 @@ def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation

@overload
def drop_duplicates(
self, *, keep: DropKeep = ..., inplace: Literal[False] = ...
self,
*,
keep: DropKeep = ...,
inplace: Literal[False] = ...,
ignore_index: bool = ...,
) -> Series:
...

@overload
def drop_duplicates(self, *, keep: DropKeep = ..., inplace: Literal[True]) -> None:
def drop_duplicates(
self, *, keep: DropKeep = ..., inplace: Literal[True], ignore_index: bool = ...
) -> None:
...

@overload
def drop_duplicates(
self, *, keep: DropKeep = ..., inplace: bool = ...
self, *, keep: DropKeep = ..., inplace: bool = ..., ignore_index: bool = ...
) -> Series | None:
...

def drop_duplicates(
self, *, keep: DropKeep = "first", inplace: bool = False
self,
*,
keep: DropKeep = "first",
inplace: bool = False,
ignore_index: bool = False,
) -> Series | None:
"""
Return Series with duplicate values removed.
Expand All @@ -2163,6 +2173,11 @@ def drop_duplicates(
inplace : bool, default ``False``
If ``True``, performs operation inplace and returns None.
ignore_index : bool, default ``False``
If ``True``, the resulting axis will be labeled 0, 1, …, n - 1.
.. versionadded:: 2.0.0
Returns
-------
Series or None
Expand Down Expand Up @@ -2225,6 +2240,10 @@ def drop_duplicates(
"""
inplace = validate_bool_kwarg(inplace, "inplace")
result = super().drop_duplicates(keep=keep)

if ignore_index:
result.index = default_index(len(result))

if inplace:
self._update_inplace(result)
return None
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/series/methods/test_drop_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,10 @@ def test_drop_duplicates_categorical_bool_na(self, nulls_fixture):
index=[0, 1, 4],
)
tm.assert_series_equal(result, expected)

def test_drop_duplicates_ignore_index(self):
# GH#48304
ser = Series([1, 2, 2, 3])
result = ser.drop_duplicates(ignore_index=True)
expected = Series([1, 2, 3])
tm.assert_series_equal(result, expected)

0 comments on commit b5abe5d

Please sign in to comment.