diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0ca19ffd1f496..d32eeb493b2c2 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -693,7 +693,6 @@ Other API changes - :func: `pandas.api.dtypes.is_string_dtype` no longer incorrectly identifies categorical series as string. - :func:`read_excel` no longer takes ``**kwds`` arguments. This means that passing in keyword ``chunksize`` now raises a ``TypeError`` (previously raised a ``NotImplementedError``), while passing in keyword ``encoding`` now raises a ``TypeError`` (:issue:`34464`) -- :func: `merge` now checks ``suffixes`` parameter type to be ``tuple`` and raises ``TypeError``, whereas before a ``list`` or ``set`` were accepted and that the ``set`` could produce unexpected results (:issue:`33740`) - :class:`Period` no longer accepts tuples for the ``freq`` argument (:issue:`34658`) - :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` now raises ValueError if ``limit_direction`` is 'forward' or 'both' and ``method`` is 'backfill' or 'bfill' or ``limit_direction`` is 'backward' or 'both' and ``method`` is 'pad' or 'ffill' (:issue:`34746`) - The :class:`DataFrame` constructor no longer accepts a list of ``DataFrame`` objects. Because of changes to NumPy, ``DataFrame`` objects are now consistently treated as 2D objects, so a list of ``DataFrames`` is considered 3D, and no longer acceptible for the ``DataFrame`` constructor (:issue:`32289`). @@ -787,6 +786,7 @@ Deprecations - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`) - :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`) - The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`) +- Providing ``suffixes`` as a ``set`` in :func:`pandas.merge` is deprecated. 
Provide a tuple instead (:issue:`33740`, :issue:`34741`). - :meth:`Index.is_mixed` is deprecated and will be removed in a future version, check ``index.inferred_type`` directly instead (:issue:`32922`) - Passing any arguments but the first one to :func:`read_html` as diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a21a45f415a47..b6993e9ed851a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -227,10 +227,13 @@ sort : bool, default False Sort the join keys lexicographically in the result DataFrame. If False, the order of the join keys depends on the join type (how keyword). -suffixes : tuple of (str, str), default ('_x', '_y') - Suffix to apply to overlapping column names in the left and right - side, respectively. To raise an exception on overlapping columns use - (False, False). +suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. copy : bool, default True If False, avoid copy if possible. indicator : bool or str, default False diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5e4eb89f0b45f..27b331babe692 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -194,7 +194,7 @@ def merge_ordered( left DataFrame. fill_method : {'ffill', None}, default None Interpolation method for data. - suffixes : Sequence, default is ("_x", "_y") + suffixes : list-like, default is ("_x", "_y") A length-2 sequence where each element is optionally a string indicating the suffix to add to overlapping column names in `left` and `right` respectively. 
Pass a value of `None` instead @@ -2072,9 +2072,13 @@ def _items_overlap_with_suffix(left: Index, right: Index, suffixes: Tuple[str, s If corresponding suffix is empty, the entry is simply converted to string. """ - if not isinstance(suffixes, tuple): - raise TypeError( - f"suffixes should be tuple of (str, str). But got {type(suffixes).__name__}" + if not is_list_like(suffixes, allow_sets=False) or isinstance(suffixes, dict): + warnings.warn( + f"Passing 'suffixes' as a {type(suffixes).__name__} is not supported and " + "may give unexpected results. Provide 'suffixes' as a tuple instead. In " + "the future a 'TypeError' will be raised.", + FutureWarning, + stacklevel=4, ) to_rename = left.intersection(right) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 0a4d5f17a48cc..4fd3c688b8771 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1999,6 +1999,7 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): (0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]), (0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]), (0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]), + (0, 0, dict(suffixes=["_x", "_y"]), ["0_x", "0_y"]), ("a", 0, dict(suffixes=(None, "_y")), ["a", 0]), (0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]), ("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]), @@ -2069,18 +2070,13 @@ def test_merge_suffix_error(col1, col2, suffixes): pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) -@pytest.mark.parametrize( - "col1, col2, suffixes", [("a", "a", {"a", "b"}), ("a", "a", None), (0, 0, None)], -) -def test_merge_suffix_type_error(col1, col2, suffixes): - a = pd.DataFrame({col1: [1, 2, 3]}) - b = pd.DataFrame({col2: [3, 4, 5]}) +@pytest.mark.parametrize("suffixes", [{"left", "right"}, {"left": 0, "right": 0}]) +def test_merge_suffix_warns(suffixes): + a = pd.DataFrame({"a": [1, 2, 3]}) + b = pd.DataFrame({"b": [3, 4, 5]}) - msg = ( - f"suffixes 
should be tuple of \\(str, str\\). But got {type(suffixes).__name__}" - ) - with pytest.raises(TypeError, match=msg): - pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) + with tm.assert_produces_warning(FutureWarning): + pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) @pytest.mark.parametrize(