pandas-dev · jreback · Nov 23, 2017 · Nov 18, 2017 · Nov 20, 2017 · Nov 20, 2017
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -51,7 +51,7 @@ Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
--
+- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`)
 -
 
 

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -906,16 +906,31 @@ def _maybe_coerce_merge_keys(self):
                 continue
 
             # if we are numeric, then allow differing
-            # kinds to proceed, eg. int64 and int8
+            # kinds to proceed, eg. int64 and int8, int and float
             # further if we are object, but we infer to
             # the same, then proceed
             if is_numeric_dtype(lk) and is_numeric_dtype(rk):
                 if lk.dtype.kind == rk.dtype.kind:
-                    continue
+                    pass
+
+                # check whether ints and floats
+                elif is_integer_dtype(rk) and is_float_dtype(lk):
+                    if not (lk == lk.astype(rk.dtype)).all():
+                        warnings.warn('You are merging on int and float '
+                                      'columns where the float values '
+                                      'are not equal to their int '
+                                      'representation', UserWarning)
+
+                elif is_float_dtype(rk) and is_integer_dtype(lk):
+                    if not (rk == rk.astype(lk.dtype)).all():
+                        warnings.warn('You are merging on int and float '
+                                      'columns where the float values '
+                                      'are not equal to their int '
+                                      'representation', UserWarning)
 
                 # let's infer and see if we are ok
-                if lib.infer_dtype(lk) == lib.infer_dtype(rk):
-                    continue
+                elif lib.infer_dtype(lk) == lib.infer_dtype(rk):
+                    pass
 
             # Houston, we have a problem!
             # let's coerce to object if the dtypes aren't
@@ -924,14 +939,15 @@ def _maybe_coerce_merge_keys(self):
             # then we would lose type information on some
             # columns, and end up trying to merge
             # incompatible dtypes. See GH 16900.
-            if name in self.left.columns:
-                typ = lk.categories.dtype if lk_is_cat else object
-                self.left = self.left.assign(
-                    **{name: self.left[name].astype(typ)})
-            if name in self.right.columns:
-                typ = rk.categories.dtype if rk_is_cat else object
-                self.right = self.right.assign(
-                    **{name: self.right[name].astype(typ)})
+            else:
+                if name in self.left.columns:
+                    typ = lk.categories.dtype if lk_is_cat else object
+                    self.left = self.left.assign(
+                        **{name: self.left[name].astype(typ)})
+                if name in self.right.columns:
+                    typ = rk.categories.dtype if rk_is_cat else object
+                    self.right = self.right.assign(
+                        **{name: self.right[name].astype(typ)})
 
     def _validate_specification(self):
         # Hm, any way to make this logic less complicated??

diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py
@@ -13,7 +13,10 @@
 from pandas.core.reshape.merge import merge, MergeError
 from pandas.util.testing import assert_frame_equal, assert_series_equal
 from pandas.core.dtypes.dtypes import CategoricalDtype
-from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype
+from pandas.core.dtypes.common import (
+    is_categorical_dtype,
+    is_object_dtype,
+)
 from pandas import DataFrame, Index, MultiIndex, Series, Categorical
 import pandas.util.testing as tm
 from pandas.api.types import CategoricalDtype as CDT
@@ -1408,6 +1411,42 @@ def test_join_multi_dtypes(self, d1, d2):
         expected.sort_values(['k1', 'k2'], kind='mergesort', inplace=True)
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize('int_vals, float_vals, exp_vals', [
+        ([1, 2, 3], [1.0, 2.0, 3.0], {'X': [1, 2, 3], 'Y': [1.0, 2.0, 3.0]}),
+        ([1, 2, 3], [1.0, 3.0], {'X': [1, 3], 'Y': [1.0, 3.0]}),
+        ([1, 2], [1.0, 2.0, 3.0], {'X': [1, 2], 'Y': [1.0, 2.0]}),
+    ])
+    def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals):
+        # GH 16572
+        # Check that float column is not cast to object if
+        # merging on float and int columns
+        A = DataFrame({'X': int_vals})
+        B = DataFrame({'Y': float_vals})
+        expected = DataFrame(exp_vals)
+
+        result = A.merge(B, left_on='X', right_on='Y')
+        assert_frame_equal(result, expected)
+
+        result = B.merge(A, left_on='Y', right_on='X')
+        assert_frame_equal(result, expected[['Y', 'X']])
+
+    def test_merge_on_ints_floats_warning(self):
+        # GH 16572
+        # merge will produce a warning when merging on int and
+        # float columns where the float values are not exactly
+        # equal to their int representation
+        A = DataFrame({'X': [1, 2, 3]})
+        B = DataFrame({'Y': [1.1, 2.5, 3.0]})
+        expected = DataFrame({'X': [3], 'Y': [3.0]})
+
+        with tm.assert_produces_warning(UserWarning):
+            result = A.merge(B, left_on='X', right_on='Y')
+            assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(UserWarning):
+            result = B.merge(A, left_on='Y', right_on='X')
+            assert_frame_equal(result, expected[['Y', 'X']])
+
 
 @pytest.fixture
 def left():
-Original file line number
+Diff line change
@@ Expand Up / @@ -51,7 +51,7 @@ Backwards incompatible API changes @@
     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
-    -
+    - :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`)
     -
@@ Expand Down @@