diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ee781ec4b0361..4a3122a78b234 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -648,6 +648,7 @@ Reshaping - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) - Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) +- Fixed a regression in :func:`DataFrame.sort_values` when sorting by multiple columns where one of them is a ``datetime64`` dtype column containing ``NaT`` values (:issue:`16836`) Numeric ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 579d9f10d5875..a12e611f6618a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3453,18 +3453,13 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, if len(by) > 1: from pandas.core.sorting import lexsort_indexer - def trans(v): - if needs_i8_conversion(v): - return v.view('i8') - return v - keys = [] for x in by: k = self.xs(x, axis=other_axis).values if k.ndim == 2: raise ValueError('Cannot sort by duplicate column %s' % str(x)) - keys.append(trans(k)) + keys.append(k) indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position) indexer = _ensure_platform_int(indexer) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 891c94b59074a..e6f823bf6fac2 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -269,6 +269,11 @@ def test_sort_datetimes(self): df2 = df.sort_values(by=['B']) assert_frame_equal(df1, df2) + df1 = df.sort_values(by='B') + + df2 = df.sort_values(by=['C', 'B']) + assert_frame_equal(df1, df2) + def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] with 
tm.assert_raises_regex(ValueError, "This Series is a view"): @@ -321,7 +326,29 @@ def test_sort_nat_values_in_int_column(self): assert_frame_equal(df_sorted, df_reversed) df_sorted = df.sort_values(["datetime", "float"], na_position="last") - assert_frame_equal(df_sorted, df_reversed) + assert_frame_equal(df_sorted, df) + + # Ascending should not affect the results. + df_sorted = df.sort_values(["datetime", "float"], ascending=False) + assert_frame_equal(df_sorted, df) + + def test_sort_nat(self): + + # GH 16836 + + d1 = [Timestamp(x) for x in ['2016-01-01', '2015-01-01', + np.nan, '2016-01-01']] + d2 = [Timestamp(x) for x in ['2017-01-01', '2014-01-01', + '2016-01-01', '2015-01-01']] + df = pd.DataFrame({'a': d1, 'b': d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ['2015-01-01', '2016-01-01', + '2016-01-01', np.nan]] + d4 = [Timestamp(x) for x in ['2014-01-01', '2015-01-01', + '2017-01-01', '2016-01-01']] + expected = pd.DataFrame({'a': d3, 'b': d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=['a', 'b'], ) + tm.assert_frame_equal(sorted_df, expected) class TestDataFrameSortIndexKinds(TestData):