Adds tests to six pandas test modules. Each new test carries a "# GH <number>"
comment (14773, 14564, 14467, 15239, 15036, 15106, 15273). Text before the
first "diff --git" header is commentary only and is not part of any hunk.

diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py
index 75d93ed2aafc68..0a3d2e1c9a8fc9 100644
--- a/pandas/tests/frame/methods/test_diff.py
+++ b/pandas/tests/frame/methods/test_diff.py
@@ -285,3 +285,12 @@ def test_diff_readonly(self):
         result = df.diff()
         expected = DataFrame(np.array(df)).diff()
         tm.assert_frame_equal(result, expected)
+
+    def test_diff_all_int_dtype(self, any_int_dtype):
+        # GH 14773
+        df = DataFrame(range(5))
+        df = df.astype(any_int_dtype)
+        result = df.diff()
+        expected_dtype = "float32" if any_int_dtype in ("int8", "int16") else "float64"
+        expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype)
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
index dbb5cb357de478..aca061cdd197b0 100644
--- a/pandas/tests/frame/methods/test_quantile.py
+++ b/pandas/tests/frame/methods/test_quantile.py
@@ -4,6 +4,7 @@
 import pandas as pd
 from pandas import (
     DataFrame,
+    Index,
     Series,
     Timestamp,
 )
@@ -650,3 +651,68 @@ def test_quantile_ea_scalar(self, index, frame_or_series):
             assert result == expected
         else:
             tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "dtype, expected_data, expected_index, axis",
+        [
+            ["float64", [], [], 1],
+            ["int64", [], [], 1],
+            ["float64", [np.nan, np.nan], ["a", "b"], 0],
+            ["int64", [np.nan, np.nan], ["a", "b"], 0],
+        ],
+    )
+    def test_empty_numeric(self, dtype, expected_data, expected_index, axis):
+        # GH 14564
+        df = DataFrame(columns=["a", "b"], dtype=dtype)
+        result = df.quantile(0.5, axis=axis)
+        expected = Series(
+            expected_data, name=0.5, index=Index(expected_index), dtype="float64"
+        )
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "dtype, expected_data, expected_index, axis, expected_dtype",
+        [
+            pytest.param(
+                "datetime64[ns]",
+                [],
+                [],
+                1,
+                "datetime64[ns]",
+                marks=pytest.mark.xfail(reason="#GH 41544"),
+            ),
+            ["datetime64[ns]", [pd.NaT, pd.NaT], ["a", "b"], 0, "datetime64[ns]"],
+        ],
+    )
+    def test_empty_datelike(
+        self, dtype, expected_data, expected_index, axis, expected_dtype
+    ):
+        # GH 14564
+        df = DataFrame(columns=["a", "b"], dtype=dtype)
+        result = df.quantile(0.5, axis=axis, numeric_only=False)
+        expected = Series(
+            expected_data, name=0.5, index=Index(expected_index), dtype=expected_dtype
+        )
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "expected_data, expected_index, axis",
+        [
+            [[np.nan, np.nan], range(2), 1],
+            [[], [], 0],
+        ],
+    )
+    def test_datelike_numeric_only(self, expected_data, expected_index, axis):
+        # GH 14564
+        df = DataFrame(
+            {
+                "a": pd.to_datetime(["2010", "2011"]),
+                "b": [0, 5],
+                "c": pd.to_datetime(["2011", "2012"]),
+            }
+        )
+        result = df[["a", "c"]].quantile(0.5, axis=axis)
+        expected = Series(
+            expected_data, name=0.5, index=Index(expected_index), dtype=np.float64
+        )
+        tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 03376bdce26f8f..6e9991ff17ac31 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -2681,3 +2681,14 @@ def test_from_out_of_bounds_timedelta(self, constructor, cls):
         result = constructor(scalar)
 
         assert type(get1(result)) is cls
+
+    def test_nested_list_columns(self):
+        # GH 14467
+        result = DataFrame(
+            [[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]]
+        )
+        expected = DataFrame(
+            [[1, 2, 3], [4, 5, 6]],
+            columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]),
+        )
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
index 6348014ca72d26..4a7c4faade00dd 100644
--- a/pandas/tests/frame/test_stack_unstack.py
+++ b/pandas/tests/frame/test_stack_unstack.py
@@ -1999,6 +1999,39 @@ def test_stack_nan_in_multiindex_columns(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_multi_level_stack_categorical(self):
+        # GH 15239
+        midx = MultiIndex.from_arrays(
+            [
+                ["A"] * 2 + ["B"] * 2,
+                pd.Categorical(list("abab")),
+                pd.Categorical(list("ccdd")),
+            ]
+        )
+        df = DataFrame(np.arange(8).reshape(2, 4), columns=midx)
+        result = df.stack([1, 2])
+        expected = DataFrame(
+            [
+                [0, np.nan],
+                [np.nan, 2],
+                [1, np.nan],
+                [np.nan, 3],
+                [4, np.nan],
+                [np.nan, 6],
+                [5, np.nan],
+                [np.nan, 7],
+            ],
+            columns=["A", "B"],
+            index=MultiIndex.from_arrays(
+                [
+                    [0] * 4 + [1] * 4,
+                    pd.Categorical(list("aabbaabb")),
+                    pd.Categorical(list("cdcdcdcd")),
+                ]
+            ),
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_stack_nan_level(self):
         # GH 9406
         df_nan = DataFrame(
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 83aeb29ec53df0..d256b19dbb1487 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -2265,3 +2265,20 @@ def test_groupby_mean_duplicate_index(rand_series_with_duplicate_datetimeindex):
     result = dups.groupby(level=0).mean()
     expected = dups.groupby(dups.index).mean()
     tm.assert_series_equal(result, expected)
+
+
+def test_groupby_all_nan_groups_drop():
+    # GH 15036
+    s = Series([1, 2, 3], [np.nan, np.nan, np.nan])
+    result = s.groupby(s.index).sum()
+    expected = Series([], index=Index([], dtype=np.float64), dtype=np.int64)
+    tm.assert_series_equal(result, expected)
+
+
+def test_groupby_empty_multi_column():
+    # GH 15106
+    result = DataFrame(data=[], columns=["A", "B", "C"]).groupby(["A", "B"]).sum()
+    expected = DataFrame(
+        [], columns=["C"], index=MultiIndex([[], []], [[], []], names=["A", "B"])
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 3cc77aa723fe9a..0ffc6044a58976 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1750,3 +1750,23 @@ def test_readjson_bool_series(self):
         result = read_json("[true, true, false]", typ="series")
         expected = Series([True, True, False])
         tm.assert_series_equal(result, expected)
+
+    def test_to_json_multiindex_escape(self):
+        # GH 15273
+        df = DataFrame(
+            True,
+            index=pd.date_range("2017-01-20", "2017-01-23"),
+            columns=["foo", "bar"],
+        ).stack()
+        result = df.to_json()
+        expected = (
+            "{\"(Timestamp('2017-01-20 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-20 00:00:00'), 'bar')\":true,"
+            "\"(Timestamp('2017-01-21 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-21 00:00:00'), 'bar')\":true,"
+            "\"(Timestamp('2017-01-22 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-22 00:00:00'), 'bar')\":true,"
+            "\"(Timestamp('2017-01-23 00:00:00'), 'foo')\":true,"
+            "\"(Timestamp('2017-01-23 00:00:00'), 'bar')\":true}"
+        )
+        assert result == expected