From 5fddbb3576eacfa0dcce759b2329c07156a82f3e Mon Sep 17 00:00:00 2001 From: "alexprincel@gmail.com" Date: Thu, 25 Feb 2021 14:45:39 -0500 Subject: [PATCH] STYLE: Inconsistent namespace - groupby (#39992) --- .../tests/groupby/aggregate/test_aggregate.py | 22 +++++++++---------- pandas/tests/groupby/aggregate/test_other.py | 2 +- pandas/tests/groupby/test_apply.py | 6 ++--- pandas/tests/groupby/test_categorical.py | 6 ++--- pandas/tests/groupby/test_function.py | 6 ++--- pandas/tests/groupby/test_groupby.py | 8 +++---- pandas/tests/groupby/test_grouping.py | 2 +- pandas/tests/groupby/test_timegrouper.py | 12 +++++----- .../tests/groupby/transform/test_transform.py | 20 ++++++++--------- 9 files changed, 41 insertions(+), 43 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index ca96cc8b17638..ce75d37d2e776 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -127,7 +127,7 @@ def test_groupby_aggregation_multi_level_column(): ] df = DataFrame( data=lst, - columns=pd.MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), + columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), ) result = df.groupby(level=1, axis=1).sum() @@ -310,7 +310,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present(): {"A": ["ohlc", partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]} ) expected_index = pd.date_range("1/1/2012", freq="3T", periods=6) - expected_columns = pd.MultiIndex.from_tuples( + expected_columns = MultiIndex.from_tuples( [ ("A", "ohlc", "open"), ("A", "ohlc", "high"), @@ -484,7 +484,7 @@ def test_func_duplicates_raises(): pd.CategoricalIndex(list("abc")), pd.interval_range(0, 3), pd.period_range("2020", periods=3, freq="D"), - pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), ], ) def test_agg_index_has_complex_internals(index): @@ -665,7 +665,7 @@ def test_duplicate_no_raises(self): def test_agg_relabel_with_level(self): df = DataFrame( {"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}, - index=pd.MultiIndex.from_product([["A", "B"], ["a", "b"]]), + index=MultiIndex.from_product([["A", "B"], ["a", "b"]]), ) result = df.groupby(level=0).agg( aa=("A", "max"), bb=("A", "min"), cc=("B", "mean") @@ -745,7 +745,7 @@ def test_agg_relabel_multiindex_column( df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} ) - df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) idx = Index(["a", "b"], name=("x", "group")) result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max")) @@ -766,7 +766,7 @@ def test_agg_relabel_multiindex_raises_not_exist(): df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} ) - df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) with pytest.raises(KeyError, match="do not exist"): df.groupby(("x", "group")).agg(a=(("Y", "a"), "max")) @@ -779,7 +779,7 @@ def test_agg_relabel_multiindex_duplicates(): df = DataFrame( {"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]} ) - df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) + df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")]) result = df.groupby(("x", "group")).agg( a=(("y", "A"), "min"), b=(("y", "A"), "min") @@ -797,7 +797,7 @@ def test_groupby_aggregate_empty_key(kwargs): expected = DataFrame( [1, 4], index=Index([1, 2], dtype="int64", name="a"), - columns=pd.MultiIndex.from_tuples([["c", "min"]]), + columns=MultiIndex.from_tuples([["c", "min"]]), ) tm.assert_frame_equal(result, expected) @@ -806,7 +806,7 @@ def test_groupby_aggregate_empty_key_empty_return(): # GH: 32580 Check if everything works, when return is empty df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]}) result = df.groupby("a").agg({"b": []}) - expected = DataFrame(columns=pd.MultiIndex(levels=[["b"], []], codes=[[], []])) + expected = DataFrame(columns=MultiIndex(levels=[["b"], []], codes=[[], []])) tm.assert_frame_equal(result, expected) @@ -851,7 +851,7 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex(): def test_multiindex_custom_func(func): # GH 31777 data = [[1, 4, 2], [5, 7, 1]] - df = DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])) + df = DataFrame(data, columns=MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])) result = df.groupby(np.array([0, 1])).agg(func) expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}} expected = DataFrame(expected_dict) @@ -1150,7 +1150,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): multi_index_list.append([k, value]) else: multi_index_list.append([k, v]) - multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list)) + multi_index = MultiIndex.from_tuples(tuple(multi_index_list)) expected_df = DataFrame(data=exp_data, columns=multi_index, index=cat_index) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 8dd1ac33bf8ae..681192881c301 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -439,7 +439,7 @@ def test_agg_over_numpy_arrays(): def test_agg_tzaware_non_datetime_result(as_period): # discussed in GH#29589, fixed in GH#29641, operating on tzaware values # with function that is not dtype-preserving - dti = pd.date_range("2012-01-01", periods=4, tz="UTC") + dti = date_range("2012-01-01", periods=4, tz="UTC") if as_period: dti = dti.tz_localize(None).to_period("D") diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 4bbdba9fedbff..639fe308529dc 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -930,7 +930,7 @@ def test_groupby_apply_datetime_result_dtypes(): pd.CategoricalIndex(list("abc")), pd.interval_range(0, 3), pd.period_range("2020", periods=3, freq="D"), - pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), + MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]), ], ) def test_apply_index_has_complex_internals(index): @@ -1070,7 +1070,7 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): expected = df.iloc[[0, 2, 3]] expected = expected.reset_index() - expected.index = pd.MultiIndex.from_frame(expected[["A", "B", "idx"]]) + expected.index = MultiIndex.from_frame(expected[["A", "B", "idx"]]) expected = expected.drop(columns="idx") tm.assert_frame_equal(result, expected) @@ -1086,7 +1086,7 @@ def test_apply_by_cols_equals_apply_by_rows_transposed(): df = DataFrame( np.random.random([6, 4]), - columns=pd.MultiIndex.from_product([["A", "B"], [1, 2]]), + columns=MultiIndex.from_product([["A", "B"], [1, 2]]), ) by_rows = df.T.groupby(axis=0, level=0).apply( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 4049ef46f3006..f0356ad90a3ff 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1568,7 +1568,7 @@ def test_aggregate_categorical_with_isnan(): df = df.astype({"categorical_col": "category"}) result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum()) - index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B")) + index = MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B")) expected = DataFrame( data={ "numerical_col": [1.0, 0.0], @@ -1640,7 +1640,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( df = DataFrame({"a": cat, "b": cat, "c": val}) cat2 = Categorical([0, 1]) - idx = pd.MultiIndex.from_product([cat2, cat2], names=["a", "b"]) + idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"]) expected_dict = { "first": Series([0, np.NaN, np.NaN, 1], idx, name="c"), "last": Series([1, np.NaN, np.NaN, 0], idx, name="c"), @@ -1665,7 +1665,7 @@ def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals( df = DataFrame({"a": cat, "b": cat, "c": val}) cat2 = Categorical([0, 1]) - idx = pd.MultiIndex.from_product([cat2, cat2], names=["a", "b"]) + idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"]) expected_dict = { "first": Series([0, np.NaN, np.NaN, 1], idx, name="c"), "last": Series([1, np.NaN, np.NaN, 0], idx, name="c"), diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index ecd9d16228939..cab5417e81445 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -370,7 +370,7 @@ def test_mad(self, gb, gni): def test_describe(self, df, gb, gni): # describe expected_index = Index([1, 3], name="A") - expected_col = pd.MultiIndex( + expected_col = MultiIndex( levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], codes=[[0] * 8, list(range(8))], ) @@ -566,7 +566,7 @@ def test_idxmin_idxmax_axis1(): tm.assert_series_equal(alt[indexer], res.droplevel("A")) - df["E"] = pd.date_range("2016-01-01", periods=10) + df["E"] = date_range("2016-01-01", periods=10) gb2 = df.groupby("A") msg = "reduction operation 'argmax' not allowed for this dtype" @@ -958,7 +958,7 @@ def test_frame_describe_multikey(tsframe): for col in tsframe: group = grouped[col].describe() # GH 17464 - Remove duplicate MultiIndex levels - group_col = pd.MultiIndex( + group_col = MultiIndex( levels=[[col], group.columns], codes=[[0] * len(group.columns), range(len(group.columns))], ) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6731790c89384..afde1daca74c1 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1234,7 +1234,7 @@ def test_groupby_list_infer_array_like(df): def test_groupby_keys_same_size_as_index(): # GH 11185 freq = "s" - index = pd.date_range( + index = date_range( start=Timestamp("2015-09-29T11:34:44-0700"), periods=2, freq=freq ) df = DataFrame([["A", 10], ["B", 15]], columns=["metric", "values"], index=index) @@ -1704,7 +1704,7 @@ def test_pivot_table_values_key_error(): # This test is designed to replicate the error in issue #14938 df = DataFrame( { - "eventDate": pd.date_range(datetime.today(), periods=20, freq="M").tolist(), + "eventDate": date_range(datetime.today(), periods=20, freq="M").tolist(), "thename": range(0, 20), } ) @@ -1793,7 +1793,7 @@ def test_groupby_agg_ohlc_non_first(): df = DataFrame( [[1], [1]], columns=["foo"], - index=pd.date_range("2018-01-01", periods=2, freq="D"), + index=date_range("2018-01-01", periods=2, freq="D"), ) expected = DataFrame( @@ -1807,7 +1807,7 @@ def test_groupby_agg_ohlc_non_first(): ("foo", "ohlc", "close"), ) ), - index=pd.date_range("2018-01-01", periods=2, freq="D"), + index=date_range("2018-01-01", periods=2, freq="D"), ) result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"]) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index a1d956a6fe096..2d10dd8d18dc1 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -611,7 +611,7 @@ def test_grouping_labels(self, mframe): def test_list_grouper_with_nat(self): # GH 14715 - df = DataFrame({"date": pd.date_range("1/1/2011", periods=365, freq="D")}) + df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")}) df.iloc[-1] = pd.NaT grouper = pd.Grouper(key="date", freq="AS") diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 28095c0b0c39f..a89aabc3763f1 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -228,7 +228,7 @@ def test_timegrouper_with_reg_groups(self): # multi names df = df.copy() - df["Date"] = df.index + pd.offsets.MonthEnd(2) + df["Date"] = df.index + offsets.MonthEnd(2) result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum() expected = DataFrame( { @@ -434,7 +434,7 @@ def sumfunc_value(x): def test_groupby_groups_datetimeindex(self): # GH#1430 periods = 1000 - ind = pd.date_range(start="2012/1/1", freq="5min", periods=periods) + ind = date_range(start="2012/1/1", freq="5min", periods=periods) df = DataFrame( {"high": np.arange(periods), "low": np.arange(periods)}, index=ind ) @@ -445,7 +445,7 @@ def test_groupby_groups_datetimeindex(self): assert isinstance(list(groups.keys())[0], datetime) # GH#11442 - index = pd.date_range("2015/01/01", periods=5, name="date") + index = date_range("2015/01/01", periods=5, name="date") df = DataFrame({"A": [5, 6, 7, 8, 9], "B": [1, 2, 3, 4, 5]}, index=index) result = df.groupby(level="date").groups dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"] @@ -672,9 +672,7 @@ def test_groupby_with_timezone_selection(self): df = DataFrame( { "factor": np.random.randint(0, 3, size=60), - "time": pd.date_range( - "01/01/2000 00:00", periods=60, freq="s", tz="UTC" - ), + "time": date_range("01/01/2000 00:00", periods=60, freq="s", tz="UTC"), } ) df1 = df.groupby("factor").max()["time"] @@ -693,7 +691,7 @@ def test_timezone_info(self): def test_datetime_count(self): df = DataFrame( - {"a": [1, 2, 3] * 2, "dates": pd.date_range("now", periods=6, freq="T")} + {"a": [1, 2, 3] * 2, "dates": date_range("now", periods=6, freq="T")} ) result = df.groupby("a").dates.count() expected = Series([2, 2, 2], index=Index([1, 2, 3], name="a"), name="dates") diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index ae0f7545df8cf..4956454ef2d4f 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -101,7 +101,7 @@ def test_transform_fast(): { "grouping": [0, 1, 1, 3], "f": [1.1, 2.1, 3.1, 4.5], - "d": pd.date_range("2014-1-1", "2014-1-4"), + "d": date_range("2014-1-1", "2014-1-4"), "i": [1, 2, 3, 4], }, columns=["grouping", "f", "i", "d"], @@ -347,7 +347,7 @@ def test_transform_transformation_func(request, transformation_func): "A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"], "B": [1, 2, np.nan, 3, 3, np.nan, 4], }, - index=pd.date_range("2020-01-01", "2020-01-07"), + index=date_range("2020-01-01", "2020-01-07"), ) if transformation_func == "cumcount": @@ -413,7 +413,7 @@ def test_transform_function_aliases(df): def test_series_fast_transform_date(): # GH 13191 df = DataFrame( - {"grouping": [np.nan, 1, 1, 3], "d": pd.date_range("2014-1-1", "2014-1-4")} + {"grouping": [np.nan, 1, 1, 3], "d": date_range("2014-1-1", "2014-1-4")} ) result = df.groupby("grouping")["d"].transform("first") dates = [ @@ -649,7 +649,7 @@ def test_cython_transform_frame(op, args, targop): "float": s, "float_missing": s_missing, "int": [1, 1, 1, 1, 2] * 200, - "datetime": pd.date_range("1990-1-1", periods=1000), + "datetime": date_range("1990-1-1", periods=1000), "timedelta": pd.timedelta_range(1, freq="s", periods=1000), "string": strings * 50, "string_missing": strings_missing * 50, @@ -667,7 +667,7 @@ def test_cython_transform_frame(op, args, targop): df["cat"] = df["string"].astype("category") df2 = df.copy() - df2.index = pd.MultiIndex.from_product([range(100), range(10)]) + df2.index = MultiIndex.from_product([range(100), range(10)]) # DataFrame - Single and MultiIndex, # group by values, index level, columns @@ -691,7 +691,7 @@ def test_cython_transform_frame(op, args, targop): # to apply separately and concat i = gb[["int"]].apply(targop) f = gb[["float", "float_missing"]].apply(targop) - expected = pd.concat([f, i], axis=1) + expected = concat([f, i], axis=1) else: expected = gb.apply(targop) @@ -715,7 +715,7 @@ def test_cython_transform_frame(op, args, targop): def test_transform_with_non_scalar_group(): # GH 10165 - cols = pd.MultiIndex.from_tuples( + cols = MultiIndex.from_tuples( [ ("syn", "A"), ("mis", "A"), @@ -761,7 +761,7 @@ def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request): # GH 19200 df = DataFrame( - {"a": pd.date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)} + {"a": date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)} ) result = df.groupby("b")[cols].transform(agg_func) @@ -958,7 +958,7 @@ def test_groupby_transform_rename(): def demean_rename(x): result = x - x.mean() - if isinstance(x, pd.Series): + if isinstance(x, Series): return result result = result.rename(columns={c: "{c}_demeaned" for c in result.columns}) @@ -993,7 +993,7 @@ def test_groupby_transform_timezone_column(func): ) def test_groupby_transform_with_datetimes(func, values): # GH 15306 - dates = pd.date_range("1/1/2011", periods=10, freq="D") + dates = date_range("1/1/2011", periods=10, freq="D") stocks = DataFrame({"price": np.arange(10.0)}, index=dates) stocks["week_id"] = dates.isocalendar().week