STYLE: Inconsistent namespace - groupby (#39992) #40056

Merged 1 commit on Feb 25, 2021
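This PR is one of a batch for GH#39992: the test modules already import names such as MultiIndex, date_range, concat, offsets, and Series at module level, so call sites should use those names directly instead of going through the redundant pd. alias. A minimal sketch of the pattern, with illustrative import lines (the PR itself only touches call sites):

import pandas as pd
from pandas import MultiIndex, date_range

# Before: mixed namespace, despite the module-level import
idx = pd.MultiIndex.from_tuples([("a", 0), ("a", 1)])
rng = pd.date_range("2021-01-01", periods=2)

# After: consistent namespace, using the imported names directly
idx = MultiIndex.from_tuples([("a", 0), ("a", 1)])
rng = date_range("2021-01-01", periods=2)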
22 changes: 11 additions & 11 deletions pandas/tests/groupby/aggregate/test_aggregate.py
@@ -127,7 +127,7 @@ def test_groupby_aggregation_multi_level_column():
]
df = DataFrame(
data=lst,
-columns=pd.MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]),
+columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]),
)

result = df.groupby(level=1, axis=1).sum()
@@ -310,7 +310,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
{"A": ["ohlc", partial(np.quantile, q=0.9999), partial(np.quantile, q=0.1111)]}
)
expected_index = pd.date_range("1/1/2012", freq="3T", periods=6)
-expected_columns = pd.MultiIndex.from_tuples(
+expected_columns = MultiIndex.from_tuples(
[
("A", "ohlc", "open"),
("A", "ohlc", "high"),
@@ -484,7 +484,7 @@ def test_func_duplicates_raises():
pd.CategoricalIndex(list("abc")),
pd.interval_range(0, 3),
pd.period_range("2020", periods=3, freq="D"),
-pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
+MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
],
)
def test_agg_index_has_complex_internals(index):
@@ -665,7 +665,7 @@ def test_duplicate_no_raises(self):
def test_agg_relabel_with_level(self):
df = DataFrame(
{"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]},
-index=pd.MultiIndex.from_product([["A", "B"], ["a", "b"]]),
+index=MultiIndex.from_product([["A", "B"], ["a", "b"]]),
)
result = df.groupby(level=0).agg(
aa=("A", "max"), bb=("A", "min"), cc=("B", "mean")
@@ -745,7 +745,7 @@ def test_agg_relabel_multiindex_column(
df = DataFrame(
{"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
)
-df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
idx = Index(["a", "b"], name=("x", "group"))

result = df.groupby(("x", "group")).agg(a_max=(("y", "A"), "max"))
@@ -766,7 +766,7 @@ def test_agg_relabel_multiindex_raises_not_exist():
df = DataFrame(
{"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
)
-df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])

with pytest.raises(KeyError, match="do not exist"):
df.groupby(("x", "group")).agg(a=(("Y", "a"), "max"))
@@ -779,7 +779,7 @@ def test_agg_relabel_multiindex_duplicates():
df = DataFrame(
{"group": ["a", "a", "b", "b"], "A": [0, 1, 2, 3], "B": [5, 6, 7, 8]}
)
-df.columns = pd.MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])
+df.columns = MultiIndex.from_tuples([("x", "group"), ("y", "A"), ("y", "B")])

result = df.groupby(("x", "group")).agg(
a=(("y", "A"), "min"), b=(("y", "A"), "min")
@@ -797,7 +797,7 @@ def test_groupby_aggregate_empty_key(kwargs):
expected = DataFrame(
[1, 4],
index=Index([1, 2], dtype="int64", name="a"),
-columns=pd.MultiIndex.from_tuples([["c", "min"]]),
+columns=MultiIndex.from_tuples([["c", "min"]]),
)
tm.assert_frame_equal(result, expected)

@@ -806,7 +806,7 @@ def test_groupby_aggregate_empty_key_empty_return():
# GH: 32580 Check if everything works, when return is empty
df = DataFrame({"a": [1, 1, 2], "b": [1, 2, 3], "c": [1, 2, 4]})
result = df.groupby("a").agg({"b": []})
-expected = DataFrame(columns=pd.MultiIndex(levels=[["b"], []], codes=[[], []]))
+expected = DataFrame(columns=MultiIndex(levels=[["b"], []], codes=[[], []]))
tm.assert_frame_equal(result, expected)


@@ -851,7 +851,7 @@ def test_grouby_agg_loses_results_with_as_index_false_relabel_multiindex():
def test_multiindex_custom_func(func):
# GH 31777
data = [[1, 4, 2], [5, 7, 1]]
-df = DataFrame(data, columns=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]]))
+df = DataFrame(data, columns=MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]]))
result = df.groupby(np.array([0, 1])).agg(func)
expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}}
expected = DataFrame(expected_dict)
@@ -1150,7 +1150,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
multi_index_list.append([k, value])
else:
multi_index_list.append([k, v])
-multi_index = pd.MultiIndex.from_tuples(tuple(multi_index_list))
+multi_index = MultiIndex.from_tuples(tuple(multi_index_list))

expected_df = DataFrame(data=exp_data, columns=multi_index, index=cat_index)

2 changes: 1 addition & 1 deletion pandas/tests/groupby/aggregate/test_other.py
@@ -439,7 +439,7 @@ def test_agg_over_numpy_arrays():
def test_agg_tzaware_non_datetime_result(as_period):
# discussed in GH#29589, fixed in GH#29641, operating on tzaware values
# with function that is not dtype-preserving
-dti = pd.date_range("2012-01-01", periods=4, tz="UTC")
+dti = date_range("2012-01-01", periods=4, tz="UTC")
if as_period:
dti = dti.tz_localize(None).to_period("D")

6 changes: 3 additions & 3 deletions pandas/tests/groupby/test_apply.py
@@ -930,7 +930,7 @@ def test_groupby_apply_datetime_result_dtypes():
pd.CategoricalIndex(list("abc")),
pd.interval_range(0, 3),
pd.period_range("2020", periods=3, freq="D"),
-pd.MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
+MultiIndex.from_tuples([("a", 0), ("a", 1), ("b", 0)]),
],
)
def test_apply_index_has_complex_internals(index):
@@ -1070,7 +1070,7 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp():

expected = df.iloc[[0, 2, 3]]
expected = expected.reset_index()
-expected.index = pd.MultiIndex.from_frame(expected[["A", "B", "idx"]])
+expected.index = MultiIndex.from_frame(expected[["A", "B", "idx"]])
expected = expected.drop(columns="idx")

tm.assert_frame_equal(result, expected)
@@ -1086,7 +1086,7 @@ def test_apply_by_cols_equals_apply_by_rows_transposed():

df = DataFrame(
np.random.random([6, 4]),
-columns=pd.MultiIndex.from_product([["A", "B"], [1, 2]]),
+columns=MultiIndex.from_product([["A", "B"], [1, 2]]),
)

by_rows = df.T.groupby(axis=0, level=0).apply(
6 changes: 3 additions & 3 deletions pandas/tests/groupby/test_categorical.py
@@ -1568,7 +1568,7 @@ def test_aggregate_categorical_with_isnan():
df = df.astype({"categorical_col": "category"})

result = df.groupby(["A", "B"]).agg(lambda df: df.isna().sum())
-index = pd.MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B"))
+index = MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B"))
expected = DataFrame(
data={
"numerical_col": [1.0, 0.0],
@@ -1640,7 +1640,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
df = DataFrame({"a": cat, "b": cat, "c": val})

cat2 = Categorical([0, 1])
-idx = pd.MultiIndex.from_product([cat2, cat2], names=["a", "b"])
+idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"])
expected_dict = {
"first": Series([0, np.NaN, np.NaN, 1], idx, name="c"),
"last": Series([1, np.NaN, np.NaN, 0], idx, name="c"),
@@ -1665,7 +1665,7 @@ def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
df = DataFrame({"a": cat, "b": cat, "c": val})

cat2 = Categorical([0, 1])
-idx = pd.MultiIndex.from_product([cat2, cat2], names=["a", "b"])
+idx = MultiIndex.from_product([cat2, cat2], names=["a", "b"])
expected_dict = {
"first": Series([0, np.NaN, np.NaN, 1], idx, name="c"),
"last": Series([1, np.NaN, np.NaN, 0], idx, name="c"),
6 changes: 3 additions & 3 deletions pandas/tests/groupby/test_function.py
@@ -370,7 +370,7 @@ def test_mad(self, gb, gni):
def test_describe(self, df, gb, gni):
# describe
expected_index = Index([1, 3], name="A")
-expected_col = pd.MultiIndex(
+expected_col = MultiIndex(
levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]],
codes=[[0] * 8, list(range(8))],
)
@@ -566,7 +566,7 @@ def test_idxmin_idxmax_axis1():

tm.assert_series_equal(alt[indexer], res.droplevel("A"))

df["E"] = pd.date_range("2016-01-01", periods=10)
df["E"] = date_range("2016-01-01", periods=10)
gb2 = df.groupby("A")

msg = "reduction operation 'argmax' not allowed for this dtype"
@@ -958,7 +958,7 @@ def test_frame_describe_multikey(tsframe):
for col in tsframe:
group = grouped[col].describe()
# GH 17464 - Remove duplicate MultiIndex levels
-group_col = pd.MultiIndex(
+group_col = MultiIndex(
levels=[[col], group.columns],
codes=[[0] * len(group.columns), range(len(group.columns))],
)
8 changes: 4 additions & 4 deletions pandas/tests/groupby/test_groupby.py
@@ -1234,7 +1234,7 @@ def test_groupby_list_infer_array_like(df):
def test_groupby_keys_same_size_as_index():
# GH 11185
freq = "s"
-index = pd.date_range(
+index = date_range(
start=Timestamp("2015-09-29T11:34:44-0700"), periods=2, freq=freq
)
df = DataFrame([["A", 10], ["B", 15]], columns=["metric", "values"], index=index)
@@ -1704,7 +1704,7 @@ def test_pivot_table_values_key_error():
# This test is designed to replicate the error in issue #14938
df = DataFrame(
{
"eventDate": pd.date_range(datetime.today(), periods=20, freq="M").tolist(),
"eventDate": date_range(datetime.today(), periods=20, freq="M").tolist(),
"thename": range(0, 20),
}
)
@@ -1793,7 +1793,7 @@ def test_groupby_agg_ohlc_non_first():
df = DataFrame(
[[1], [1]],
columns=["foo"],
index=pd.date_range("2018-01-01", periods=2, freq="D"),
index=date_range("2018-01-01", periods=2, freq="D"),
)

expected = DataFrame(
@@ -1807,7 +1807,7 @@ def test_groupby_agg_ohlc_non_first():
("foo", "ohlc", "close"),
)
),
index=pd.date_range("2018-01-01", periods=2, freq="D"),
index=date_range("2018-01-01", periods=2, freq="D"),
)

result = df.groupby(Grouper(freq="D")).agg(["sum", "ohlc"])
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_grouping.py
@@ -611,7 +611,7 @@ def test_grouping_labels(self, mframe):

def test_list_grouper_with_nat(self):
# GH 14715
df = DataFrame({"date": pd.date_range("1/1/2011", periods=365, freq="D")})
df = DataFrame({"date": date_range("1/1/2011", periods=365, freq="D")})
df.iloc[-1] = pd.NaT
grouper = pd.Grouper(key="date", freq="AS")

12 changes: 5 additions & 7 deletions pandas/tests/groupby/test_timegrouper.py
@@ -228,7 +228,7 @@ def test_timegrouper_with_reg_groups(self):

# multi names
df = df.copy()
df["Date"] = df.index + pd.offsets.MonthEnd(2)
df["Date"] = df.index + offsets.MonthEnd(2)
result = df.groupby([Grouper(freq="1M", key="Date"), "Buyer"]).sum()
expected = DataFrame(
{
@@ -434,7 +434,7 @@ def sumfunc_value(x):
def test_groupby_groups_datetimeindex(self):
# GH#1430
periods = 1000
-ind = pd.date_range(start="2012/1/1", freq="5min", periods=periods)
+ind = date_range(start="2012/1/1", freq="5min", periods=periods)
df = DataFrame(
{"high": np.arange(periods), "low": np.arange(periods)}, index=ind
)
@@ -445,7 +445,7 @@ def test_groupby_groups_datetimeindex(self):
assert isinstance(list(groups.keys())[0], datetime)

# GH#11442
index = pd.date_range("2015/01/01", periods=5, name="date")
index = date_range("2015/01/01", periods=5, name="date")
df = DataFrame({"A": [5, 6, 7, 8, 9], "B": [1, 2, 3, 4, 5]}, index=index)
result = df.groupby(level="date").groups
dates = ["2015-01-05", "2015-01-04", "2015-01-03", "2015-01-02", "2015-01-01"]
@@ -672,9 +672,7 @@ def test_groupby_with_timezone_selection(self):
df = DataFrame(
{
"factor": np.random.randint(0, 3, size=60),
"time": pd.date_range(
"01/01/2000 00:00", periods=60, freq="s", tz="UTC"
),
"time": date_range("01/01/2000 00:00", periods=60, freq="s", tz="UTC"),
}
)
df1 = df.groupby("factor").max()["time"]
@@ -693,7 +691,7 @@ def test_timezone_info(self):

def test_datetime_count(self):
df = DataFrame(
{"a": [1, 2, 3] * 2, "dates": pd.date_range("now", periods=6, freq="T")}
{"a": [1, 2, 3] * 2, "dates": date_range("now", periods=6, freq="T")}
)
result = df.groupby("a").dates.count()
expected = Series([2, 2, 2], index=Index([1, 2, 3], name="a"), name="dates")
20 changes: 10 additions & 10 deletions pandas/tests/groupby/transform/test_transform.py
@@ -101,7 +101,7 @@ def test_transform_fast():
{
"grouping": [0, 1, 1, 3],
"f": [1.1, 2.1, 3.1, 4.5],
"d": pd.date_range("2014-1-1", "2014-1-4"),
"d": date_range("2014-1-1", "2014-1-4"),
"i": [1, 2, 3, 4],
},
columns=["grouping", "f", "i", "d"],
@@ -347,7 +347,7 @@ def test_transform_transformation_func(request, transformation_func):
"A": ["foo", "foo", "foo", "foo", "bar", "bar", "baz"],
"B": [1, 2, np.nan, 3, 3, np.nan, 4],
},
index=pd.date_range("2020-01-01", "2020-01-07"),
index=date_range("2020-01-01", "2020-01-07"),
)

if transformation_func == "cumcount":
@@ -413,7 +413,7 @@ def test_transform_function_aliases(df):
def test_series_fast_transform_date():
# GH 13191
df = DataFrame(
{"grouping": [np.nan, 1, 1, 3], "d": pd.date_range("2014-1-1", "2014-1-4")}
{"grouping": [np.nan, 1, 1, 3], "d": date_range("2014-1-1", "2014-1-4")}
)
result = df.groupby("grouping")["d"].transform("first")
dates = [
@@ -649,7 +649,7 @@ def test_cython_transform_frame(op, args, targop):
"float": s,
"float_missing": s_missing,
"int": [1, 1, 1, 1, 2] * 200,
"datetime": pd.date_range("1990-1-1", periods=1000),
"datetime": date_range("1990-1-1", periods=1000),
"timedelta": pd.timedelta_range(1, freq="s", periods=1000),
"string": strings * 50,
"string_missing": strings_missing * 50,
@@ -667,7 +667,7 @@ def test_cython_transform_frame(op, args, targop):
df["cat"] = df["string"].astype("category")

df2 = df.copy()
-df2.index = pd.MultiIndex.from_product([range(100), range(10)])
+df2.index = MultiIndex.from_product([range(100), range(10)])

# DataFrame - Single and MultiIndex,
# group by values, index level, columns
@@ -691,7 +691,7 @@ def test_cython_transform_frame(op, args, targop):
# to apply separately and concat
i = gb[["int"]].apply(targop)
f = gb[["float", "float_missing"]].apply(targop)
-expected = pd.concat([f, i], axis=1)
+expected = concat([f, i], axis=1)
else:
expected = gb.apply(targop)

@@ -715,7 +715,7 @@

def test_transform_with_non_scalar_group():
# GH 10165
-cols = pd.MultiIndex.from_tuples(
+cols = MultiIndex.from_tuples(
[
("syn", "A"),
("mis", "A"),
@@ -761,7 +761,7 @@ def test_transform_numeric_ret(cols, exp, comp_func, agg_func, request):

# GH 19200
df = DataFrame(
{"a": pd.date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)}
{"a": date_range("2018-01-01", periods=3), "b": range(3), "c": range(7, 10)}
)

result = df.groupby("b")[cols].transform(agg_func)
@@ -958,7 +958,7 @@ def test_groupby_transform_rename():
def demean_rename(x):
result = x - x.mean()

-if isinstance(x, pd.Series):
+if isinstance(x, Series):
return result

result = result.rename(columns={c: "{c}_demeaned" for c in result.columns})
@@ -993,7 +993,7 @@ def test_groupby_transform_timezone_column(func):
)
def test_groupby_transform_with_datetimes(func, values):
# GH 15306
dates = pd.date_range("1/1/2011", periods=10, freq="D")
dates = date_range("1/1/2011", periods=10, freq="D")

stocks = DataFrame({"price": np.arange(10.0)}, index=dates)
stocks["week_id"] = dates.isocalendar().week