Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start setting up tpch planning benchmarks #8665

Merged
merged 4 commits into from
Dec 30, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions datafusion/core/benches/sql_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,104 @@ pub fn create_table_provider(column_prefix: &str, num_columns: usize) -> Arc<Mem
MemTable::try_new(schema, vec![]).map(Arc::new).unwrap()
}

pub fn create_tpch_schemas() -> [(String, Schema); 8] {
let lineitem_schema = Schema::new(vec![
Field::new("l_orderkey", DataType::Int64, false),
Field::new("l_partkey", DataType::Int64, false),
Field::new("l_suppkey", DataType::Int64, false),
Field::new("l_linenumber", DataType::Int32, false),
Field::new("l_quantity", DataType::Decimal128(15, 2), false),
Field::new("l_extendedprice", DataType::Decimal128(15, 2), false),
Field::new("l_discount", DataType::Decimal128(15, 2), false),
Field::new("l_tax", DataType::Decimal128(15, 2), false),
Field::new("l_returnflag", DataType::Utf8, false),
Field::new("l_linestatus", DataType::Utf8, false),
Field::new("l_shipdate", DataType::Date32, false),
Field::new("l_commitdate", DataType::Date32, false),
Field::new("l_receiptdate", DataType::Date32, false),
Field::new("l_shipinstruct", DataType::Utf8, false),
Field::new("l_shipmode", DataType::Utf8, false),
Field::new("l_comment", DataType::Utf8, false),
]);

let orders_schema = Schema::new(vec![
Field::new("o_orderkey", DataType::Int64, false),
Field::new("o_custkey", DataType::Int64, false),
Field::new("o_orderstatus", DataType::Utf8, false),
Field::new("o_totalprice", DataType::Decimal128(15, 2), false),
Field::new("o_orderdate", DataType::Date32, false),
Field::new("o_orderpriority", DataType::Utf8, false),
Field::new("o_clerk", DataType::Utf8, false),
Field::new("o_shippriority", DataType::Int32, false),
Field::new("o_comment", DataType::Utf8, false),
]);

let part_schema = Schema::new(vec![
Field::new("p_partkey", DataType::Int64, false),
Field::new("p_name", DataType::Utf8, false),
Field::new("p_mfgr", DataType::Utf8, false),
Field::new("p_brand", DataType::Utf8, false),
Field::new("p_type", DataType::Utf8, false),
Field::new("p_size", DataType::Int32, false),
Field::new("p_container", DataType::Utf8, false),
Field::new("p_retailprice", DataType::Decimal128(15, 2), false),
Field::new("p_comment", DataType::Utf8, false),
]);

let supplier_schema = Schema::new(vec![
Field::new("s_suppkey", DataType::Int64, false),
Field::new("s_name", DataType::Utf8, false),
Field::new("s_address", DataType::Utf8, false),
Field::new("s_nationkey", DataType::Int64, false),
Field::new("s_phone", DataType::Utf8, false),
Field::new("s_acctbal", DataType::Decimal128(15, 2), false),
Field::new("s_comment", DataType::Utf8, false),
]);

let partsupp_schema = Schema::new(vec![
Field::new("ps_partkey", DataType::Int64, false),
Field::new("ps_suppkey", DataType::Int64, false),
Field::new("ps_availqty", DataType::Int32, false),
Field::new("ps_supplycost", DataType::Decimal128(15, 2), false),
Field::new("ps_comment", DataType::Utf8, false),
]);

let customer_schema = Schema::new(vec![
Field::new("c_custkey", DataType::Int64, false),
Field::new("c_name", DataType::Utf8, false),
Field::new("c_address", DataType::Utf8, false),
Field::new("c_nationkey", DataType::Int64, false),
Field::new("c_phone", DataType::Utf8, false),
Field::new("c_acctbal", DataType::Decimal128(15, 2), false),
Field::new("c_mktsegment", DataType::Utf8, false),
Field::new("c_comment", DataType::Utf8, false),
]);

let nation_schema = Schema::new(vec![
Field::new("n_nationkey", DataType::Int64, false),
Field::new("n_name", DataType::Utf8, false),
Field::new("n_regionkey", DataType::Int64, false),
Field::new("n_comment", DataType::Utf8, false),
]);

let region_schema = Schema::new(vec![
Field::new("r_regionkey", DataType::Int64, false),
Field::new("r_name", DataType::Utf8, false),
Field::new("r_comment", DataType::Utf8, false),
]);

[
("lineitem".to_string(), lineitem_schema),
("orders".to_string(), orders_schema),
("part".to_string(), part_schema),
("supplier".to_string(), supplier_schema),
("partsupp".to_string(), partsupp_schema),
("customer".to_string(), customer_schema),
("nation".to_string(), nation_schema),
("region".to_string(), region_schema),
]
}

fn create_context() -> SessionContext {
let ctx = SessionContext::new();
ctx.register_table("t1", create_table_provider("a", 200))
Expand All @@ -68,6 +166,16 @@ fn create_context() -> SessionContext {
.unwrap();
ctx.register_table("t700", create_table_provider("c", 700))
.unwrap();

let tpch_schemas = create_tpch_schemas();
tpch_schemas.iter().for_each(|(name, schema)| {
ctx.register_table(
name,
Arc::new(MemTable::try_new(Arc::new(schema.clone()), vec![]).unwrap()),
)
.unwrap();
});

ctx
}

Expand Down Expand Up @@ -115,6 +223,54 @@ fn criterion_benchmark(c: &mut Criterion) {
)
})
});

// Load the text of the TPC-H queries from the `benchmarks/queries` directory.
// The paths are relative, so they are resolved against the process's current
// working directory; a missing or unreadable file panics through `unwrap`.
let q1_sql = std::fs::read_to_string("../../benchmarks/queries/q1.sql").unwrap();
let q2_sql = std::fs::read_to_string("../../benchmarks/queries/q2.sql").unwrap();
let q3_sql = std::fs::read_to_string("../../benchmarks/queries/q3.sql").unwrap();
let q4_sql = std::fs::read_to_string("../../benchmarks/queries/q4.sql").unwrap();
let q5_sql = std::fs::read_to_string("../../benchmarks/queries/q5.sql").unwrap();
let q6_sql = std::fs::read_to_string("../../benchmarks/queries/q6.sql").unwrap();
let q7_sql = std::fs::read_to_string("../../benchmarks/queries/q7.sql").unwrap();
let q8_sql = std::fs::read_to_string("../../benchmarks/queries/q8.sql").unwrap();
let q9_sql = std::fs::read_to_string("../../benchmarks/queries/q9.sql").unwrap();
let q10_sql = std::fs::read_to_string("../../benchmarks/queries/q10.sql").unwrap();
let q11_sql = std::fs::read_to_string("../../benchmarks/queries/q11.sql").unwrap();
let q12_sql = std::fs::read_to_string("../../benchmarks/queries/q12.sql").unwrap();
let q13_sql = std::fs::read_to_string("../../benchmarks/queries/q13.sql").unwrap();
let q14_sql = std::fs::read_to_string("../../benchmarks/queries/q14.sql").unwrap();
// NOTE(review): q15 is deliberately left disabled, but the reason is not
// recorded here — TODO document why it is skipped.
// let q15_sql = std::fs::read_to_string("../../benchmarks/queries/q15.sql").unwrap();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be good, in a follow-on PR, to note why this query is commented out.

// Remaining TPC-H query texts (q16 through q22), read from the same
// `benchmarks/queries` directory; each read panics through `unwrap` on failure.
let q16_sql = std::fs::read_to_string("../../benchmarks/queries/q16.sql").unwrap();
let q17_sql = std::fs::read_to_string("../../benchmarks/queries/q17.sql").unwrap();
let q18_sql = std::fs::read_to_string("../../benchmarks/queries/q18.sql").unwrap();
let q19_sql = std::fs::read_to_string("../../benchmarks/queries/q19.sql").unwrap();
let q20_sql = std::fs::read_to_string("../../benchmarks/queries/q20.sql").unwrap();
let q21_sql = std::fs::read_to_string("../../benchmarks/queries/q21.sql").unwrap();
let q22_sql = std::fs::read_to_string("../../benchmarks/queries/q22.sql").unwrap();

// Benchmark physical planning of the whole TPC-H query set as one unit of work.
//
// Criterion expects a single timing loop per `bench_function` closure. Calling
// `b.iter` once per query makes each call overwrite the previous measurement,
// so the reported time would reflect only the last query. All queries are
// therefore planned inside one `b.iter` closure, and the reported time is the
// cost of planning the full set once.
c.bench_function("physical_plan_tpch", |b| {
    // q15 is intentionally absent: it is not loaded above.
    let tpch_queries = [
        q1_sql.as_str(),
        q2_sql.as_str(),
        q3_sql.as_str(),
        q4_sql.as_str(),
        q5_sql.as_str(),
        q6_sql.as_str(),
        q7_sql.as_str(),
        q8_sql.as_str(),
        q9_sql.as_str(),
        q10_sql.as_str(),
        q11_sql.as_str(),
        q12_sql.as_str(),
        q13_sql.as_str(),
        q14_sql.as_str(),
        q16_sql.as_str(),
        q17_sql.as_str(),
        q18_sql.as_str(),
        q19_sql.as_str(),
        q20_sql.as_str(),
        q21_sql.as_str(),
        q22_sql.as_str(),
    ];

    b.iter(|| {
        for sql in tpch_queries.iter().copied() {
            physical_plan(&ctx, sql);
        }
    })
});
}

criterion_group!(benches, criterion_benchmark);
Expand Down