From 7626eee0f4e0a4c961bf2142eb9817161ae293a7 Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Wed, 27 Dec 2023 11:29:59 -0500 Subject: [PATCH 1/4] Start setting up tpch planning benchmarks --- datafusion/core/benches/sql_planner.rs | 58 ++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 7a41b6bec6f5..b6304793f56c 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -60,6 +60,44 @@ pub fn create_table_provider(column_prefix: &str, num_columns: usize) -> Arc [(String, Schema); 2] { + let lineitem_schema = Schema::new(vec![ + Field::new("l_orderkey", DataType::Int32, false), + Field::new("l_partkey", DataType::Int32, false), + Field::new("l_suppkey", DataType::Int32, false), + Field::new("l_linenumber", DataType::Int32, false), + Field::new("l_quantity", DataType::Float64, false), + Field::new("l_extendedprice", DataType::Float64, false), + Field::new("l_discount", DataType::Float64, false), + Field::new("l_tax", DataType::Float64, false), + Field::new("l_returnflag", DataType::Utf8, false), + Field::new("l_linestatus", DataType::Utf8, false), + Field::new("l_shipdate", DataType::Date32, false), + Field::new("l_commitdate", DataType::Date32, false), + Field::new("l_receiptdate", DataType::Date32, false), + Field::new("l_shipinstruct", DataType::Utf8, false), + Field::new("l_shipmode", DataType::Utf8, false), + Field::new("l_comment", DataType::Utf8, false), + ]); + + let orders_schema = Schema::new(vec![ + Field::new("o_orderkey", DataType::Int32, false), + Field::new("o_custkey", DataType::Int32, false), + Field::new("o_orderstatus", DataType::Utf8, false), + Field::new("o_totalprice", DataType::Float64, false), + Field::new("o_orderdate", DataType::Date32, false), + Field::new("o_orderpriority", DataType::Utf8, false), + Field::new("o_clerk", DataType::Utf8, false), + Field::new("o_shippriority", DataType::Int32, false), + Field::new("o_comment", DataType::Utf8, false), + ]); + + return [ + ("lineitem".to_string(), lineitem_schema), + ("orders".to_string(), orders_schema), + ]; +} + fn create_context() -> SessionContext { let ctx = SessionContext::new(); ctx.register_table("t1", create_table_provider("a", 200)) @@ -68,6 +106,16 @@ fn create_context() -> SessionContext { .unwrap(); ctx.register_table("t700", create_table_provider("c", 700)) .unwrap(); + + let tpch_schemas = create_tpch_schemas(); + tpch_schemas.iter().for_each(|(name, schema)| { + ctx.register_table( + name, + Arc::new(MemTable::try_new(Arc::new(schema.clone()), vec![]).unwrap()), + ) + .unwrap(); + }); + ctx } @@ -115,6 +163,16 @@ fn criterion_benchmark(c: &mut Criterion) { ) }) }); + + let q1_sql = std::fs::read_to_string("../../benchmarks/queries/q1.sql").unwrap(); + c.bench_function("physical_plan_tpch_q1", |b| { + b.iter(|| physical_plan(&ctx, &q1_sql)) + }); + + let q12_sql = std::fs::read_to_string("../../benchmarks/queries/q12.sql").unwrap(); + c.bench_function("physical_plan_tpch_q12", |b| { + b.iter(|| physical_plan(&ctx, &q12_sql)) + }); } criterion_group!(benches, criterion_benchmark); From 707a01809b6db5fcaac3ef20744abbf0ac928387 Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Thu, 28 Dec 2023 20:14:44 -0500 Subject: [PATCH 2/4] Add remaining tpch queries --- datafusion/core/benches/sql_planner.rs | 132 +++++++++++++++++++++---- 1 file changed, 115 insertions(+), 17 deletions(-) diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index b6304793f56c..67a9d2a4ccad 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -60,16 +60,16 @@ pub fn create_table_provider(column_prefix: &str, num_columns: usize) -> Arc [(String, Schema); 2] { +pub fn create_tpch_schemas() -> [(String, Schema); 8] { let lineitem_schema = Schema::new(vec![ - Field::new("l_orderkey", DataType::Int32, false), - Field::new("l_partkey", DataType::Int32, false), - Field::new("l_suppkey", DataType::Int32, false), + Field::new("l_orderkey", DataType::Int64, false), + Field::new("l_partkey", DataType::Int64, false), + Field::new("l_suppkey", DataType::Int64, false), Field::new("l_linenumber", DataType::Int32, false), - Field::new("l_quantity", DataType::Float64, false), - Field::new("l_extendedprice", DataType::Float64, false), - Field::new("l_discount", DataType::Float64, false), - Field::new("l_tax", DataType::Float64, false), + Field::new("l_quantity", DataType::Decimal128(15, 2), false), + Field::new("l_extendedprice", DataType::Decimal128(15, 2), false), + Field::new("l_discount", DataType::Decimal128(15, 2), false), + Field::new("l_tax", DataType::Decimal128(15, 2), false), Field::new("l_returnflag", DataType::Utf8, false), Field::new("l_linestatus", DataType::Utf8, false), Field::new("l_shipdate", DataType::Date32, false), @@ -81,10 +81,10 @@ pub fn create_tpch_schemas() -> [(String, Schema); 2] { ]); let orders_schema = Schema::new(vec![ - Field::new("o_orderkey", DataType::Int32, false), - Field::new("o_custkey", DataType::Int32, false), + Field::new("o_orderkey", DataType::Int64, false), + Field::new("o_custkey", DataType::Int64, false), Field::new("o_orderstatus", DataType::Utf8, false), - Field::new("o_totalprice", DataType::Float64, false), + Field::new("o_totalprice", DataType::Decimal128(15, 2), false), Field::new("o_orderdate", DataType::Date32, false), Field::new("o_orderpriority", DataType::Utf8, false), Field::new("o_clerk", DataType::Utf8, false), @@ -92,9 +92,69 @@ pub fn create_tpch_schemas() -> [(String, Schema); 2] { Field::new("o_comment", DataType::Utf8, false), ]); + let part_schema = Schema::new(vec![ + Field::new("p_partkey", DataType::Int64, false), + Field::new("p_name", DataType::Utf8, false), + Field::new("p_mfgr", DataType::Utf8, false), + Field::new("p_brand", DataType::Utf8, false), + Field::new("p_type", DataType::Utf8, false), + Field::new("p_size", DataType::Int32, false), + Field::new("p_container", DataType::Utf8, false), + Field::new("p_retailprice", DataType::Decimal128(15, 2), false), + Field::new("p_comment", DataType::Utf8, false), + ]); + + let supplier_schema = Schema::new(vec![ + Field::new("s_suppkey", DataType::Int64, false), + Field::new("s_name", DataType::Utf8, false), + Field::new("s_address", DataType::Utf8, false), + Field::new("s_nationkey", DataType::Int64, false), + Field::new("s_phone", DataType::Utf8, false), + Field::new("s_acctbal", DataType::Decimal128(15, 2), false), + Field::new("s_comment", DataType::Utf8, false), + ]); + + let partsupp_schema = Schema::new(vec![ + Field::new("ps_partkey", DataType::Int64, false), + Field::new("ps_suppkey", DataType::Int64, false), + Field::new("ps_availqty", DataType::Int32, false), + Field::new("ps_supplycost", DataType::Decimal128(15, 2), false), + Field::new("ps_comment", DataType::Utf8, false), + ]); + + let customer_schema = Schema::new(vec![ + Field::new("c_custkey", DataType::Int64, false), + Field::new("c_name", DataType::Utf8, false), + Field::new("c_address", DataType::Utf8, false), + Field::new("c_nationkey", DataType::Int64, false), + Field::new("c_phone", DataType::Utf8, false), + Field::new("c_acctbal", DataType::Decimal128(15, 2), false), + Field::new("c_mktsegment", DataType::Utf8, false), + Field::new("c_comment", DataType::Utf8, false), + ]); + + let nation_schema = Schema::new(vec![ + Field::new("n_nationkey", DataType::Int64, false), + Field::new("n_name", DataType::Utf8, false), + Field::new("n_regionkey", DataType::Int64, false), + Field::new("n_comment", DataType::Utf8, false), + ]); + + let region_schema = Schema::new(vec![ + Field::new("r_regionkey", DataType::Int64, false), + Field::new("r_name", DataType::Utf8, false), + Field::new("r_comment", DataType::Utf8, false), + ]); + return [ ("lineitem".to_string(), lineitem_schema), ("orders".to_string(), orders_schema), + ("part".to_string(), part_schema), + ("supplier".to_string(), supplier_schema), + ("partsupp".to_string(), partsupp_schema), + ("customer".to_string(), customer_schema), + ("nation".to_string(), nation_schema), + ("region".to_string(), region_schema), ]; } @@ -165,13 +225,51 @@ fn criterion_benchmark(c: &mut Criterion) { }); let q1_sql = std::fs::read_to_string("../../benchmarks/queries/q1.sql").unwrap(); - c.bench_function("physical_plan_tpch_q1", |b| { - b.iter(|| physical_plan(&ctx, &q1_sql)) - }); - + let q2_sql = std::fs::read_to_string("../../benchmarks/queries/q2.sql").unwrap(); + let q3_sql = std::fs::read_to_string("../../benchmarks/queries/q3.sql").unwrap(); + let q4_sql = std::fs::read_to_string("../../benchmarks/queries/q4.sql").unwrap(); + let q5_sql = std::fs::read_to_string("../../benchmarks/queries/q5.sql").unwrap(); + let q6_sql = std::fs::read_to_string("../../benchmarks/queries/q6.sql").unwrap(); + let q7_sql = std::fs::read_to_string("../../benchmarks/queries/q7.sql").unwrap(); + let q8_sql = std::fs::read_to_string("../../benchmarks/queries/q8.sql").unwrap(); + let q9_sql = std::fs::read_to_string("../../benchmarks/queries/q9.sql").unwrap(); + let q10_sql = std::fs::read_to_string("../../benchmarks/queries/q10.sql").unwrap(); + let q11_sql = std::fs::read_to_string("../../benchmarks/queries/q11.sql").unwrap(); let q12_sql = std::fs::read_to_string("../../benchmarks/queries/q12.sql").unwrap(); - c.bench_function("physical_plan_tpch_q12", |b| { - b.iter(|| physical_plan(&ctx, &q12_sql)) + let q13_sql = std::fs::read_to_string("../../benchmarks/queries/q13.sql").unwrap(); + let q14_sql = std::fs::read_to_string("../../benchmarks/queries/q14.sql").unwrap(); + // let q15_sql = std::fs::read_to_string("../../benchmarks/queries/q15.sql").unwrap(); + let q16_sql = std::fs::read_to_string("../../benchmarks/queries/q16.sql").unwrap(); + let q17_sql = std::fs::read_to_string("../../benchmarks/queries/q17.sql").unwrap(); + let q18_sql = std::fs::read_to_string("../../benchmarks/queries/q18.sql").unwrap(); + let q19_sql = std::fs::read_to_string("../../benchmarks/queries/q19.sql").unwrap(); + let q20_sql = std::fs::read_to_string("../../benchmarks/queries/q20.sql").unwrap(); + let q21_sql = std::fs::read_to_string("../../benchmarks/queries/q21.sql").unwrap(); + let q22_sql = std::fs::read_to_string("../../benchmarks/queries/q22.sql").unwrap(); + + c.bench("physical_plan_tpch", |b| { + b.iter(|| physical_plan(&ctx, &q1_sql)); + b.iter(|| physical_plan(&ctx, &q2_sql)); + b.iter(|| physical_plan(&ctx, &q3_sql)); + b.iter(|| physical_plan(&ctx, &q4_sql)); + b.iter(|| physical_plan(&ctx, &q5_sql)); + b.iter(|| physical_plan(&ctx, &q6_sql)); + b.iter(|| physical_plan(&ctx, &q7_sql)); + b.iter(|| physical_plan(&ctx, &q8_sql)); + b.iter(|| physical_plan(&ctx, &q9_sql)); + b.iter(|| physical_plan(&ctx, &q10_sql)); + b.iter(|| physical_plan(&ctx, &q11_sql)); + b.iter(|| physical_plan(&ctx, &q12_sql)); + b.iter(|| physical_plan(&ctx, &q13_sql)); + b.iter(|| physical_plan(&ctx, &q14_sql)); + // b.iter(|| physical_plan(&ctx, &q15_sql)); + b.iter(|| physical_plan(&ctx, &q16_sql)); + b.iter(|| physical_plan(&ctx, &q17_sql)); + b.iter(|| physical_plan(&ctx, &q18_sql)); + b.iter(|| physical_plan(&ctx, &q19_sql)); + b.iter(|| physical_plan(&ctx, &q20_sql)); + b.iter(|| physical_plan(&ctx, &q21_sql)); + b.iter(|| physical_plan(&ctx, &q22_sql)); }); } From ee67c4d2c991947abe9a3b3648dcf93285823f7e Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Thu, 28 Dec 2023 20:42:57 -0500 Subject: [PATCH 3/4] Fix bench function --- datafusion/core/benches/sql_planner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 67a9d2a4ccad..478ca03726e1 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -247,7 +247,7 @@ fn criterion_benchmark(c: &mut Criterion) { let q21_sql = std::fs::read_to_string("../../benchmarks/queries/q21.sql").unwrap(); let q22_sql = std::fs::read_to_string("../../benchmarks/queries/q22.sql").unwrap(); - c.bench("physical_plan_tpch", |b| { + c.bench_function("physical_plan_tpch", |b| { b.iter(|| physical_plan(&ctx, &q1_sql)); b.iter(|| physical_plan(&ctx, &q2_sql)); b.iter(|| physical_plan(&ctx, &q3_sql)); From 5d432b9269b6b5b756743476ce359835cdeeb6e8 Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Thu, 28 Dec 2023 20:57:44 -0500 Subject: [PATCH 4/4] Clippy --- datafusion/core/benches/sql_planner.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 478ca03726e1..1754129a768f 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -146,7 +146,7 @@ pub fn create_tpch_schemas() -> [(String, Schema); 8] { Field::new("r_comment", DataType::Utf8, false), ]); - return [ + [ ("lineitem".to_string(), lineitem_schema), ("orders".to_string(), orders_schema), ("part".to_string(), part_schema), @@ -155,7 +155,7 @@ pub fn create_tpch_schemas() -> [(String, Schema); 8] { ("customer".to_string(), customer_schema), ("nation".to_string(), nation_schema), ("region".to_string(), region_schema), - ]; + ] } fn create_context() -> SessionContext {