Skip to content

Commit

Permalink
Morsel-driven Parallelism using rayon (apache#2199)
Browse files Browse the repository at this point in the history
  • Loading branch information
tustvold committed Apr 13, 2022
1 parent 8058fbb commit f43b114
Show file tree
Hide file tree
Showing 12 changed files with 1,355 additions and 22 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ The parquet SQL benchmarks can be run with
cargo bench --bench parquet_query_sql
```

These randomly generate a parquet file, and then benchmark queries sourced from [parquet_query_sql.sql](./datafusion/benches/parquet_query_sql.sql) against it. This can therefore be a quick way to add coverage of particular query and/or data paths.
These randomly generate a parquet file, and then benchmark queries sourced from [parquet_query_sql.sql](./datafusion/scheduler/benches/parquet_query_sql.sql) against it. This can therefore be a quick way to add coverage of particular query and/or data paths.

If the environment variable `PARQUET_FILE` is set, the benchmark will run queries against this file instead of a randomly generated one. This can be useful for performing multiple runs, potentially with different code, against the same source data, or for testing against a custom dataset.

Expand Down
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@

[workspace]
members = [
"datafusion/core",
"datafusion/common",
"datafusion/core",
"datafusion/expr",
"datafusion/jit",
"datafusion/physical-expr",
"datafusion/proto",
"datafusion/scheduler",
"datafusion-examples",
"benchmarks",
"ballista/rust/client",
Expand Down
4 changes: 0 additions & 4 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,6 @@ name = "scalar"
harness = false
name = "physical_plan"

[[bench]]
harness = false
name = "parquet_query_sql"

[[bench]]
harness = false
name = "jit"
Expand Down
56 changes: 56 additions & 0 deletions datafusion/scheduler/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Crate manifest for the DataFusion scheduler: morsel-driven parallel
# query execution built on rayon (see the commit this file was added in).
# Version is kept in lockstep with the main `datafusion` crate (7.0.0).
[package]
name = "datafusion-scheduler"
description = "Scheduling for DataFusion query engine"
version = "7.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
readme = "../README.md"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
keywords = ["arrow", "query", "sql"]
edition = "2021"
rust-version = "1.58"

# Library target name uses an underscore (Rust identifier) while the
# package name above uses a hyphen (crates.io convention).
[lib]
name = "datafusion_scheduler"
path = "src/lib.rs"

# No optional features yet; section kept as a placeholder.
[features]

[dependencies]
# default-features disabled to avoid pulling in ahash's runtime RNG deps
ahash = { version = "0.7", default-features = false }
arrow = { version = "11" }
async-trait = "0.1"
# depends on the core crate by path so workspace builds use the local copy
datafusion = { path = "../core", version = "7.0.0" }
futures = "0.3"
log = "0.4"
parking_lot = "0.12"
# rayon supplies the work-stealing thread pool used by the scheduler
rayon = "1.5"

[dev-dependencies]
criterion = "0.3"
rand = "0.8"
# tokio is test-only here: benchmarks drive async plan creation/registration
tokio = { version = "1.0", features = ["macros", "rt"] }
parquet = "11.0"
tempfile = "3"

# Benchmark moved here from datafusion/core (it was removed from
# core's Cargo.toml in the same change); harness = false because
# criterion provides its own main().
[[bench]]
harness = false
name = "parquet_query_sql"
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ use arrow::datatypes::{
};
use arrow::record_batch::RecordBatch;
use criterion::{criterion_group, criterion_main, Criterion};
use datafusion::prelude::{ParquetReadOptions, SessionContext};
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion_scheduler::Scheduler;
use futures::stream::StreamExt;
use parquet::arrow::ArrowWriter;
use parquet::file::properties::{WriterProperties, WriterVersion};
use rand::distributions::uniform::SampleUniform;
Expand All @@ -37,7 +39,6 @@ use std::path::Path;
use std::sync::Arc;
use std::time::Instant;
use tempfile::NamedTempFile;
use tokio_stream::StreamExt;

/// The number of batches to write
const NUM_BATCHES: usize = 2048;
Expand Down Expand Up @@ -193,15 +194,24 @@ fn criterion_benchmark(c: &mut Criterion) {
assert!(Path::new(&file_path).exists(), "path not found");
println!("Using parquet file {}", file_path);

let context = SessionContext::new();
let partitions = 4;
let config = SessionConfig::new().with_target_partitions(partitions);
let mut context = SessionContext::with_config(config);

let rt = tokio::runtime::Builder::new_multi_thread().build().unwrap();
rt.block_on(context.register_parquet(
"t",
file_path.as_str(),
ParquetReadOptions::default(),
))
.unwrap();
let scheduler = Scheduler::new(partitions);

let local_rt = tokio::runtime::Builder::new_current_thread()
.build()
.unwrap();

let query_rt = tokio::runtime::Builder::new_multi_thread()
.worker_threads(partitions)
.build()
.unwrap();

local_rt
.block_on(context.register_parquet("t", file_path.as_str()))
.unwrap();

// We read the queries from a file so they can be changed without recompiling the benchmark
let mut queries_file = File::open("benches/parquet_query_sql.sql").unwrap();
Expand All @@ -220,17 +230,42 @@ fn criterion_benchmark(c: &mut Criterion) {
continue;
}

let query = query.as_str();
c.bench_function(query, |b| {
c.bench_function(&format!("tokio: {}", query), |b| {
b.iter(|| {
let context = context.clone();
rt.block_on(async move {
let query = context.sql(query).await.unwrap();
let query = query.clone();
let mut context = context.clone();
let (sender, mut receiver) = futures::channel::mpsc::unbounded();

// Spawn work to a separate tokio thread pool
query_rt.spawn(async move {
let query = context.sql(&query).await.unwrap();
let mut stream = query.execute_stream().await.unwrap();
while criterion::black_box(stream.next().await).is_some() {}

while let Some(next) = stream.next().await {
sender.unbounded_send(next).unwrap();
}
});

local_rt.block_on(async {
while receiver.next().await.transpose().unwrap().is_some() {}
})
});
});

c.bench_function(&format!("scheduled: {}", query), |b| {
b.iter(|| {
let query = query.clone();
let mut context = context.clone();

local_rt.block_on(async {
let query = context.sql(&query).await.unwrap();
let plan = query.create_physical_plan().await.unwrap();
let mut stream =
scheduler.schedule(plan, context.task_ctx()).unwrap();
while stream.next().await.transpose().unwrap().is_some() {}
});
});
});
}

// Temporary file must outlive the benchmarks, it is deleted when dropped
Expand Down
Loading

0 comments on commit f43b114

Please sign in to comment.