Introduce new optional scheduler, using Morsel-driven Parallelism + rayon (#2199) (#2226)

* Morsel-driven Parallelism using rayon (#2199)

* Fix LIFO spawn ordering

* Further docs for ExecutionPipeline

* Deduplicate concurrent wakes

* Add license headers

* Sort Cargo.toml

* Revert accidental change to ParquetExec

* Handle wakeups triggered by other threads

* Use SeqCst memory ordering

* Review feedback

* Add panic handler

* Cleanup structs

Add test of tokio interoperation

* Review feedback

* Use BatchPartitioner

Cleanup error handling

* Clarify shutdown characteristics

* Fix racy test_panic

* Don't overload Query nomenclature

* Rename QueryResults to ExecutionResults

* Further review feedback

* Merge scheduler into datafusion/core

* Review feedback

* Fix partitioned execution

* Format

* Format Cargo.toml

* Fix doc link
tustvold authored May 4, 2022
1 parent e8ba45c commit dc76ec1
Showing 11 changed files with 1,905 additions and 19 deletions.
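The headline change is an opt-in scheduler built on morsel-driven parallelism: rather than leaving one long-running tokio task per partition, execution is chopped into small units of work that a fixed pool of worker threads pulls from dynamically, so skewed partitions no longer leave cores idle. As a rough illustration of the general pattern only (not this commit's scheduler, which pipelines DataFusion operators via rayon, per the diffs below):

```rust
// Illustrative sketch of morsel-driven parallelism in general, using rayon's
// work-stealing pool; DataFusion's actual scheduler is more elaborate.
use rayon::prelude::*;

fn main() {
    // A worker pool with a fixed thread count, analogous to Scheduler::new(4).
    let pool = rayon::ThreadPoolBuilder::new()
        .num_threads(4)
        .build()
        .unwrap();

    let data: Vec<u64> = (0..1_000_000).collect();

    // Each fixed-size chunk is a "morsel": idle workers steal pending morsels,
    // so uneven per-chunk cost does not serialize execution.
    let sum: u64 = pool.install(|| {
        data.par_chunks(64 * 1024)
            .map(|morsel| morsel.iter().sum::<u64>())
            .sum()
    });

    assert_eq!(sum, 499_999_500_000);
}
```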
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -150,7 +150,7 @@ The parquet SQL benchmarks can be run with
cargo bench --bench parquet_query_sql
```

-These randomly generate a parquet file, and then benchmark queries sourced from [parquet_query_sql.sql](./datafusion/benches/parquet_query_sql.sql) against it. This can therefore be a quick way to add coverage of particular query and/or data paths.
+These randomly generate a parquet file, and then benchmark queries sourced from [parquet_query_sql.sql](./datafusion/core/benches/parquet_query_sql.sql) against it. This can therefore be a quick way to add coverage of particular query and/or data paths.

If the environment variable `PARQUET_FILE` is set, the benchmark will run queries against this file instead of a randomly generated one. This can be useful for performing multiple runs, potentially with different code, against the same source data, or for testing against a custom dataset.

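The `PARQUET_FILE` override described above usually boils down to an environment-variable check with a fallback; a minimal sketch (`resolve_parquet_path` is a hypothetical helper, not the benchmark's actual code):

```rust
use std::env;

// Hypothetical helper: prefer a user-supplied dataset from PARQUET_FILE,
// otherwise fall back to the randomly generated temporary file.
fn resolve_parquet_path(generate: impl FnOnce() -> String) -> String {
    match env::var("PARQUET_FILE") {
        Ok(path) if !path.is_empty() => path,
        _ => generate(),
    }
}

fn main() {
    let path = resolve_parquet_path(|| "/tmp/generated.parquet".to_owned());
    println!("benchmarking against {}", path);
}
```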
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -17,8 +17,8 @@

[workspace]
members = [
"datafusion/core",
"datafusion/common",
"datafusion/core",
"datafusion/expr",
"datafusion/jit",
"datafusion/physical-expr",
9 changes: 7 additions & 2 deletions datafusion/core/Cargo.toml
@@ -24,7 +24,7 @@ repository = "https://github.com/apache/arrow-datafusion"
readme = "../README.md"
authors = ["Apache Arrow <dev@arrow.apache.org>"]
license = "Apache-2.0"
-keywords = [ "arrow", "query", "sql" ]
+keywords = ["arrow", "query", "sql"]
include = [
"benches/*.rs",
"src/**/*.rs",
@@ -50,6 +50,8 @@ pyarrow = ["pyo3", "arrow/pyarrow", "datafusion-common/pyarrow"]
regex_expressions = ["datafusion-physical-expr/regex_expressions"]
# Used to enable row format experiment
row = ["datafusion-row"]
+# Used to enable scheduler
+scheduler = ["rayon"]
simd = ["arrow/simd"]
unicode_expressions = ["datafusion-physical-expr/regex_expressions"]

@@ -75,9 +77,10 @@ ordered-float = "3.0"
parking_lot = "0.12"
parquet = { version = "13", features = ["arrow"] }
paste = "^1.0"
-pin-project-lite= "^0.2.7"
+pin-project-lite = "^0.2.7"
pyo3 = { version = "0.16", optional = true }
rand = "0.8"
+rayon = { version = "1.5", optional = true }
smallvec = { version = "1.6", features = ["union"] }
sqlparser = "0.16"
tempfile = "3"
@@ -88,6 +91,7 @@ uuid = { version = "1.0", features = ["v4"] }
[dev-dependencies]
criterion = "0.3"
doc-comment = "0.3"
+env_logger = "0.9"
fuzz-utils = { path = "fuzz-utils" }

[[bench]]
@@ -121,6 +125,7 @@ name = "physical_plan"
[[bench]]
harness = false
name = "parquet_query_sql"
+required-features = ["scheduler"]

[[bench]]
harness = false
65 changes: 50 additions & 15 deletions datafusion/core/benches/parquet_query_sql.rs
@@ -24,7 +24,9 @@ use arrow::datatypes::{
};
use arrow::record_batch::RecordBatch;
use criterion::{criterion_group, criterion_main, Criterion};
-use datafusion::prelude::{ParquetReadOptions, SessionContext};
+use datafusion::prelude::{SessionConfig, SessionContext};
+use datafusion::scheduler::Scheduler;
+use futures::stream::StreamExt;
use parquet::arrow::ArrowWriter;
use parquet::file::properties::{WriterProperties, WriterVersion};
use rand::distributions::uniform::SampleUniform;
@@ -37,7 +39,6 @@ use std::path::Path;
use std::sync::Arc;
use std::time::Instant;
use tempfile::NamedTempFile;
-use tokio_stream::StreamExt;

/// The number of batches to write
const NUM_BATCHES: usize = 2048;
@@ -193,15 +194,24 @@ fn criterion_benchmark(c: &mut Criterion) {
     assert!(Path::new(&file_path).exists(), "path not found");
     println!("Using parquet file {}", file_path);

-    let context = SessionContext::new();
+    let partitions = 4;
+    let config = SessionConfig::new().with_target_partitions(partitions);
+    let context = SessionContext::with_config(config);

-    let rt = tokio::runtime::Builder::new_multi_thread().build().unwrap();
-    rt.block_on(context.register_parquet(
-        "t",
-        file_path.as_str(),
-        ParquetReadOptions::default(),
-    ))
-    .unwrap();
+    let scheduler = Scheduler::new(partitions);
+
+    let local_rt = tokio::runtime::Builder::new_current_thread()
+        .build()
+        .unwrap();
+
+    let query_rt = tokio::runtime::Builder::new_multi_thread()
+        .worker_threads(partitions)
+        .build()
+        .unwrap();
+
+    local_rt
+        .block_on(context.register_parquet("t", file_path.as_str(), Default::default()))
+        .unwrap();

     // We read the queries from a file so they can be changed without recompiling the benchmark
     let mut queries_file = File::open("benches/parquet_query_sql.sql").unwrap();
@@ -220,17 +230,42 @@
             continue;
         }

-        let query = query.as_str();
-        c.bench_function(query, |b| {
+        c.bench_function(&format!("tokio: {}", query), |b| {
             b.iter(|| {
                 let query = query.clone();
                 let context = context.clone();
-                rt.block_on(async move {
-                    let query = context.sql(query).await.unwrap();
+                let (sender, mut receiver) = futures::channel::mpsc::unbounded();
+
+                // Spawn work to a separate tokio thread pool
+                query_rt.spawn(async move {
+                    let query = context.sql(&query).await.unwrap();
                     let mut stream = query.execute_stream().await.unwrap();
-                    while criterion::black_box(stream.next().await).is_some() {}
+
+                    while let Some(next) = stream.next().await {
+                        sender.unbounded_send(next).unwrap();
+                    }
+                });
+
+                local_rt.block_on(async {
+                    while receiver.next().await.transpose().unwrap().is_some() {}
+                })
             });
         });

+        c.bench_function(&format!("scheduled: {}", query), |b| {
+            b.iter(|| {
+                let query = query.clone();
+                let context = context.clone();
+
+                local_rt.block_on(async {
+                    let query = context.sql(&query).await.unwrap();
+                    let plan = query.create_physical_plan().await.unwrap();
+                    let mut stream =
+                        scheduler.schedule(plan, context.task_ctx()).unwrap();
+                    while stream.next().await.transpose().unwrap().is_some() {}
+                });
+            });
+        });
     }

     // Temporary file must outlive the benchmarks, it is deleted when dropped
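The benchmark now runs every query twice, once polled on a multi-threaded tokio runtime (`tokio: ...`) and once through the new scheduler (`scheduled: ...`), so the two execution paths can be compared like-for-like. Distilled from the diff above, driving the scheduler end-to-end looks roughly like this, a sketch assuming the crate is built with the `scheduler` feature (the benchmark itself demands it via `required-features`) and with table registration elided:

```rust
use datafusion::error::Result;
use datafusion::prelude::{SessionConfig, SessionContext};
use datafusion::scheduler::Scheduler;
use futures::stream::StreamExt;

async fn run_scheduled(sql: &str) -> Result<()> {
    let partitions = 4;
    let context = SessionContext::with_config(
        SessionConfig::new().with_target_partitions(partitions),
    );
    let scheduler = Scheduler::new(partitions);

    // ... register tables on `context` here, e.g. via register_parquet ...

    // Plan with DataFusion as usual, then hand the physical plan to the
    // scheduler instead of polling it on the tokio runtime.
    let plan = context.sql(sql).await?.create_physical_plan().await?;
    let mut stream = scheduler.schedule(plan, context.task_ctx())?;

    // The result is still a stream of Result<RecordBatch> to drain.
    while let Some(batch) = stream.next().await {
        let _batch = batch?;
    }
    Ok(())
}
```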
2 changes: 2 additions & 0 deletions datafusion/core/src/lib.rs
@@ -218,6 +218,8 @@ pub mod physical_optimizer;
pub mod physical_plan;
pub mod prelude;
pub mod scalar;
+#[cfg(feature = "scheduler")]
+pub mod scheduler;
pub mod sql;
pub mod variable;
