Skip to content

Commit

Permalink
Parse target formats for benchmarks with clap (#2383)
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamGS authored Feb 17, 2025
1 parent da5c45f commit ed8452e
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 47 deletions.
17 changes: 5 additions & 12 deletions bench-vortex/src/bin/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::time::{Duration, Instant};

use bench_vortex::clickbench::{self, clickbench_queries, HITS_SCHEMA};
use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
use bench_vortex::formats::parse_formats;
use bench_vortex::measurements::QueryMeasurement;
use bench_vortex::{
default_env_filter, execute_physical_plan, feature_flagged_allocator, get_session_with_cache,
Expand All @@ -29,8 +28,8 @@ struct Args {
iterations: usize,
#[arg(short, long)]
threads: Option<usize>,
#[arg(long, value_delimiter = ',')]
formats: Option<Vec<String>>,
#[arg(long, value_delimiter = ',', value_enum, default_values_t = vec![Format::Parquet, Format::OnDiskVortex])]
formats: Vec<Format>,
#[arg(long)]
only_vortex: bool,
#[arg(short, long)]
Expand Down Expand Up @@ -131,12 +130,6 @@ fn main() {
.unwrap();
});

// The formats to run against (vs the baseline)
let formats = match args.formats {
None => vec![Format::Parquet, Format::OnDiskVortex],
Some(formats) => parse_formats(formats),
};

let queries = match args.queries.clone() {
None => clickbench_queries(),
Some(queries) => clickbench_queries()
Expand All @@ -145,11 +138,11 @@ fn main() {
.collect(),
};

let progress_bar = ProgressBar::new((queries.len() * formats.len()) as u64);
let progress_bar = ProgressBar::new((queries.len() * args.formats.len()) as u64);

let mut all_measurements = Vec::default();

for format in &formats {
for format in &args.formats {
let session_context = get_session_with_cache(args.emulate_object_store);
let context = session_context.clone();
match format {
Expand Down Expand Up @@ -240,7 +233,7 @@ fn main() {
}

match args.display_format {
DisplayFormat::Table => render_table(all_measurements, &formats).unwrap(),
DisplayFormat::Table => render_table(all_measurements, &args.formats).unwrap(),
DisplayFormat::GhJson => print_measurements_json(all_measurements).unwrap(),
}
}
13 changes: 3 additions & 10 deletions bench-vortex/src/bin/random_access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::process::ExitCode;
use std::time::{Duration, Instant};

use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
use bench_vortex::formats::parse_formats;
use bench_vortex::measurements::GenericMeasurement;
use bench_vortex::reader::{take_parquet, take_vortex_tokio};
use bench_vortex::taxi_data::{taxi_data_parquet, taxi_data_vortex};
Expand All @@ -23,8 +22,8 @@ struct Args {
iterations: usize,
#[arg(short, long)]
threads: Option<usize>,
#[arg(long, value_delimiter = ',')]
formats: Option<Vec<String>>,
#[arg(long, value_delimiter = ',', value_enum, default_values_t = vec![Format::Parquet, Format::OnDiskVortex])]
formats: Vec<Format>,
#[arg(short, long)]
verbose: bool,
#[arg(short, long, default_value_t, value_enum)]
Expand All @@ -44,17 +43,11 @@ fn main() -> ExitCode {
}
.expect("Failed building the Runtime");

// The formats to run against (vs the baseline)
let formats = match args.formats {
None => vec![Format::Parquet, Format::OnDiskVortex],
Some(formats) => parse_formats(formats),
};

let indices = buffer![10u64, 11, 12, 13, 100_000, 3_000_000];
random_access(
runtime,
args.iterations,
formats,
args.formats,
args.display_format,
args.verbose,
indices,
Expand Down
13 changes: 3 additions & 10 deletions bench-vortex/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use std::process::ExitCode;
use std::time::{Duration, Instant};

use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
use bench_vortex::formats::parse_formats;
use bench_vortex::measurements::QueryMeasurement;
use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
use bench_vortex::tpch::{load_datasets, run_tpch_query, tpch_queries, EXPECTED_ROW_COUNTS};
Expand Down Expand Up @@ -31,8 +30,8 @@ struct Args {
use_remote_data_dir: Option<String>,
#[arg(short, long, default_value = "5")]
iterations: usize,
#[arg(long, value_delimiter = ',')]
formats: Option<Vec<String>>,
#[arg(long, value_delimiter = ',', value_enum, default_values_t = vec![Format::Arrow, Format::Parquet, Format::OnDiskVortex])]
formats: Vec<Format>,
#[arg(long, default_value_t = 1)]
scale_factor: u8,
#[arg(long)]
Expand Down Expand Up @@ -101,17 +100,11 @@ fn main() -> ExitCode {
}
};

// The formats to run against (vs the baseline)
let formats = match args.formats {
None => vec![Format::Arrow, Format::Parquet, Format::OnDiskVortex],
Some(formats) => parse_formats(formats),
};

runtime.block_on(bench_main(
args.queries,
args.exclude_queries,
args.iterations,
formats,
args.formats,
args.display_format,
args.emulate_object_store,
url,
Expand Down
13 changes: 0 additions & 13 deletions bench-vortex/src/formats.rs

This file was deleted.

9 changes: 7 additions & 2 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::sync::{Arc, LazyLock};

use arrow_array::{RecordBatch, RecordBatchReader};
use blob::SlowObjectStoreRegistry;
use clap::ValueEnum;
use datafusion::execution::cache::cache_manager::CacheManagerConfig;
use datafusion::execution::cache::cache_unit::{DefaultFileStatisticsCache, DefaultListFilesCache};
use datafusion::execution::object_store::DefaultObjectStoreRegistry;
Expand Down Expand Up @@ -39,7 +40,6 @@ pub mod blob;
pub mod clickbench;
pub mod data_downloads;
pub mod display;
pub mod formats;
pub mod measurements;
pub mod public_bi_data;
pub mod reader;
Expand Down Expand Up @@ -69,12 +69,17 @@ pub static CTX: LazyLock<ContextRef> = LazyLock::new(|| {
)
});

#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, ValueEnum)]
pub enum Format {
#[clap(name = "csv")]
Csv,
#[clap(name = "arrow")]
Arrow,
#[clap(name = "parquet")]
Parquet,
#[clap(name = "in-memory-vortex")]
InMemoryVortex,
#[clap(name = "vortex")]
OnDiskVortex,
}

Expand Down

0 comments on commit ed8452e

Please sign in to comment.