Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse target formats for benchmarks with clap #2383

Merged
merged 4 commits on Feb 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 5 additions & 12 deletions bench-vortex/src/bin/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::time::{Duration, Instant};

use bench_vortex::clickbench::{self, clickbench_queries, HITS_SCHEMA};
use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
use bench_vortex::formats::parse_formats;
use bench_vortex::measurements::QueryMeasurement;
use bench_vortex::{
default_env_filter, execute_physical_plan, feature_flagged_allocator, get_session_with_cache,
Expand All @@ -29,8 +28,8 @@ struct Args {
iterations: usize,
#[arg(short, long)]
threads: Option<usize>,
#[arg(long, value_delimiter = ',')]
formats: Option<Vec<String>>,
#[arg(long, value_delimiter = ',', value_enum, default_values_t = vec![Format::Parquet, Format::OnDiskVortex])]
formats: Vec<Format>,
#[arg(long)]
only_vortex: bool,
#[arg(short, long)]
Expand Down Expand Up @@ -131,12 +130,6 @@ fn main() {
.unwrap();
});

// The formats to run against (vs the baseline)
let formats = match args.formats {
None => vec![Format::Parquet, Format::OnDiskVortex],
Some(formats) => parse_formats(formats),
};

let queries = match args.queries.clone() {
None => clickbench_queries(),
Some(queries) => clickbench_queries()
Expand All @@ -145,11 +138,11 @@ fn main() {
.collect(),
};

let progress_bar = ProgressBar::new((queries.len() * formats.len()) as u64);
let progress_bar = ProgressBar::new((queries.len() * args.formats.len()) as u64);

let mut all_measurements = Vec::default();

for format in &formats {
for format in &args.formats {
let session_context = get_session_with_cache(args.emulate_object_store);
let context = session_context.clone();
match format {
Expand Down Expand Up @@ -240,7 +233,7 @@ fn main() {
}

match args.display_format {
DisplayFormat::Table => render_table(all_measurements, &formats).unwrap(),
DisplayFormat::Table => render_table(all_measurements, &args.formats).unwrap(),
DisplayFormat::GhJson => print_measurements_json(all_measurements).unwrap(),
}
}
13 changes: 3 additions & 10 deletions bench-vortex/src/bin/random_access.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use std::process::ExitCode;
use std::time::{Duration, Instant};

use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
use bench_vortex::formats::parse_formats;
use bench_vortex::measurements::GenericMeasurement;
use bench_vortex::reader::{take_parquet, take_vortex_tokio};
use bench_vortex::taxi_data::{taxi_data_parquet, taxi_data_vortex};
Expand All @@ -23,8 +22,8 @@ struct Args {
iterations: usize,
#[arg(short, long)]
threads: Option<usize>,
#[arg(long, value_delimiter = ',')]
formats: Option<Vec<String>>,
#[arg(long, value_delimiter = ',', value_enum, default_values_t = vec![Format::Parquet, Format::OnDiskVortex])]
formats: Vec<Format>,
#[arg(short, long)]
verbose: bool,
#[arg(short, long, default_value_t, value_enum)]
Expand All @@ -44,17 +43,11 @@ fn main() -> ExitCode {
}
.expect("Failed building the Runtime");

// The formats to run against (vs the baseline)
let formats = match args.formats {
None => vec![Format::Parquet, Format::OnDiskVortex],
Some(formats) => parse_formats(formats),
};

let indices = buffer![10u64, 11, 12, 13, 100_000, 3_000_000];
random_access(
runtime,
args.iterations,
formats,
args.formats,
args.display_format,
args.verbose,
indices,
Expand Down
13 changes: 3 additions & 10 deletions bench-vortex/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use std::sync;
use std::time::Instant;

use bench_vortex::display::{print_measurements_json, render_table, DisplayFormat};
use bench_vortex::formats::parse_formats;
use bench_vortex::measurements::QueryMeasurement;
use bench_vortex::tpch::dbgen::{DBGen, DBGenOptions};
use bench_vortex::tpch::{load_datasets, run_tpch_query, tpch_queries, EXPECTED_ROW_COUNTS};
Expand Down Expand Up @@ -32,8 +31,8 @@ struct Args {
use_remote_data_dir: Option<String>,
#[arg(short, long, default_value = "5")]
iterations: usize,
#[arg(long, value_delimiter = ',')]
formats: Option<Vec<String>>,
#[arg(long, value_delimiter = ',', value_enum, default_values_t = vec![Format::Arrow, Format::Parquet, Format::OnDiskVortex])]
formats: Vec<Format>,
#[arg(long, default_value_t = 1)]
scale_factor: u8,
#[arg(long)]
Expand Down Expand Up @@ -102,17 +101,11 @@ fn main() -> ExitCode {
}
};

// The formats to run against (vs the baseline)
let formats = match args.formats {
None => vec![Format::Arrow, Format::Parquet, Format::OnDiskVortex],
Some(formats) => parse_formats(formats),
};

runtime.block_on(bench_main(
args.queries,
args.exclude_queries,
args.iterations,
formats,
args.formats,
args.display_format,
args.emulate_object_store,
url,
Expand Down
13 changes: 0 additions & 13 deletions bench-vortex/src/formats.rs

This file was deleted.

9 changes: 7 additions & 2 deletions bench-vortex/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::sync::{Arc, LazyLock};

use arrow_array::{RecordBatch, RecordBatchReader};
use blob::SlowObjectStoreRegistry;
use clap::ValueEnum;
use datafusion::execution::cache::cache_manager::CacheManagerConfig;
use datafusion::execution::cache::cache_unit::{DefaultFileStatisticsCache, DefaultListFilesCache};
use datafusion::execution::object_store::DefaultObjectStoreRegistry;
Expand Down Expand Up @@ -39,7 +40,6 @@ pub mod blob;
pub mod clickbench;
pub mod data_downloads;
pub mod display;
pub mod formats;
pub mod measurements;
pub mod public_bi_data;
pub mod reader;
Expand Down Expand Up @@ -69,12 +69,17 @@ pub static CTX: LazyLock<ContextRef> = LazyLock::new(|| {
)
});

#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, ValueEnum)]
pub enum Format {
#[clap(name = "csv")]
Csv,
#[clap(name = "arrow")]
Arrow,
#[clap(name = "parquet")]
Parquet,
#[clap(name = "in-memory-vortex")]
InMemoryVortex,
#[clap(name = "vortex")]
OnDiskVortex,
}

Expand Down
Loading