diff --git a/datafusion/README.md b/datafusion/README.md index b9c5b5d38..cc9157468 100644 --- a/datafusion/README.md +++ b/datafusion/README.md @@ -20,7 +20,7 @@ The benchmark should be completed in under an hour. On-demand pricing is $0.6 pe 1. `cd ClickBench/datafusion` 1. `vi benchmark.sh` and modify following line to target Datafusion version ``` - git checkout 40.0.0 + git checkout 43.0.0 ``` 1. `bash benchmark.sh` diff --git a/datafusion/benchmark.sh b/datafusion/benchmark.sh index 38e1bbb03..759f6590c 100755 --- a/datafusion/benchmark.sh +++ b/datafusion/benchmark.sh @@ -14,7 +14,7 @@ sudo yum install gcc -y # Install DataFusion main branch git clone https://github.com/apache/arrow-datafusion.git cd arrow-datafusion/datafusion-cli -git checkout 40.0.0 +git checkout 43.0.0 CARGO_PROFILE_RELEASE_LTO=true RUSTFLAGS="-C codegen-units=1" cargo build --release export PATH="`pwd`/target/release:$PATH" cd ../.. diff --git a/datafusion/create_partitioned.sql b/datafusion/create_partitioned.sql index 17a8b5b4d..3b3330c1b 100644 --- a/datafusion/create_partitioned.sql +++ b/datafusion/create_partitioned.sql @@ -1,3 +1,4 @@ CREATE EXTERNAL TABLE hits STORED AS PARQUET -LOCATION 'partitioned'; +LOCATION 'partitioned' +OPTIONS ('binary_as_string' 'true'); diff --git a/datafusion/create_single.sql b/datafusion/create_single.sql index eedd4c038..19e623b07 100644 --- a/datafusion/create_single.sql +++ b/datafusion/create_single.sql @@ -1,3 +1,4 @@ CREATE EXTERNAL TABLE hits STORED AS PARQUET -LOCATION 'hits.parquet'; +LOCATION 'hits.parquet' +OPTIONS ('binary_as_string' 'true');