Convert random_access from criterion to a custom binary

spiraldb · Feb 16, 2025 · d6a6cf1 · d6a6cf1
1 parent 3cebb33
commit d6a6cf1
Show file tree

Hide file tree

Showing 15 changed files with 422 additions and 387 deletions.
diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml
@@ -48,16 +48,14 @@ jobs:
           run: cargo codspeed run
           token: ${{ secrets.CODSPEED_TOKEN }}
 
-  bench:
+  bench-criterion:
     needs: label_trigger
     strategy:
       matrix:
         benchmark:
-          - id: random_access
-            name: Random Access
           - id: compress
             name: Vortex Compression
-    runs-on: [self-hosted, gcp]
+    runs-on: [ self-hosted, gcp ]
     if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }}
     steps:
       - uses: actions/checkout@v4
@@ -125,7 +123,70 @@ jobs:
         with:
           file-path: comment.md
           comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}
+  bench:  # PR run of the binary-based benchmarks; compares against the base commit and comments on the PR
+    needs: label_trigger
+    runs-on: [ self-hosted, gcp ]
+    strategy:
+      matrix:
+        benchmark:
+          - id: random_access
+            bin: random_access
+            name: Random Access
+    if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'benchmark' && github.event_name == 'pull_request' }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/cleanup
+      - uses: ./.github/actions/setup-rust
+      # DuckDB (used to convert CSV to Parquet) is only installed on non-self-hosted runners.
+      - name: Install DuckDB
+        uses: opt-nc/setup-duckdb-action@v1.0.10
+        if: runner.environment != 'self-hosted'
+        with:
+          version: v1.0.0
+
+      - name: Set tempdir
+        if: runner.environment == 'self-hosted'
+        run: |
+          echo "TMPDIR=/work" >> $GITHUB_ENV
+      # Same command as the develop run in bench.yml; the output name must match the file "Compare results" reads.
+      - name: Run ${{ matrix.benchmark.name }} benchmark
+        shell: bash
+        env:
+          BENCH_VORTEX_RATIOS: '.*'
+          RUSTFLAGS: '-C target-cpu=native'
+        run: |
+          cargo run --bin ${{ matrix.benchmark.bin }} --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
+
+      - name: Setup AWS CLI
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
+          aws-region: us-east-1
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+      - name: Compare results
+        shell: bash
+        run: |
+          set -Eeu -o pipefail -x
+
+          base_commit_sha=${{ github.event.pull_request.base.sha }}
 
+          aws s3 cp s3://vortex-benchmark-results-database/data.json - \
+            | grep $base_commit_sha \
+            > base.json
+
+          echo '# Benchmarks: ${{ matrix.benchmark.id }}' > comment.md
+          echo '<details>' >> comment.md
+          echo '<summary>Table of Results</summary>' >> comment.md
+          echo '' >> comment.md
+          uv run scripts/compare-benchmark-jsons.py base.json ${{ matrix.benchmark.id }}.json \
+            >> comment.md
+          echo '</details>' >> comment.md
+      - name: Comment PR
+        uses: thollander/actions-comment-pull-request@v3
+        with:
+          file-path: comment.md
+          comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}
   sql:
     needs: label_trigger
     uses: ./.github/workflows/sql-benchmarks.yml

diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -55,15 +55,13 @@ jobs:
           run: cargo codspeed run
           token: ${{ secrets.CODSPEED_TOKEN }}
 
-  bench:
+  bench-criterion:
     strategy:
       matrix:
         benchmark:
-          - id: random_access
-            name: Random Access
           - id: compress
             name: Vortex Compression
-    runs-on: [self-hosted, gcp]
+    runs-on: [ self-hosted, gcp ]
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/cleanup
@@ -109,7 +107,48 @@ jobs:
         shell: bash
         run: |
           bash scripts/cat-s3.sh vortex-benchmark-results-database data.json ${{ matrix.benchmark.id }}.json
+  bench:  # Runs the binary-based benchmarks and appends their results to the shared results database
+    runs-on: [ self-hosted, gcp ]
+    strategy:
+      matrix:
+        benchmark:
+          - id: random_access
+            bin: random_access
+            name: Random Access
+    # No event gate: like bench-criterion in this workflow, this job runs whenever bench.yml runs.
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/cleanup
+      - uses: ./.github/actions/setup-rust
+      # DuckDB (used to convert CSV to Parquet) is only installed on non-self-hosted runners.
+      - name: Install DuckDB
+        uses: opt-nc/setup-duckdb-action@v1.0.10
+        if: runner.environment != 'self-hosted'
+        with:
+          version: v1.0.0
 
+      - name: Set tempdir
+        if: runner.environment == 'self-hosted'
+        run: |
+          echo "TMPDIR=/work" >> $GITHUB_ENV
+
+      - name: Run ${{ matrix.benchmark.name }} benchmark
+        shell: bash
+        env:
+          BENCH_VORTEX_RATIOS: '.*'
+          RUSTFLAGS: '-C target-cpu=native'
+        run: |
+          cargo run --bin ${{ matrix.benchmark.bin }} --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
+
+      - name: Setup AWS CLI
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
+          aws-region: us-east-1
+      - name: Upload Benchmark Results
+        shell: bash
+        run: |
+          bash scripts/cat-s3.sh vortex-benchmark-results-database data.json ${{ matrix.benchmark.id }}.json
   sql:
     uses: ./.github/workflows/sql-benchmarks.yml
     with:

diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml
@@ -8,23 +8,42 @@ on:
         type: string
 
 jobs:
-  tpch:
-    runs-on: [self-hosted, gcp]
+  bench:
+    runs-on: [ self-hosted, gcp ]
+    strategy:
+      matrix:
+        benchmark:
+          - id: tpch
+            bin: tpch
+            name: TPC-H
+          - id: clickbench
+            bin: clickbench
+            name: Clickbench
+    # No job-level event gate: this workflow is called by others, and its steps branch on inputs.mode.
     steps:
       - uses: actions/checkout@v4
       - uses: ./.github/actions/cleanup
       - uses: ./.github/actions/setup-rust
+      # The compression benchmarks rely on DuckDB being installed to convert CSV to Parquet
+      - name: Install DuckDB
+        uses: opt-nc/setup-duckdb-action@v1.0.10
+        if: runner.environment != 'self-hosted'
+        with:
+          version: v1.0.0
+
       - name: Set tempdir
         if: runner.environment == 'self-hosted'
         run: |
           echo "TMPDIR=/work" >> $GITHUB_ENV
-      - name: Run TPC-H benchmark
+
+      - name: Run ${{ matrix.benchmark.name }} benchmark
         shell: bash
         env:
-          BENCH_VORTEX_RATIOS: ".*"
-          RUSTFLAGS: "-C target-cpu=native"
+          BENCH_VORTEX_RATIOS: '.*'
+          RUSTFLAGS: '-C target-cpu=native'
         run: |
-          cargo run --bin tpch_benchmark --release -- -d gh-json -t 1 | tee tpch.json
+          cargo run --bin ${{ matrix.benchmark.bin }} --release -- -d gh-json | tee ${{ matrix.benchmark.id }}.json
+
       - name: Setup AWS CLI
         uses: aws-actions/configure-aws-credentials@v4
         with:
@@ -45,7 +64,7 @@ jobs:
             | grep $base_commit_sha \
             > base.json
 
-          echo '# Benchmarks: TPC-H' > comment.md
+          echo '# Benchmarks: ${{ matrix.benchmark.name }}' > comment.md
           echo '<details>' >> comment.md
           echo '<summary>Table of Results</summary>' >> comment.md
           echo '' >> comment.md
@@ -62,61 +81,4 @@ jobs:
         if: inputs.mode == 'develop'
         shell: bash
         run: |
-          bash scripts/cat-s3.sh vortex-benchmark-results-database data.json tpch.json
-
-  clickbench:
-    runs-on: [self-hosted, gcp]
-    steps:
-      - uses: actions/checkout@v4
-      - uses: ./.github/actions/cleanup
-      - uses: ./.github/actions/setup-rust
-      - name: Set tempdir
-        if: runner.environment == 'self-hosted'
-        run: |
-          echo "TMPDIR=/work" >> $GITHUB_ENV
-      - name: Run Clickbench benchmark
-        shell: bash
-        env:
-          BENCH_VORTEX_RATIOS: ".*"
-          RUSTFLAGS: "-C target-cpu=native"
-          HOME: /home/ci-runner
-        run: |
-          cargo run --bin clickbench --release -- -d gh-json | tee clickbench.json
-      - name: Setup AWS CLI
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
-          aws-region: us-east-1
-      - name: Install uv
-        if: inputs.mode == 'pr'
-        uses: astral-sh/setup-uv@v5
-      - name: Compare results
-        if: inputs.mode == 'pr'
-        shell: bash
-        run: |
-            set -Eeu -o pipefail -x
-  
-            base_commit_sha=${{ github.event.pull_request.base.sha }}
-
-            aws s3 cp s3://vortex-benchmark-results-database/data.json - \
-              | grep $base_commit_sha \
-              > base.json
-
-            echo '# Benchmarks: Clickbench' > comment.md
-            echo '<details>' >> comment.md
-            echo '<summary>Table of Results</summary>' >> comment.md
-            echo '' >> comment.md
-            uv run --no-project scripts/compare-benchmark-jsons.py base.json clickbench.json \
-              >> comment.md
-            echo '</details>' >> comment.md
-      - name: Comment PR
-        if: inputs.mode == 'pr'
-        uses: thollander/actions-comment-pull-request@v3
-        with:
-          file-path: comment.md
-          comment-tag: bench-pr-comment-clickbench
-      - name: Upload Benchmark Results
-        if: inputs.mode == 'develop'
-        shell: bash
-        run: |
-          bash scripts/cat-s3.sh vortex-benchmark-results-database data.json clickbench.json
+          bash scripts/cat-s3.sh vortex-benchmark-results-database data.json ${{ matrix.benchmark.id }}.json
diff --git a/bench-vortex/Cargo.toml b/bench-vortex/Cargo.toml
@@ -84,8 +84,3 @@ tracing = ["vortex-datafusion/tracing"]
 name = "compress"
 test = false
 harness = false
-
-[[bench]]
-name = "random_access"
-test = false
-harness = false
diff --git a/bench-vortex/benches/random_access.rs b/bench-vortex/benches/random_access.rs