From a71c5661bbb3c8c46eedd8cdac38d766154022c5 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Mon, 18 Nov 2024 11:35:31 -0800 Subject: [PATCH 01/13] Add workflows to build a commit and run tpch benchmarks - slightly different than existing one --- .github/actions/install/action.yaml | 29 +++++ .github/assets/ray.yaml | 60 +++++++++++ .github/scripts/build-commit.sh | 7 ++ .github/scripts/csv_to_md.py | 29 +++++ .github/scripts/performance-comparisons.sh | 2 + .github/workflows/build-commit-run-tpch.yaml | 36 +++++++ .github/workflows/build-commit.yaml | 66 ++++++++++++ .../workflows/performance-comparisons.yaml | 61 +++++++++++ .github/workflows/run-tpch.yaml | 102 ++++++++++++++++++ benchmarking/tpch/__main__.py | 13 ++- 10 files changed, 402 insertions(+), 3 deletions(-) create mode 100644 .github/actions/install/action.yaml create mode 100644 .github/assets/ray.yaml create mode 100755 .github/scripts/build-commit.sh create mode 100644 .github/scripts/csv_to_md.py create mode 100755 .github/scripts/performance-comparisons.sh create mode 100644 .github/workflows/build-commit-run-tpch.yaml create mode 100644 .github/workflows/build-commit.yaml create mode 100644 .github/workflows/performance-comparisons.yaml create mode 100644 .github/workflows/run-tpch.yaml diff --git a/.github/actions/install/action.yaml b/.github/actions/install/action.yaml new file mode 100644 index 0000000000..543d9a3e86 --- /dev/null +++ b/.github/actions/install/action.yaml @@ -0,0 +1,29 @@ +name: Install uv, rust, and python +description: Install uv, rust, and python +inputs: + python_version: + description: The version of python to install + required: false + default: '3.9' +runs: + using: composite + steps: + - shell: bash + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + CARGO_BIN="$HOME/.cargo/bin" + echo 'export PATH="$CARGO_BIN:$PATH"' >> $HOME/.bashrc + echo "$CARGO_BIN" >> $GITHUB_PATH + - shell: bash + run: | + curl -LsSf 
https://astral.sh/uv/install.sh | sh + UV_BIN="$HOME/.local/bin" + echo 'export PATH="$UV_BIN:$PATH"' >> $HOME/.bashrc + echo "$UV_BIN" >> $GITHUB_PATH + - shell: bash + run: | + source $HOME/.bashrc + - shell: bash + run: | + uv python install ${{ inputs.python_version }} + uv python pin ${{ inputs.python_version }} diff --git a/.github/assets/ray.yaml b/.github/assets/ray.yaml new file mode 100644 index 0000000000..5bff72ab3c --- /dev/null +++ b/.github/assets/ray.yaml @@ -0,0 +1,60 @@ +cluster_name: performance-comparisons + +provider: + type: aws + region: us-west-2 + cache_stopped_nodes: true + security_group: + GroupName: ray-autoscaler-c1 + +auth: + ssh_user: ubuntu + ssh_private_key: ~/.ssh/ci-github-actions-ray-cluster-key.pem + +max_workers: 2 +available_node_types: + ray.head.default: + resources: {"CPU": 0} + node_config: + KeyName: ci-github-actions-ray-cluster-key + InstanceType: i3.2xlarge + ImageId: ami-04dd23e62ed049936 + IamInstanceProfile: + Name: ray-autoscaler-v1 + + ray.worker.default: + min_workers: 2 + max_workers: 2 + resources: {} + node_config: + KeyName: ci-github-actions-ray-cluster-key + InstanceType: i3.2xlarge + ImageId: ami-04dd23e62ed049936 + IamInstanceProfile: + Name: ray-autoscaler-v1 + +setup_commands: +# Mount drive +- sudo mkfs.ext4 /dev/nvme0n1 +- sudo mount -t ext4 /dev/nvme0n1 /tmp +- sudo chmod 777 /tmp +# Install dependencies +- sudo snap install aws-cli --classic +- curl -LsSf https://astral.sh/uv/install.sh | sh +- echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc +- source ~/.bashrc +- uv python install 3.9 +- uv python pin 3.9 +- uv v +- echo "source $HOME/.venv/bin/activate" >> $HOME/.bashrc +- source .venv/bin/activate +- uv pip install pip ray[default] py-spy +# GitHub Actions workflow will replace all parameters between `<<...>>` with the +# actual values as determined dynamically during runtime of the actual workflow. 
+- uv pip install https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/<>/<> +# Download benchmarking fixtures +- | + aws s3 sync \ + s3://eventual-dev-benchmarking-fixtures/uncompressed/tpch-dbgen/<>/<>/parquet/ \ + /tmp/data/<>/<>/parquet/ \ + --quiet diff --git a/.github/scripts/build-commit.sh b/.github/scripts/build-commit.sh new file mode 100755 index 0000000000..a6ed6e3f71 --- /dev/null +++ b/.github/scripts/build-commit.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) +gh workflow run build-commit.yaml \ + --ref $BRANCH_NAME \ + -f commit=$BRANCH_NAME \ + -f machine_type="buildjet-8vcpu-ubuntu-2004" diff --git a/.github/scripts/csv_to_md.py b/.github/scripts/csv_to_md.py new file mode 100644 index 0000000000..b9a74f4f40 --- /dev/null +++ b/.github/scripts/csv_to_md.py @@ -0,0 +1,29 @@ +import csv +import sys +from pathlib import Path + +file = Path(sys.argv[1]) +assert file.exists() + +output = Path(sys.argv[2]) +assert not output.exists() + + +def make_md_row(row: list[str]) -> str: + return f'|{"|".join(row)}|\n' + + +with open(file) as file: + with open(output, "w+") as output: + csv_reader = csv.reader(file) + header = next(csv_reader) + + header_str = make_md_row(header) + output.write(header_str) + + separator_str = make_md_row(["-"] * len(header)) + output.write(separator_str) + + for row in csv_reader: + row_str = make_md_row(row) + output.write(row_str) diff --git a/.github/scripts/performance-comparisons.sh b/.github/scripts/performance-comparisons.sh new file mode 100755 index 0000000000..686cefe118 --- /dev/null +++ b/.github/scripts/performance-comparisons.sh @@ -0,0 +1,2 @@ +BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) +gh workflow run performance-comparisons.yaml --ref $BRANCH_NAME -f c1=$BRANCH_NAME diff --git a/.github/workflows/build-commit-run-tpch.yaml b/.github/workflows/build-commit-run-tpch.yaml new file mode 100644 index 0000000000..49e7c78482 --- /dev/null +++ 
b/.github/workflows/build-commit-run-tpch.yaml @@ -0,0 +1,36 @@ +name: Build commit and run tpch benchmarks + +on: + workflow_dispatch: + inputs: + wheel: + description: The wheel artifact to use + required: false + default: getdaft-0.3.0.dev0-cp38-abi3-manylinux_2_31_x86_64.whl + skip_questions: + description: The TPC-H questions to skip + required: false + default: "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" + scale_factor: + description: Which scale factor to use + required: false + default: 2 + partition_size: + description: Which partition size to use + required: false + default: 2 + +jobs: + build: + uses: ./.github/workflows/build-commit.yaml + secrets: + ACTIONS_AWS_ROLE_ARN: ${{ secrets.ACTIONS_AWS_ROLE_ARN }} + + run: + needs: build + uses: ./.github/workflows/run-tpch.yaml + with: + wheel: ${{ needs.build.outputs.wheel }} + skip_questions: ${{ inputs.skip_questions }} + scale_factor: ${{ inputs.scale_factor }} + partition_size: ${{ inputs.partition_size }} diff --git a/.github/workflows/build-commit.yaml b/.github/workflows/build-commit.yaml new file mode 100644 index 0000000000..89186738d1 --- /dev/null +++ b/.github/workflows/build-commit.yaml @@ -0,0 +1,66 @@ +name: Build a Daft commit and store the outputted wheel in AWS S3 + +on: + workflow_dispatch: + workflow_call: + secrets: + ACTIONS_AWS_ROLE_ARN: + description: The ARN of the AWS role to assume + required: true + outputs: + wheel: + description: The wheel file that was built + value: ${{ jobs.build-commit.outputs.wheel }} + +jobs: + build-commit: + runs-on: buildjet-8vcpu-ubuntu-2004 + timeout-minutes: 15 # Remove for ssh debugging + permissions: + id-token: write + contents: read + outputs: + wheel: ${{ steps.build_and_upload.outputs.wheel }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + - uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-west-2 + role-session-name: build-commit-workflow + role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN 
}} + - uses: ./.github/actions/install + - uses: buildjet/cache@v4 + with: + path: ~/target + key: ${{ runner.os }}-cargo-deps-${{ hashFiles('**/Cargo.lock') }} + restore-keys: ${{ runner.os }}-cargo-deps- + - id: build_and_upload + run: | + if ! ls ~/target/wheels/*.whl 1> /dev/null 2>&1; then + # Build wheel + export CARGO_TARGET_DIR=~/target + uv v + source .venv/bin/activate + uv pip install pip maturin + maturin build --release + fi + + count=$(ls ~/target/wheels/*.whl 2> /dev/null | wc -l) + if [ "$count" -gt 1 ]; then + echo "Found more than 1 wheel" + exit 1 + elif [ "$count" -eq 0 ]; then + echo "Found no wheel files" + exit 1 + fi + + # Upload wheel + for file in ~/target/wheels/*.whl; do + aws s3 cp $file s3://github-actions-artifacts-bucket/builds/${{ github.sha }}/ --acl public-read --no-progress; + file_basename=$(basename $file) + echo "wheel=$file_basename" >> $GITHUB_OUTPUT + echo "Output wheel has been built and stored in S3 at the following location:" >> $GITHUB_STEP_SUMMARY + echo "https://us-west-2.console.aws.amazon.com/s3/buckets/github-actions-artifacts-bucket?prefix=builds/${{ github.sha }}/" >> $GITHUB_STEP_SUMMARY + done diff --git a/.github/workflows/performance-comparisons.yaml b/.github/workflows/performance-comparisons.yaml new file mode 100644 index 0000000000..8362ad8657 --- /dev/null +++ b/.github/workflows/performance-comparisons.yaml @@ -0,0 +1,61 @@ +name: Run performance comparisons + +on: + workflow_dispatch: + +jobs: + build: + uses: ./.github/workflows/build-commit.yaml + secrets: + ACTIONS_AWS_ROLE_ARN: ${{ secrets.ACTIONS_AWS_ROLE_ARN }} + + run: + needs: build + runs-on: [self-hosted, linux, x64, ci-dev] + timeout-minutes: 15 # Remove for ssh debugging + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + - uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-west-2 + role-session-name: run-tpch-workflow + - uses: ./.github/actions/install + 
- run: | + # Dynamically update ray config file + sed -i 's|<>|${{ github.sha }}|g' .github/assets/ray.yaml + sed -i 's|<>|${{ needs.build.outputs.wheel }}|g' .github/assets/ray.yaml + + # Download private ssh key + KEY=$(aws secretsmanager get-secret-value --secret-id ci-github-actions-ray-cluster-key-3 --query SecretString --output text) + echo "$KEY" >> ~/.ssh/ci-github-actions-ray-cluster-key.pem + chmod 600 ~/.ssh/ci-github-actions-ray-cluster-key.pem + + # Install dependencies + uv v + source .venv/bin/activate + rm -rf daft + uv pip install ray[default] boto3 https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/${{ github.sha }}/${{ needs.build.outputs.wheel }} + + # Boot up ray cluster + ray up .github/assets/ray.yaml -y + HEAD_NODE_IP=$(ray get-head-ip .github/assets/ray.yaml | tail -n 1) + ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem ubuntu@$HEAD_NODE_IP + DAFT_RUNNER=ray python -m benchmarking.tpch \ + --skip_questions="2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" \ + --num_parts 2 \ + --scale_factor 2 \ + --parquet_file_cache /tmp/data \ + --output_csv output.csv \ + --ray_job_dashboard_url http://localhost:8265 \ + --skip_warmup \ + --no_pymodules + ray down .github/assets/ray.yaml -y + - uses: actions/upload-artifact@v4 + with: + name: output.csv + path: output.csv diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml new file mode 100644 index 0000000000..e5261ab995 --- /dev/null +++ b/.github/workflows/run-tpch.yaml @@ -0,0 +1,102 @@ +name: Run tpch benchmarks + +on: + workflow_dispatch: + inputs: + wheel: + description: The wheel artifact to use + required: false + default: getdaft-0.3.0.dev0-cp38-abi3-manylinux_2_31_x86_64.whl + skip_questions: + description: The TPC-H questions to skip + required: false + default: "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" + scale_factor: + description: Which scale factor to use + 
required: false + default: 2 + partition_size: + description: Which partition size to use + required: false + default: 2 + workflow_call: + inputs: + wheel: + type: string + description: The wheel artifact to use + required: false + default: getdaft-0.3.0.dev0-cp38-abi3-manylinux_2_31_x86_64.whl + skip_questions: + type: string + description: The TPC-H questions to skip + required: false + default: "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" + scale_factor: + type: string + description: Which scale factor to use + required: false + default: 2 + partition_size: + type: string + description: Which partition size to use + required: false + default: 2 + +jobs: + run-tpch: + runs-on: [self-hosted, linux, x64, ci-dev] + timeout-minutes: 15 # Remove for ssh debugging + permissions: + id-token: write + contents: read + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + - uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: us-west-2 + role-session-name: run-tpch-workflow + - uses: ./.github/actions/install + - run: | + scale_factor_str="${{ inputs.scale_factor }}_0" + + # Dynamically update ray config file + sed -i 's|<>|${{ github.sha }}|g' .github/assets/ray.yaml + sed -i 's|<>|${{ inputs.wheel }}|g' .github/assets/ray.yaml + sed -i "s|<>|$scale_factor_str|g" .github/assets/ray.yaml + sed -i 's|<>|${{ inputs.partition_size }}|g' .github/assets/ray.yaml + + # Download private ssh key + KEY=$(aws secretsmanager get-secret-value --secret-id ci-github-actions-ray-cluster-key-3 --query SecretString --output text) + echo "$KEY" >> ~/.ssh/ci-github-actions-ray-cluster-key.pem + chmod 600 ~/.ssh/ci-github-actions-ray-cluster-key.pem + + # Install dependencies + uv v + source .venv/bin/activate + rm -rf daft + uv pip install ray[default] boto3 https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/${{ github.sha }}/${{ inputs.wheel }} + + # Boot up ray cluster + ray up .github/assets/ray.yaml -y + HEAD_NODE_IP=$(ray 
get-head-ip .github/assets/ray.yaml | tail -n 1) + ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem ubuntu@$HEAD_NODE_IP + DAFT_RUNNER=ray python -m benchmarking.tpch \ + --skip_questions="${{ inputs.skip_questions }}" \ + --scale_factor ${{ inputs.scale_factor }} \ + --num_parts ${{ inputs.partition_size }} \ + --parquet_file_cache /tmp/data \ + --output_csv output.csv \ + --ray_job_dashboard_url http://localhost:8265 \ + --skip_warmup \ + --no_pymodules + ray down .github/assets/ray.yaml -y + + python .github/scripts/csv_to_md.py output.csv output.md + echo "# Results" >> $GITHUB_STEP_SUMMARY + cat output.md >> $GITHUB_STEP_SUMMARY + - uses: actions/upload-artifact@v4 + with: + name: output.csv + path: output.csv diff --git a/benchmarking/tpch/__main__.py b/benchmarking/tpch/__main__.py index 8ad131e08f..29f2bb1e75 100644 --- a/benchmarking/tpch/__main__.py +++ b/benchmarking/tpch/__main__.py @@ -126,6 +126,7 @@ def run_all_benchmarks( csv_output_location: str | None, ray_job_dashboard_url: str | None = None, requirements: str | None = None, + no_pymodules: bool = False, ): get_df = get_df_with_parquet_folder(parquet_folder) @@ -143,7 +144,7 @@ def run_all_benchmarks( tpch_qnum=i, working_dir=working_dir, entrypoint=entrypoint, - runtime_env=get_ray_runtime_env(requirements), + runtime_env=get_ray_runtime_env(requirements, no_pymodules=no_pymodules), ) # Run once as a warmup step @@ -202,9 +203,9 @@ def get_daft_benchmark_runner_name() -> Literal["ray"] | Literal["py"] | Literal return name -def get_ray_runtime_env(requirements: str | None) -> dict: +def get_ray_runtime_env(requirements: str | None, no_pymodules: bool = False) -> dict: runtime_env = { - "py_modules": [daft], + "py_modules": None if no_pymodules else [daft], "eager_install": True, "env_vars": { "DAFT_PROGRESS_BAR": "0", @@ -293,6 +294,11 @@ def warm_up_function(): default=None, help="Ray Dashboard URL to submit jobs instead of using Ray 
client, most useful when running on a remote cluster", ) + parser.add_argument( + "--no_pymodules", + action="store_true", + help="Avoid pickling any modules in the ray-environment before initializing the Ray cluster; useful in CI", + ) args = parser.parse_args() if args.output_csv_headers: @@ -331,4 +337,5 @@ def warm_up_function(): csv_output_location=args.output_csv, ray_job_dashboard_url=args.ray_job_dashboard_url, requirements=args.requirements, + no_pymodules=True if args.no_pymodules else False, ) From 45f696178288c37596968e81e0c1133dea27c4cd Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Mon, 18 Nov 2024 19:52:20 -0800 Subject: [PATCH 02/13] Change default for `skip_questions` input --- .github/workflows/build-commit-run-tpch.yaml | 2 +- .github/workflows/run-tpch.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-commit-run-tpch.yaml b/.github/workflows/build-commit-run-tpch.yaml index 49e7c78482..a178e520a1 100644 --- a/.github/workflows/build-commit-run-tpch.yaml +++ b/.github/workflows/build-commit-run-tpch.yaml @@ -10,7 +10,7 @@ on: skip_questions: description: The TPC-H questions to skip required: false - default: "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" + default: "" scale_factor: description: Which scale factor to use required: false diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index e5261ab995..e4d665ca10 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -10,7 +10,7 @@ on: skip_questions: description: The TPC-H questions to skip required: false - default: "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" + default: "" scale_factor: description: Which scale factor to use required: false From 5dc5988168dcac1e549801121630a489244cb9f5 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Mon, 18 Nov 2024 20:04:41 -0800 Subject: [PATCH 03/13] Add check to see if `skip_questions` should be passed to 
`benchmarking.tpch` python module --- .github/workflows/run-tpch.yaml | 34 ++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index e4d665ca10..6d008f32ff 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -30,7 +30,7 @@ on: type: string description: The TPC-H questions to skip required: false - default: "2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" + default: "" scale_factor: type: string description: Which scale factor to use @@ -82,16 +82,28 @@ jobs: ray up .github/assets/ray.yaml -y HEAD_NODE_IP=$(ray get-head-ip .github/assets/ray.yaml | tail -n 1) ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem ubuntu@$HEAD_NODE_IP - DAFT_RUNNER=ray python -m benchmarking.tpch \ - --skip_questions="${{ inputs.skip_questions }}" \ - --scale_factor ${{ inputs.scale_factor }} \ - --num_parts ${{ inputs.partition_size }} \ - --parquet_file_cache /tmp/data \ - --output_csv output.csv \ - --ray_job_dashboard_url http://localhost:8265 \ - --skip_warmup \ - --no_pymodules - ray down .github/assets/ray.yaml -y + if [[ -n "${{ inputs.skip_questions }}" ]]; then + DAFT_RUNNER=ray python -m benchmarking.tpch \ + --skip_questions="${{ inputs.skip_questions }}" \ + --scale_factor ${{ inputs.scale_factor }} \ + --num_parts ${{ inputs.partition_size }} \ + --parquet_file_cache /tmp/data \ + --output_csv output.csv \ + --ray_job_dashboard_url http://localhost:8265 \ + --skip_warmup \ + --no_pymodules + ray down .github/assets/ray.yaml -y + else + DAFT_RUNNER=ray python -m benchmarking.tpch \ + --scale_factor ${{ inputs.scale_factor }} \ + --num_parts ${{ inputs.partition_size }} \ + --parquet_file_cache /tmp/data \ + --output_csv output.csv \ + --ray_job_dashboard_url http://localhost:8265 \ + --skip_warmup \ + --no_pymodules + ray down .github/assets/ray.yaml -y + fi python 
.github/scripts/csv_to_md.py output.csv output.md echo "# Results" >> $GITHUB_STEP_SUMMARY From bc2797e8fdcb897c7ebd30072d4cf2bdb47e8607 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 13:45:07 -0800 Subject: [PATCH 04/13] Add check to check if fs is not already mounted before mounting - without checking first, the mount command may fail --- .github/assets/ray.yaml | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/assets/ray.yaml b/.github/assets/ray.yaml index 5bff72ab3c..63e3a87a2c 100644 --- a/.github/assets/ray.yaml +++ b/.github/assets/ray.yaml @@ -35,9 +35,14 @@ available_node_types: setup_commands: # Mount drive -- sudo mkfs.ext4 /dev/nvme0n1 -- sudo mount -t ext4 /dev/nvme0n1 /tmp -- sudo chmod 777 /tmp +- | + findmnt /tmp 1> /dev/null + code=$? + if [ $code -ne 0 ]; then + sudo mkfs.ext4 /dev/nvme0n1 + sudo mount -t ext4 /dev/nvme0n1 /tmp + sudo chmod 777 /tmp + fi # Install dependencies - sudo snap install aws-cli --classic - curl -LsSf https://astral.sh/uv/install.sh | sh From a5e3b8fefe80ae808637c367b67f3bd17293cf9f Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 13:51:10 -0800 Subject: [PATCH 05/13] Add ssh breakpoint; clean up step logic --- .github/scripts/build-commit.sh | 7 ------- .github/scripts/performance-comparisons.sh | 2 -- .github/workflows/run-tpch.yaml | 10 ++++++---- 3 files changed, 6 insertions(+), 13 deletions(-) delete mode 100755 .github/scripts/build-commit.sh delete mode 100755 .github/scripts/performance-comparisons.sh diff --git a/.github/scripts/build-commit.sh b/.github/scripts/build-commit.sh deleted file mode 100755 index a6ed6e3f71..0000000000 --- a/.github/scripts/build-commit.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) -gh workflow run build-commit.yaml \ - --ref $BRANCH_NAME \ - -f commit=$BRANCH_NAME \ - -f machine_type="buildjet-8vcpu-ubuntu-2004" diff --git 
a/.github/scripts/performance-comparisons.sh b/.github/scripts/performance-comparisons.sh deleted file mode 100755 index 686cefe118..0000000000 --- a/.github/scripts/performance-comparisons.sh +++ /dev/null @@ -1,2 +0,0 @@ -BRANCH_NAME=$(git rev-parse --abbrev-ref HEAD) -gh workflow run performance-comparisons.yaml --ref $BRANCH_NAME -f c1=$BRANCH_NAME diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index 6d008f32ff..29b1f96b1f 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -84,15 +84,14 @@ jobs: ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem ubuntu@$HEAD_NODE_IP if [[ -n "${{ inputs.skip_questions }}" ]]; then DAFT_RUNNER=ray python -m benchmarking.tpch \ - --skip_questions="${{ inputs.skip_questions }}" \ --scale_factor ${{ inputs.scale_factor }} \ --num_parts ${{ inputs.partition_size }} \ --parquet_file_cache /tmp/data \ --output_csv output.csv \ --ray_job_dashboard_url http://localhost:8265 \ --skip_warmup \ - --no_pymodules - ray down .github/assets/ray.yaml -y + --no_pymodules \ + --skip_questions="${{ inputs.skip_questions }}" else DAFT_RUNNER=ray python -m benchmarking.tpch \ --scale_factor ${{ inputs.scale_factor }} \ @@ -102,9 +101,12 @@ jobs: --ray_job_dashboard_url http://localhost:8265 \ --skip_warmup \ --no_pymodules - ray down .github/assets/ray.yaml -y fi + - uses: lhotari/action-upterm@v1 + + - run: | + ray down .github/assets/ray.yaml -y python .github/scripts/csv_to_md.py output.csv output.md echo "# Results" >> $GITHUB_STEP_SUMMARY cat output.md >> $GITHUB_STEP_SUMMARY From 5e542613de6654862a89301651969444424d573a Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 13:57:05 -0800 Subject: [PATCH 06/13] Remove timeout for debugging purposes --- .github/workflows/run-tpch.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tpch.yaml 
b/.github/workflows/run-tpch.yaml index 29b1f96b1f..f4585b13a3 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -45,7 +45,7 @@ on: jobs: run-tpch: runs-on: [self-hosted, linux, x64, ci-dev] - timeout-minutes: 15 # Remove for ssh debugging + # timeout-minutes: 15 # Remove for ssh debugging permissions: id-token: write contents: read From dbb07d235a2b05350f61fc42110abe06014f9798 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 14:10:47 -0800 Subject: [PATCH 07/13] Add new env-flag --- .github/workflows/run-tpch.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index f4585b13a3..9e47119909 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -82,8 +82,10 @@ jobs: ray up .github/assets/ray.yaml -y HEAD_NODE_IP=$(ray get-head-ip .github/assets/ray.yaml | tail -n 1) ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem ubuntu@$HEAD_NODE_IP + export DAFT_ENABLE_RAY_TRACING=1 + export DAFT_RUNNER=ray if [[ -n "${{ inputs.skip_questions }}" ]]; then - DAFT_RUNNER=ray python -m benchmarking.tpch \ + python -m benchmarking.tpch \ --scale_factor ${{ inputs.scale_factor }} \ --num_parts ${{ inputs.partition_size }} \ --parquet_file_cache /tmp/data \ @@ -93,7 +95,7 @@ jobs: --no_pymodules \ --skip_questions="${{ inputs.skip_questions }}" else - DAFT_RUNNER=ray python -m benchmarking.tpch \ + python -m benchmarking.tpch \ --scale_factor ${{ inputs.scale_factor }} \ --num_parts ${{ inputs.partition_size }} \ --parquet_file_cache /tmp/data \ @@ -106,6 +108,7 @@ jobs: - uses: lhotari/action-upterm@v1 - run: | + source .venv/bin/activate ray down .github/assets/ray.yaml -y python .github/scripts/csv_to_md.py output.csv output.md echo "# Results" >> $GITHUB_STEP_SUMMARY From 437eee26bf95e4b1594190d06d02b7cc0ac0df1d Mon Sep 17 00:00:00 2001 From: Raunak 
Bhagat Date: Tue, 19 Nov 2024 16:13:49 -0800 Subject: [PATCH 08/13] Download logs from ray-cluster and upload them to GitHub Actions UI --- .github/workflows/run-tpch.yaml | 8 +++++--- benchmarking/tpch/__main__.py | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index 9e47119909..1ce4c6de80 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -105,10 +105,8 @@ jobs: --no_pymodules fi - - uses: lhotari/action-upterm@v1 + ray rsync-down .github/assets/ray.yaml /tmp/ray ray-logs - - run: | - source .venv/bin/activate ray down .github/assets/ray.yaml -y python .github/scripts/csv_to_md.py output.csv output.md echo "# Results" >> $GITHUB_STEP_SUMMARY @@ -117,3 +115,7 @@ jobs: with: name: output.csv path: output.csv + - uses: actions/upload-artifact@v4 + with: + name: ray-logs + path: ray-logs diff --git a/benchmarking/tpch/__main__.py b/benchmarking/tpch/__main__.py index 29f2bb1e75..125712ef92 100644 --- a/benchmarking/tpch/__main__.py +++ b/benchmarking/tpch/__main__.py @@ -204,10 +204,12 @@ def get_daft_benchmark_runner_name() -> Literal["ray"] | Literal["py"] | Literal def get_ray_runtime_env(requirements: str | None, no_pymodules: bool = False) -> dict: + daft_env_variables = dict(filter(lambda key_value: key_value[0].startswith("DAFT"), os.environ.items())) runtime_env = { "py_modules": None if no_pymodules else [daft], "eager_install": True, "env_vars": { + **daft_env_variables, "DAFT_PROGRESS_BAR": "0", "DAFT_RUNNER": "ray", }, From fc4f378b9cf8389b53daeb693d5603120185b953 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 16:39:48 -0800 Subject: [PATCH 09/13] Add ssh breakpoint again --- .github/workflows/run-tpch.yaml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index 1ce4c6de80..9ea6aeb767 100644 --- a/.github/workflows/run-tpch.yaml +++ 
b/.github/workflows/run-tpch.yaml @@ -107,6 +107,19 @@ jobs: ray rsync-down .github/assets/ray.yaml /tmp/ray ray-logs +# find /path/to/directory -depth -name '*:*' -exec bash -c ' +# for filepath; do +# dir=$(dirname "$filepath") +# base=$(basename "$filepath") +# new_base=${base//:/_} +# mv "$filepath" "$dir/$new_base" +# done +# ' _ {} + + + - uses: lhotari/action-upterm@v1 + + - run: | + source .venv/bin/activate ray down .github/assets/ray.yaml -y python .github/scripts/csv_to_md.py output.csv output.md echo "# Results" >> $GITHUB_STEP_SUMMARY From b8d62516c764ec8904db9a586bbc3f4ed410aea9 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 17:26:56 -0800 Subject: [PATCH 10/13] Add command to convert all colons to underscores --- .github/workflows/run-tpch.yaml | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index 9ea6aeb767..7c8e69b983 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -78,7 +78,7 @@ jobs: rm -rf daft uv pip install ray[default] boto3 https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/${{ github.sha }}/${{ inputs.wheel }} - # Boot up ray cluster + # Boot up ray cluster and submit tpch benchmarking job ray up .github/assets/ray.yaml -y HEAD_NODE_IP=$(ray get-head-ip .github/assets/ray.yaml | tail -n 1) ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem ubuntu@$HEAD_NODE_IP @@ -105,22 +105,23 @@ jobs: --no_pymodules fi + # Download all logs + # (We also need to convert all files containing ':' to '_'. + # GHA `actions/upload-artifact@v4` does *not* allow semicolons!) 
ray rsync-down .github/assets/ray.yaml /tmp/ray ray-logs + find ray-logs/ray/session_*/logs/daft -depth -name '*:*' -exec bash -c ' + for filepath; do + dir=$(dirname "$filepath") + base=$(basename "$filepath") + new_base=${base//:/_} + mv "$filepath" "$dir/$new_base" + done + ' _ {} + -# find /path/to/directory -depth -name '*:*' -exec bash -c ' -# for filepath; do -# dir=$(dirname "$filepath") -# base=$(basename "$filepath") -# new_base=${base//:/_} -# mv "$filepath" "$dir/$new_base" -# done -# ' _ {} + - - - uses: lhotari/action-upterm@v1 - - - run: | - source .venv/bin/activate + # Tear down ray cluster ray down .github/assets/ray.yaml -y + + # Convert csv to markdown and print to GHA Summary Page python .github/scripts/csv_to_md.py output.csv output.md echo "# Results" >> $GITHUB_STEP_SUMMARY cat output.md >> $GITHUB_STEP_SUMMARY From 36d22fb883c5f8b56ab419031eaef808336fe4a3 Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 17:33:55 -0800 Subject: [PATCH 11/13] Change which directory is downloaded and uploaded to GHA Summary Page --- .github/workflows/run-tpch.yaml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index 7c8e69b983..a66c74af3a 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -106,15 +106,16 @@ jobs: fi # Download all logs - # (We also need to convert all files containing ':' to '_'. - # GHA `actions/upload-artifact@v4` does *not* allow semicolons!) - ray rsync-down .github/assets/ray.yaml /tmp/ray ray-logs - find ray-logs/ray/session_*/logs/daft -depth -name '*:*' -exec bash -c ' + # + # We also need to convert all files containing ':' to '_'. + # GHA `actions/upload-artifact@v4` does *not* allow colons! 
+ ray rsync-down .github/assets/ray.yaml /tmp/ray/session_*/logs/daft ray-daft-logs + find ray-daft-logs -depth -name '*:*' -exec bash -c ' for filepath; do - dir=$(dirname "$filepath") - base=$(basename "$filepath") - new_base=${base//:/_} - mv "$filepath" "$dir/$new_base" + dir=$(dirname "$filepath") + base=$(basename "$filepath") + new_base=${base//:/_} + mv "$filepath" "$dir/$new_base" done ' _ {} + @@ -131,5 +132,5 @@ jobs: path: output.csv - uses: actions/upload-artifact@v4 with: - name: ray-logs - path: ray-logs + name: ray-daft-logs + path: ray-daft-logs From 2781ad9d95f1afc21ee123828bef18c195a4b72e Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 18:58:15 -0800 Subject: [PATCH 12/13] Delete old workflow file --- .../workflows/performance-comparisons.yaml | 61 ------------------- 1 file changed, 61 deletions(-) delete mode 100644 .github/workflows/performance-comparisons.yaml diff --git a/.github/workflows/performance-comparisons.yaml b/.github/workflows/performance-comparisons.yaml deleted file mode 100644 index 8362ad8657..0000000000 --- a/.github/workflows/performance-comparisons.yaml +++ /dev/null @@ -1,61 +0,0 @@ -name: Run performance comparisons - -on: - workflow_dispatch: - -jobs: - build: - uses: ./.github/workflows/build-commit.yaml - secrets: - ACTIONS_AWS_ROLE_ARN: ${{ secrets.ACTIONS_AWS_ROLE_ARN }} - - run: - needs: build - runs-on: [self-hosted, linux, x64, ci-dev] - timeout-minutes: 15 # Remove for ssh debugging - permissions: - id-token: write - contents: read - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 1 - - uses: aws-actions/configure-aws-credentials@v4 - with: - aws-region: us-west-2 - role-session-name: run-tpch-workflow - - uses: ./.github/actions/install - - run: | - # Dynamically update ray config file - sed -i 's|<>|${{ github.sha }}|g' .github/assets/ray.yaml - sed -i 's|<>|${{ needs.build.outputs.wheel }}|g' .github/assets/ray.yaml - - # Download private ssh key - KEY=$(aws secretsmanager 
get-secret-value --secret-id ci-github-actions-ray-cluster-key-3 --query SecretString --output text) - echo "$KEY" >> ~/.ssh/ci-github-actions-ray-cluster-key.pem - chmod 600 ~/.ssh/ci-github-actions-ray-cluster-key.pem - - # Install dependencies - uv v - source .venv/bin/activate - rm -rf daft - uv pip install ray[default] boto3 https://github-actions-artifacts-bucket.s3.us-west-2.amazonaws.com/builds/${{ github.sha }}/${{ needs.build.outputs.wheel }} - - # Boot up ray cluster - ray up .github/assets/ray.yaml -y - HEAD_NODE_IP=$(ray get-head-ip .github/assets/ray.yaml | tail -n 1) - ssh -o StrictHostKeyChecking=no -fN -L 8265:localhost:8265 -i ~/.ssh/ci-github-actions-ray-cluster-key.pem ubuntu@$HEAD_NODE_IP - DAFT_RUNNER=ray python -m benchmarking.tpch \ - --skip_questions="2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" \ - --num_parts 2 \ - --scale_factor 2 \ - --parquet_file_cache /tmp/data \ - --output_csv output.csv \ - --ray_job_dashboard_url http://localhost:8265 \ - --skip_warmup \ - --no_pymodules - ray down .github/assets/ray.yaml -y - - uses: actions/upload-artifact@v4 - with: - name: output.csv - path: output.csv From 32caf7df43bf4b7d3100df27000930caf17e133e Mon Sep 17 00:00:00 2001 From: Raunak Bhagat Date: Tue, 19 Nov 2024 19:06:13 -0800 Subject: [PATCH 13/13] Add ability to specify python version --- .github/assets/ray.yaml | 4 ++-- .github/workflows/build-commit-run-tpch.yaml | 5 +++++ .github/workflows/run-tpch.yaml | 12 +++++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/.github/assets/ray.yaml b/.github/assets/ray.yaml index 63e3a87a2c..486b72bad4 100644 --- a/.github/assets/ray.yaml +++ b/.github/assets/ray.yaml @@ -48,8 +48,8 @@ setup_commands: - curl -LsSf https://astral.sh/uv/install.sh | sh - echo 'export PATH="$HOME/.local/bin:$PATH"' >> ~/.bashrc - source ~/.bashrc -- uv python install 3.9 -- uv python pin 3.9 +- uv python install <> +- uv python pin <> - uv v - echo "source $HOME/.venv/bin/activate" >> 
$HOME/.bashrc - source .venv/bin/activate diff --git a/.github/workflows/build-commit-run-tpch.yaml b/.github/workflows/build-commit-run-tpch.yaml index a178e520a1..52102c4196 100644 --- a/.github/workflows/build-commit-run-tpch.yaml +++ b/.github/workflows/build-commit-run-tpch.yaml @@ -19,6 +19,10 @@ on: description: Which partition size to use required: false default: 2 + python_version: + description: The version of python to use + required: false + default: '3.9' jobs: build: @@ -34,3 +38,4 @@ jobs: skip_questions: ${{ inputs.skip_questions }} scale_factor: ${{ inputs.scale_factor }} partition_size: ${{ inputs.partition_size }} + python_version: ${{ inputs.python_version }} diff --git a/.github/workflows/run-tpch.yaml b/.github/workflows/run-tpch.yaml index a66c74af3a..73ee503cb9 100644 --- a/.github/workflows/run-tpch.yaml +++ b/.github/workflows/run-tpch.yaml @@ -19,6 +19,10 @@ on: description: Which partition size to use required: false default: 2 + python_version: + description: The version of python to use + required: false + default: '3.9' workflow_call: inputs: wheel: @@ -41,11 +45,16 @@ on: description: Which partition size to use required: false default: 2 + python_version: + type: string + description: The version of python to use + required: false + default: '3.9' jobs: run-tpch: runs-on: [self-hosted, linux, x64, ci-dev] - # timeout-minutes: 15 # Remove for ssh debugging + timeout-minutes: 15 # Remove for ssh debugging permissions: id-token: write contents: read @@ -66,6 +75,7 @@ jobs: sed -i 's|<>|${{ inputs.wheel }}|g' .github/assets/ray.yaml sed -i "s|<>|$scale_factor_str|g" .github/assets/ray.yaml sed -i 's|<>|${{ inputs.partition_size }}|g' .github/assets/ray.yaml + sed -i 's|<>|${{ inputs.python_version }}|g' .github/assets/ray.yaml # Download private ssh key KEY=$(aws secretsmanager get-secret-value --secret-id ci-github-actions-ray-cluster-key-3 --query SecretString --output text)