instant benchmark tooling #92
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: instant benchmark tooling | |
on: | |
workflow_dispatch: | |
inputs: | |
running_template: | |
description: 'A json file that contains benchmark plans' | |
required: true | |
instance: | |
description: 'Instance used for benchmark' | |
required: true | |
default: 'g5.12xlarge' | |
type: choice | |
options: | |
- g5.2xlarge | |
- g5.12xlarge | |
- g5.48xlarge | |
- g4dn.12xlarge | |
- g4dn.2xlarge | |
- p4d.24xlarge | |
- inf2.8xlarge | |
- inf2.24xlarge | |
- trn1.2xlarge | |
- trn1.32xlarge | |
container: | |
description: 'The container used to run benchmark (overrides the template). Should be a full docker path such as deepjavalibrary/djl-serving:0.25.0-deepspeed' | |
required: false | |
default: '' | |
record: | |
description: 'Whether to record the results' | |
default: 'none' | |
type: choice | |
options: | |
- none | |
- table | |
- cloudwatch | |
permissions: | |
id-token: write | |
contents: read | |
jobs: | |
create-runners: | |
runs-on: [self-hosted, scheduler] | |
steps: | |
- name: Create new instance | |
id: create_instance | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \ | |
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ | |
--fail \ | |
| jq '.token' | tr -d '"' ) | |
./start_instance.sh action_ib_${{ github.event.inputs.instance }} $token djl-serving | |
outputs: | |
gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }} | |
environment-setup: | |
runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] | |
timeout-minutes: 15 | |
needs: [ create-runners ] | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Set up Python3 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: '3.10.x' | |
- name: Clean env | |
run: | | |
yes | docker system prune -a --volumes | |
- name: install deps | |
run: | | |
sudo apt-get update | |
sudo apt-get install awscli -y | |
pip3 install boto3 | |
- name: Configure AWS Credentials | |
uses: aws-actions/configure-aws-credentials@v4 | |
with: | |
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving | |
aws-region: us-east-1 | |
- name: Parse job schema | |
working-directory: tests/integration | |
id: generate_matrix | |
run: | | |
python3 instant_benchmark.py --parse ${{ github.event.inputs.running_template }} \ | |
--container "${{ github.event.inputs.container }}" | |
outputs: | |
jobs: ${{ steps.generate_matrix.outputs.jobs }} | |
template: ${{ steps.generate_matrix.outputs.template }} | |
benchmark_run: | |
runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] | |
timeout-minutes: 30 | |
needs: [ environment-setup ] | |
strategy: | |
matrix: | |
job: ${{ fromJSON(needs.environment-setup.outputs.jobs) }} | |
steps: | |
- uses: actions/checkout@v4 | |
- name: Set up Python3 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: '3.10.x' | |
- name: install deps | |
run: | | |
sudo apt-get update | |
sudo apt-get install awscli -y | |
pip3 install boto3 | |
- name: Setup awscurl | |
working-directory: tests/integration | |
run: | | |
wget https://github.com/frankfliu/junkyard/releases/download/v0.3.1/awscurl | |
chmod +x awscurl | |
- name: Run benchmark job | |
working-directory: tests/integration | |
run: | | |
echo "${{ needs.environment-setup.outputs.template }}" >> template.json | |
python3 instant_benchmark.py --template template.json \ | |
--job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} | |
bash instant_benchmark.sh | |
- name: Configure AWS Credentials | |
uses: aws-actions/configure-aws-credentials@v4 | |
with: | |
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving | |
aws-region: us-east-1 | |
- name: Record benchmark job | |
if: ${{ github.event.inputs.record == 'table' || github.event.inputs.record == 'cloudwatch' }} | |
working-directory: tests/integration | |
run: | | |
python3 record_benchmark.py --template template.json \ | |
--job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \ | |
--model models/test --record ${{ github.event.inputs.record }} | |
- name: Get serving logs | |
if: always() | |
working-directory: tests/integration | |
run: | | |
docker rm -f $(docker ps -aq) || true | |
cat logs/serving.log | |
- name: Upload test artifacts | |
uses: actions/upload-artifact@v3 | |
if: always() | |
with: | |
name: ${{ matrix.job }} | |
path: tests/integration | |
stop-runners: | |
if: always() | |
runs-on: [ self-hosted, scheduler ] | |
needs: [ create-runners, environment-setup, benchmark_run ] | |
steps: | |
- name: Stop instances | |
run: | | |
cd /home/ubuntu/djl_benchmark_script/scripts | |
instance_id=${{ needs.create-runners.outputs.gpu_instance_id }} | |
./stop_instance.sh $instance_id |