# instant benchmark tooling (#92)
name: instant benchmark tooling

on:
  workflow_dispatch:
    inputs:
      running_template:
        description: 'A json file that contains benchmark plans'
        required: true
      instance:
        description: 'Instance used for benchmark'
        required: true
        default: 'g5.12xlarge'
        type: choice
        options:
          - g5.2xlarge
          - g5.12xlarge
          - g5.48xlarge
          - g4dn.12xlarge
          - g4dn.2xlarge
          - p4d.24xlarge
          - inf2.8xlarge
          - inf2.24xlarge
          - trn1.2xlarge
          - trn1.32xlarge
      container:
        description: 'The container used to run the benchmark (overrides the template). Should be a full docker path such as deepjavalibrary/djl-serving:0.25.0-deepspeed'
        required: false
        default: ''
      record:
        description: 'Whether to record the results'
        default: 'none'
        type: choice
        options:
          - none
          - table
          - cloudwatch
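
# Illustrative only: the workflow can be dispatched from the gh CLI, assuming the
# file lives at .github/workflows/instant_benchmark.yml (the plan path below is a
# hypothetical placeholder):
#   gh workflow run instant_benchmark.yml \
#     -f running_template=benchmarks/plans/example.json \
#     -f instance=g5.12xlarge \
#     -f record=table
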
permissions:
  id-token: write
  contents: read
jobs:
  create-runners:
    runs-on: [ self-hosted, scheduler ]
    steps:
      - name: Create new instance
        id: create_instance
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
            https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
            --fail \
            | jq '.token' | tr -d '"' )
          ./start_instance.sh action_ib_${{ github.event.inputs.instance }} $token djl-serving
    outputs:
      gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }}
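
  # For reference: the registration-token call above returns JSON shaped like the
  # following (values are illustrative placeholders); jq extracts .token:
  #   { "token": "XXXXXX", "expires_at": "2024-01-22T12:13:35.123-08:00" }
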
  environment-setup:
    runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ]
    timeout-minutes: 15
    needs: [ create-runners ]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python3
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'
      - name: Clean env
        run: |
          yes | docker system prune -a --volumes
      - name: Install deps
        run: |
          sudo apt-get update
          sudo apt-get install awscli -y
          pip3 install boto3
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
          aws-region: us-east-1
      - name: Parse job schema
        working-directory: tests/integration
        id: generate_matrix
        run: |
          python3 instant_benchmark.py --parse ${{ github.event.inputs.running_template }} \
            --container "${{ github.event.inputs.container }}"
    outputs:
      jobs: ${{ steps.generate_matrix.outputs.jobs }}
      template: ${{ steps.generate_matrix.outputs.template }}
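
  # Note (an assumption about instant_benchmark.py, whose source is not shown
  # here): for the matrix fan-out below to work, the parse step must publish its
  # results through $GITHUB_OUTPUT, e.g. something like:
  #   echo 'jobs=["job1","job2"]' >> $GITHUB_OUTPUT
  #   echo 'template={...}' >> $GITHUB_OUTPUT
  # so that fromJSON(needs.environment-setup.outputs.jobs) yields one matrix
  # entry per job.
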
  benchmark_run:
    runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ]
    timeout-minutes: 30
    needs: [ environment-setup ]
    strategy:
      matrix:
        job: ${{ fromJSON(needs.environment-setup.outputs.jobs) }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python3
        uses: actions/setup-python@v5
        with:
          python-version: '3.10.x'
      - name: Install deps
        run: |
          sudo apt-get update
          sudo apt-get install awscli -y
          pip3 install boto3
      - name: Setup awscurl
        working-directory: tests/integration
        run: |
          wget https://github.com/frankfliu/junkyard/releases/download/v0.3.1/awscurl
          chmod +x awscurl
      - name: Run benchmark job
        working-directory: tests/integration
        run: |
          echo "${{ needs.environment-setup.outputs.template }}" >> template.json
          python3 instant_benchmark.py --template template.json \
            --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }}
          bash instant_benchmark.sh
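
      # Flow note (an assumption about instant_benchmark.py, whose source is not
      # shown here): the template JSON echoed above is the parsed plan published
      # by the environment-setup job, and the --job/--instance invocation is
      # expected to generate instant_benchmark.sh with the concrete commands for
      # this matrix entry, which the final line then executes.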
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
          aws-region: us-east-1
      - name: Record benchmark job
        if: ${{ github.event.inputs.record == 'table' || github.event.inputs.record == 'cloudwatch' }}
        working-directory: tests/integration
        run: |
          python3 record_benchmark.py --template template.json \
            --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \
            --model models/test --record ${{ github.event.inputs.record }}
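
      # Illustrative sketch only (record_benchmark.py's internals are not shown
      # here): recording to CloudWatch could be done with a call such as
      #   aws cloudwatch put-metric-data --namespace "djl-benchmark" \
      #     --metric-name throughput --value 42.0 --unit Count/Second
      # where the namespace and metric name are hypothetical placeholders.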
      - name: Get serving logs
        if: always()
        working-directory: tests/integration
        run: |
          docker rm -f $(docker ps -aq) || true
          cat logs/serving.log
      - name: Upload test artifacts
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: ${{ matrix.job }}
          path: tests/integration
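
  # stop-runners is gated on if: always() so the EC2 instance created by
  # create-runners is reclaimed even when environment-setup or benchmark_run
  # fails or is cancelled.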
  stop-runners:
    if: always()
    runs-on: [ self-hosted, scheduler ]
    needs: [ create-runners, environment-setup, benchmark_run ]
    steps:
      - name: Stop instances
        run: |
          cd /home/ubuntu/djl_benchmark_script/scripts
          instance_id=${{ needs.create-runners.outputs.gpu_instance_id }}
          ./stop_instance.sh $instance_id