Skip to content

Commit

Permalink
[CI] Creates nightly benchmark (#1787)
Browse files Browse the repository at this point in the history
  • Loading branch information
zachgk authored Apr 23, 2024
1 parent c3dc093 commit 85c4364
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 17 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/benchmark-nightly.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Nightly benchmark: runs the reusable instant-benchmark workflow on a fixed
# instance type once a day and records the results.
name: Benchmark Nightly

on:
  schedule:
    # GitHub evaluates cron schedules in UTC: every day at 01:00 UTC.
    - cron: '0 1 * * *'

# A called (reusable) workflow cannot be granted more permissions than its
# caller, so the id-token permission that instant_benchmark.yml declares for
# its AWS role assumption has to be granted here as well.
permissions:
  id-token: write
  contents: read

jobs:
  g5-2xl:
    uses: ./.github/workflows/instant_benchmark.yml
    # Forward the repository secrets to the called workflow.
    secrets: inherit
    with:
      # The called workflow parses this path from tests/integration, so it
      # resolves to tests/integration/benchmark/nightly/g5-2xl.txt.
      running_template: ./benchmark/nightly/g5-2xl.txt
      instance: g5.2xlarge
      # The called workflow only records results for 'table' or 'cloudwatch'.
      record: cloudwatch
38 changes: 29 additions & 9 deletions .github/workflows/instant_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ on:
- none
- table
- cloudwatch
workflow_call:
inputs:
running_template:
description: 'A json file that contains benchmark plans'
required: true
type: string
instance:
description: 'Instance used for benchmark'
required: true
type: string
container:
description: 'The container used to run benchmark (overrides the template). Should be a full docker path such as deepjavalibrary/djl-serving:0.27.0-deepspeed'
required: false
type: string
default: ''
record:
description: 'Whether to record the results'
required: false
type: string
default: 'none'

permissions:
id-token: write
Expand All @@ -51,12 +71,12 @@ jobs:
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_ib_${{ github.event.inputs.instance }} $token djl-serving
./start_instance.sh action_ib_${{ inputs.instance }} $token djl-serving
outputs:
gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }}

environment-setup:
runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ]
runs-on: [ self-hosted, "${{ inputs.instance }}" ]
timeout-minutes: 15
needs: [ create-runners ]
steps:
Expand All @@ -82,14 +102,14 @@ jobs:
working-directory: tests/integration
id: generate_matrix
run: |
python3 instant_benchmark.py --parse ${{ github.event.inputs.running_template }} \
--container "${{ github.event.inputs.container }}"
python3 instant_benchmark.py --parse ${{ inputs.running_template }} \
--container "${{ inputs.container }}"
outputs:
jobs: ${{ steps.generate_matrix.outputs.jobs }}
template: ${{ steps.generate_matrix.outputs.template }}

benchmark_run:
runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ]
runs-on: [ self-hosted, "${{ inputs.instance }}" ]
timeout-minutes: 30
needs: [ environment-setup ]
strategy:
Expand All @@ -116,7 +136,7 @@ jobs:
run: |
echo "${{ needs.environment-setup.outputs.template }}" >> template.json
python3 instant_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }}
--job ${{ matrix.job }} --instance ${{ inputs.instance }}
bash instant_benchmark.sh
- name: Configure AWS Credentials
Expand All @@ -125,12 +145,12 @@ jobs:
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
aws-region: us-east-1
- name: Record benchmark job
if: ${{ github.event.inputs.record == 'table' || github.event.inputs.record == 'cloudwatch' }}
if: ${{ inputs.record == 'table' || inputs.record == 'cloudwatch' }}
working-directory: tests/integration
run: |
python3 record_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \
--model models/test --record ${{ github.event.inputs.record }}
--job ${{ matrix.job }} --instance ${{ inputs.instance }} \
--model models/test --record ${{ inputs.record }}
- name: Get serving logs
if: always()
working-directory: tests/integration
Expand Down
16 changes: 16 additions & 0 deletions tests/integration/benchmark/nightly/g5-2xl.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[test_name]
mistral-vllm
[container]
deepjavalibrary/djl-serving:deepspeed-nightly
[serving_properties]
engine=Python
option.rolling_batch=vllm
option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
option.tensor_parallel_degree=max
option.max_model_len=8192
[aws_curl]
TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
23 changes: 15 additions & 8 deletions tests/integration/record_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,10 @@ def record_table():


def record_cloudwatch():
esc = lambda n: n.replace("/", "-").replace(".", "-").strip(' -')
metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}_{esc(data['modelId'])}_{n}"
esc = lambda n: n.replace("/", "-").replace(".", "-").replace("=", "-"
).strip(' -')
job_name = "" if "job" not in data else "_" + data["job"]
metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}{esc(job_name)}_{esc(data['modelId'])}_{n}"
metric_data = [
{
'MetricName': metric_name("throughput"),
Expand Down Expand Up @@ -139,12 +141,16 @@ def data_container():
container = data["container"]
if container.startswith("deepjavalibrary/djl-serving:"):
container = container[len("deepjavalibrary/djl-serving:"):]
split = container.split("-", 1)
data["djlVersion"] = split[0]
if len(split) > 1:
data["image"] = split[1]
else:
data["image"] = "cpu"
if container[0] == "0": # Release build
split = container.split("-", 1)
data["djlVersion"] = split[0]
if len(split) > 1:
data["image"] = split[1]
else:
data["image"] = "cpu"
else: # Nightly build
data["djlNightly"] = "true"
data["image"] = container[:-len("-nightly")]
if "text-generation-inference" in container:
data["modelServer"] = "TGI"
version = container.split(":")[1]
Expand Down Expand Up @@ -210,6 +216,7 @@ def data_from_template():
with open(args.template, "r") as f:
template = json.load(f)
job_template = template[args.job]
data["job"] = args.job
data["awscurl"] = bytes.fromhex(
job_template['awscurl']).decode("utf-8")
if "container" not in data and "container" in job_template:
Expand Down

0 comments on commit 85c4364

Please sign in to comment.