Skip to content

Commit

Permalink
Creates initial benchmark suite (#1831)
Browse files Browse the repository at this point in the history
  • Loading branch information
zachgk authored Apr 29, 2024
1 parent 51d41bf commit 03e48af
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 20 deletions.
17 changes: 16 additions & 1 deletion .github/workflows/benchmark-nightly.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
name: Benchmark Nightly

on:
workflow_dispatch:
schedule:
- cron: '0 1 * * *'

Expand All @@ -15,4 +16,18 @@ jobs:
with:
running_template: ./benchmark/nightly/g5-2xl.txt
instance: g5.2xlarge
record: cloudwatch
record: cloudwatch
g5-12xl:
uses: ./.github/workflows/instant_benchmark.yml
secrets: inherit
with:
running_template: ./benchmark/nightly/g5-12xl.txt
instance: g5.12xlarge
record: cloudwatch
g5-48xl:
uses: ./.github/workflows/instant_benchmark.yml
secrets: inherit
with:
running_template: ./benchmark/nightly/g5-48xl.txt
instance: g5.48xlarge
record: cloudwatch
34 changes: 34 additions & 0 deletions tests/integration/benchmark/nightly/g5-12xl.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[test_name]
llama2
[vars]
ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=s3://djl-llm/llama-2-7b-hf/
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=TheBloke/Llama-2-7B-fp16 ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
[test_name]
llama3
[vars]
ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=s3://djl-llm/llama-3-8b-hf/
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=TheBloke/Llama-2-13B-fp16 ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
21 changes: 4 additions & 17 deletions tests/integration/benchmark/nightly/g5-2xl.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
[test_name]
mistral-vllm
mistral
[vars]
ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=vllm
option.rolling_batch=$ENGINE
option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
option.tensor_parallel_degree=max
option.max_model_len=8192
Expand All @@ -14,18 +16,3 @@ TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
[test_name]
mistral-lmi_dist
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=MPI
option.rolling_batch=lmi_dist
option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
17 changes: 17 additions & 0 deletions tests/integration/benchmark/nightly/g5-48xl.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[test_name]
mixtral-8x7b
[vars]
ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=s3://djl-llm/mixtral-8x7b
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
4 changes: 2 additions & 2 deletions tests/integration/record_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ def record_table():
def record_cloudwatch():
esc = lambda n: n.replace("/", "-").replace(".", "-").replace("=", "-"
).strip(' -')
job_name = "" if "job" not in data else "_" + data["job"]
metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}{esc(job_name)}_{esc(data['modelId'])}_{n}"
job_name = data["modelId"] if "job" not in data else data["job"]
metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}_{esc(job_name)}_{n}"
metric_data = [
{
'MetricName': metric_name("throughput"),
Expand Down

0 comments on commit 03e48af

Please sign in to comment.