Creates initial benchmark suite (#1831)

deepjavalibrary · Apr 29, 2024 · 03e48af · 03e48af
1 parent 51d41bf
commit 03e48af
Show file tree

Hide file tree

Showing 5 changed files with 73 additions and 20 deletions.
diff --git a/.github/workflows/benchmark-nightly.yml b/.github/workflows/benchmark-nightly.yml
@@ -1,6 +1,7 @@
 name: Benchmark Nightly
 
 on:
+  workflow_dispatch:
   schedule:
     - cron: '0 1 * * *'
 
@@ -15,4 +16,18 @@ jobs:
     with:
       running_template: ./benchmark/nightly/g5-2xl.txt
       instance: g5.2xlarge
-      record: cloudwatch
+      record: cloudwatch
+  g5-12xl:
+    uses: ./.github/workflows/instant_benchmark.yml
+    secrets: inherit
+    with:
+      running_template: ./benchmark/nightly/g5-12xl.txt
+      instance: g5.12xlarge
+      record: cloudwatch
+  g5-48xl:
+    uses: ./.github/workflows/instant_benchmark.yml
+    secrets: inherit
+    with:
+      running_template: ./benchmark/nightly/g5-48xl.txt
+      instance: g5.48xlarge
+      record: cloudwatch
diff --git a/tests/integration/benchmark/nightly/g5-12xl.txt b/tests/integration/benchmark/nightly/g5-12xl.txt
@@ -0,0 +1,34 @@
+[test_name]
+llama2
+[vars]
+ENGINE={vllm,lmi-dist}
+[container]
+deepjavalibrary/djl-serving:lmi-nightly
+[serving_properties]
+engine=Python
+option.rolling_batch=$ENGINE
+option.model_id=s3://djl-llm/llama-2-7b-hf/
+option.tensor_parallel_degree=max
+[aws_curl]
+TOKENIZER=TheBloke/Llama-2-7B-fp16 ./awscurl -c 32 -N 10 \
+-X POST http://127.0.0.1:8080/invocations   \
+--connect-timeout 60   -H "Content-type: application/json"   \
+-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}'   \
+-t -o /tmp/output.txt
+[test_name]
+llama3
+[vars]
+ENGINE={vllm,lmi-dist}
+[container]
+deepjavalibrary/djl-serving:lmi-nightly
+[serving_properties]
+engine=Python
+option.rolling_batch=$ENGINE
+option.model_id=s3://djl-llm/llama-3-8b-hf/
+option.tensor_parallel_degree=max
+[aws_curl]
+TOKENIZER=TheBloke/Llama-2-13B-fp16 ./awscurl -c 32 -N 10 \
+-X POST http://127.0.0.1:8080/invocations   \
+--connect-timeout 60   -H "Content-type: application/json"   \
+-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}'   \
+-t -o /tmp/output.txt
diff --git a/tests/integration/benchmark/nightly/g5-2xl.txt b/tests/integration/benchmark/nightly/g5-2xl.txt
@@ -1,10 +1,12 @@
 [test_name]
-mistral-vllm
+mistral
+[vars]
+ENGINE={vllm,lmi-dist}
 [container]
 deepjavalibrary/djl-serving:lmi-nightly
 [serving_properties]
 engine=Python
-option.rolling_batch=vllm
+option.rolling_batch=$ENGINE
 option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
 option.tensor_parallel_degree=max
 option.max_model_len=8192
@@ -14,18 +16,3 @@ TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \
 --connect-timeout 60   -H "Content-type: application/json"   \
 -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}'   \
 -t -o /tmp/output.txt
-[test_name]
-mistral-lmi_dist
-[container]
-deepjavalibrary/djl-serving:lmi-nightly
-[serving_properties]
-engine=MPI
-option.rolling_batch=lmi_dist
-option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
-option.tensor_parallel_degree=max
-[aws_curl]
-TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \
--X POST http://127.0.0.1:8080/invocations   \
---connect-timeout 60   -H "Content-type: application/json"   \
--d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}'   \
--t -o /tmp/output.txt
diff --git a/tests/integration/benchmark/nightly/g5-48xl.txt b/tests/integration/benchmark/nightly/g5-48xl.txt
@@ -0,0 +1,17 @@
+[test_name]
+mixtral-8x7b
+[vars]
+ENGINE={vllm,lmi-dist}
+[container]
+deepjavalibrary/djl-serving:lmi-nightly
+[serving_properties]
+engine=Python
+option.rolling_batch=$ENGINE
+option.model_id=s3://djl-llm/mixtral-8x7b
+option.tensor_parallel_degree=max
+[aws_curl]
+TOKENIZER=NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO ./awscurl -c 32 -N 10 \
+-X POST http://127.0.0.1:8080/invocations   \
+--connect-timeout 60   -H "Content-type: application/json"   \
+-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}'   \
+-t -o /tmp/output.txt
diff --git a/tests/integration/record_benchmark.py b/tests/integration/record_benchmark.py
@@ -66,8 +66,8 @@ def record_table():
 def record_cloudwatch():
     esc = lambda n: n.replace("/", "-").replace(".", "-").replace("=", "-"
                                                                   ).strip(' -')
-    job_name = "" if "job" not in data else "_" + data["job"]
-    metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}{esc(job_name)}_{esc(data['modelId'])}_{n}"
+    job_name = data["modelId"] if "job" not in data else data["job"]
+    metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}_{esc(job_name)}_{n}"
     metric_data = [
         {
             'MetricName': metric_name("throughput"),