diff --git a/.github/workflows/benchmark-nightly.yml b/.github/workflows/benchmark-nightly.yml index 8031e486d..d31353868 100644 --- a/.github/workflows/benchmark-nightly.yml +++ b/.github/workflows/benchmark-nightly.yml @@ -1,6 +1,7 @@ name: Benchmark Nightly on: + workflow_dispatch: schedule: - cron: '0 1 * * *' @@ -15,4 +16,18 @@ jobs: with: running_template: ./benchmark/nightly/g5-2xl.txt instance: g5.2xlarge - record: cloudwatch \ No newline at end of file + record: cloudwatch + g5-12xl: + uses: ./.github/workflows/instant_benchmark.yml + secrets: inherit + with: + running_template: ./benchmark/nightly/g5-12xl.txt + instance: g5.12xlarge + record: cloudwatch + g5-48xl: + uses: ./.github/workflows/instant_benchmark.yml + secrets: inherit + with: + running_template: ./benchmark/nightly/g5-48xl.txt + instance: g5.48xlarge + record: cloudwatch diff --git a/tests/integration/benchmark/nightly/g5-12xl.txt b/tests/integration/benchmark/nightly/g5-12xl.txt new file mode 100644 index 000000000..b3a0a63dd --- /dev/null +++ b/tests/integration/benchmark/nightly/g5-12xl.txt @@ -0,0 +1,34 @@ +[test_name] +llama2 +[vars] +ENGINE={vllm,lmi-dist} +[container] +deepjavalibrary/djl-serving:lmi-nightly +[serving_properties] +engine=Python +option.rolling_batch=$ENGINE +option.model_id=s3://djl-llm/llama-2-7b-hf/ +option.tensor_parallel_degree=max +[aws_curl] +TOKENIZER=TheBloke/Llama-2-7B-fp16 ./awscurl -c 32 -N 10 \ -X POST http://127.0.0.1:8080/invocations \ --connect-timeout 60 -H "Content-type: application/json" \ -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \ -t -o /tmp/output.txt +[test_name] +llama3 +[vars] +ENGINE={vllm,lmi-dist} +[container] +deepjavalibrary/djl-serving:lmi-nightly +[serving_properties] +engine=Python +option.rolling_batch=$ENGINE +option.model_id=s3://djl-llm/llama-3-8b-hf/ +option.tensor_parallel_degree=max +[aws_curl] +TOKENIZER=TheBloke/Llama-2-13B-fp16 
./awscurl -c 32 -N 10 \ +-X POST http://127.0.0.1:8080/invocations \ +--connect-timeout 60 -H "Content-type: application/json" \ +-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \ +-t -o /tmp/output.txt diff --git a/tests/integration/benchmark/nightly/g5-2xl.txt b/tests/integration/benchmark/nightly/g5-2xl.txt index 80ecdd519..9cf3af8ed 100644 --- a/tests/integration/benchmark/nightly/g5-2xl.txt +++ b/tests/integration/benchmark/nightly/g5-2xl.txt @@ -1,10 +1,12 @@ [test_name] -mistral-vllm +mistral +[vars] +ENGINE={vllm,lmi-dist} [container] deepjavalibrary/djl-serving:lmi-nightly [serving_properties] engine=Python -option.rolling_batch=vllm +option.rolling_batch=$ENGINE option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B option.tensor_parallel_degree=max option.max_model_len=8192 @@ -14,18 +16,3 @@ TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \ --connect-timeout 60 -H "Content-type: application/json" \ -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \ -t -o /tmp/output.txt -[test_name] -mistral-lmi_dist -[container] -deepjavalibrary/djl-serving:lmi-nightly -[serving_properties] -engine=MPI -option.rolling_batch=lmi_dist -option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B -option.tensor_parallel_degree=max -[aws_curl] -TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \ --X POST http://127.0.0.1:8080/invocations \ ---connect-timeout 60 -H "Content-type: application/json" \ --d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \ --t -o /tmp/output.txt diff --git a/tests/integration/benchmark/nightly/g5-48xl.txt b/tests/integration/benchmark/nightly/g5-48xl.txt new file mode 100644 index 000000000..bfdda8953 --- /dev/null +++ b/tests/integration/benchmark/nightly/g5-48xl.txt @@ -0,0 +1,17 @@ +[test_name] +mixtral-8x7b +[vars] 
+ENGINE={vllm,lmi-dist} +[container] +deepjavalibrary/djl-serving:lmi-nightly +[serving_properties] +engine=Python +option.rolling_batch=$ENGINE +option.model_id=s3://djl-llm/mixtral-8x7b +option.tensor_parallel_degree=max +[aws_curl] +TOKENIZER=NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO ./awscurl -c 32 -N 10 \ +-X POST http://127.0.0.1:8080/invocations \ +--connect-timeout 60 -H "Content-type: application/json" \ +-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \ +-t -o /tmp/output.txt diff --git a/tests/integration/record_benchmark.py b/tests/integration/record_benchmark.py index 8a94da546..218ecb6c4 100755 --- a/tests/integration/record_benchmark.py +++ b/tests/integration/record_benchmark.py @@ -66,8 +66,8 @@ def record_table(): def record_cloudwatch(): esc = lambda n: n.replace("/", "-").replace(".", "-").replace("=", "-" ).strip(' -') - job_name = "" if "job" not in data else "_" + data["job"] - metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}{esc(job_name)}_{esc(data['modelId'])}_{n}" + job_name = data["modelId"] if "job" not in data else data["job"] + metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}_{esc(job_name)}_{n}" metric_data = [ { 'MetricName': metric_name("throughput"),