Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IB] support benchmark matrix #1913

Merged
merged 1 commit into from
May 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 23 additions & 14 deletions .github/workflows/instant_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@ on:
- g5.2xlarge
- g5.12xlarge
- g5.48xlarge
- g6.2xlarge
- g6.12xlarge
- g6.48xlarge
- g4dn.12xlarge
- g4dn.2xlarge
- p4d.24xlarge
- p4de.24xlarge
- p5.24xlarge
- inf2.8xlarge
- inf2.24xlarge
- trn1.2xlarge
Expand All @@ -34,6 +39,11 @@ on:
- none
- table
- cloudwatch
repo:
description: '[Do not change] The repo for runner registration'
required: false
type: string
default: 'djl-serving'
workflow_call:
inputs:
running_template:
Expand All @@ -54,6 +64,11 @@ on:
required: false
type: string
default: 'none'
repo:
description: 'The repo for runner registration'
required: false
type: string
default: 'djl-serving'

permissions:
id-token: write
Expand All @@ -68,10 +83,10 @@ jobs:
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
https://api.github.com/repos/deepjavalibrary/${{ inputs.repo }}/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_ib_${{ inputs.instance }} $token djl-serving
./start_instance.sh action_ib_${{ inputs.instance }} $token ${{ inputs.repo }}
outputs:
gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }}

Expand Down Expand Up @@ -127,26 +142,20 @@ jobs:
run: |
wget https://publish.djl.ai/awscurl/awscurl
chmod +x awscurl
- name: Run benchmark job
working-directory: tests/integration
run: |
echo "${{ needs.environment-setup.outputs.template }}" >> template.json
python3 instant_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ inputs.instance }}

bash instant_benchmark.sh
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
aws-region: us-east-1
- name: Record benchmark job
if: ${{ inputs.record == 'table' || inputs.record == 'cloudwatch' }}
- name: Run benchmark job
working-directory: tests/integration
run: |
python3 record_benchmark.py --template template.json \
echo "${{ needs.environment-setup.outputs.template }}" >> template.json
python3 instant_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ inputs.instance }} \
--model models/test --record ${{ inputs.record }}
--record ${{ inputs.record }}

bash instant_benchmark.sh
- name: Get serving logs
if: always()
working-directory: tests/integration
Expand Down
3 changes: 2 additions & 1 deletion tests/integration/benchmark/nightly/g5-12xl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ option.tensor_parallel_degree=max
TOKENIZER=TheBloke/Llama-2-7B-fp16 ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
--json-path benchmark_result.json \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
[test_name]
Expand All @@ -22,13 +23,13 @@ ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=s3://djl-llm/llama-3-8b-hf/
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=TheBloke/Llama-2-13B-fp16 ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--json-path benchmark_result.json \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
6 changes: 4 additions & 2 deletions tests/integration/benchmark/nightly/g5-2xl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@
mistral
[vars]
ENGINE={vllm,lmi-dist}
[benchmark_vars]
CONCURRENCY={1,2,4,8}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
option.tensor_parallel_degree=max
option.max_model_len=8192
[aws_curl]
TOKENIZER=TheBloke/Mistral-7B-Instruct-v0.2-AWQ ./awscurl -c 32 -N 10 \
TOKENIZER=TheBloke/Mistral-7B-Instruct-v0.2-AWQ ./awscurl -c 32 -N $CONCURRENCY \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
--json-path benchmark_result.json \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
2 changes: 1 addition & 1 deletion tests/integration/benchmark/nightly/g5-48xl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=s3://djl-llm/mixtral-8x7b
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--json-path benchmark_result.json \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
47 changes: 39 additions & 8 deletions tests/integration/instant_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
required=False,
type=str,
help="The current instance name")
parser.add_argument("--record",
required=False,
type=str,
help="Place to record metrics")

parser.add_argument("--job", required=False, type=str, help="The job string")
args = parser.parse_args()
Expand Down Expand Up @@ -110,6 +114,7 @@ def parse_raw_template(url, override_container):
commandline = []
requirements = []
vars = []
benchmark_vars = []
info = None
while iterator < len(lines):
if '[test_name]' == lines[iterator]:
Expand Down Expand Up @@ -148,6 +153,12 @@ def parse_raw_template(url, override_container):
lines[iterator]):
vars.append(lines[iterator])
iterator += 1
elif '[benchmark_vars]' == lines[iterator]:
iterator += 1
while iterator < len(lines) and not is_square_bracket(
lines[iterator]):
benchmark_vars.append(lines[iterator])
iterator += 1
elif '[info]' == lines[iterator]:
info = []
iterator += 1
Expand All @@ -174,13 +185,20 @@ def parse_raw_template(url, override_container):
if info is not None:
cur_result['info'] = info
mul_results = multiply_template_with_vars(name, cur_result, vars)
# each of the replicated deployment options
for r in mul_results.values():
r['awscurl'] = r['awscurl'].encode().hex()
replicated_awscurl = multiply_template_with_vars(
'', {'awscurl': cur_result['awscurl']}, benchmark_vars)
for option in replicated_awscurl.keys():
replicated_awscurl[option] = replicated_awscurl[option][
'awscurl'].encode().hex()
r['awscurl'] = replicated_awscurl
lanking520 marked this conversation as resolved.
Show resolved Hide resolved
final_result.update(mul_results)
name = ''
container = None
properties = []
env = []
benchmark_vars = []
commandline = []
requirements = []
vars = []
Expand Down Expand Up @@ -219,23 +237,35 @@ def machine_translation(machine_name: str):
return "lmi"


def build_running_script(template, job, instance, record):
    """Generate ``instant_benchmark.sh`` for a single benchmark job.

    Loads the hex-encoded job template produced by the template parser,
    writes the model artifacts, and emits a bash script that pulls and
    launches the serving container, then runs one awscurl benchmark round
    per replicated benchmark-var option, recording metrics after each round.

    Args:
        template: Path to the JSON template file on disk.
        job: Key of the job entry inside the template to run.
        instance: Instance name (e.g. ``g5.12xlarge``); used to select the
            container flavor via ``machine_translation``.
        record: Where to record metrics (e.g. ``none``/``table``/``cloudwatch``);
            forwarded to ``record_benchmark.py``.
    """
    with open(template) as f:
        template = json.load(f)
    job_template = template[job]
    # Each awscurl variant is stored hex-encoded (keyed by its
    # benchmark-vars option); decode each back to a shell command line.
    for key in job_template['awscurl'].keys():
        job_template['awscurl'][key] = bytes.fromhex(
            job_template['awscurl'][key]).decode("utf-8")
    write_model_artifacts(job_template['properties'],
                          job_template['requirements'], job_template['env'])

    container = job_template['container']

    benchmark_command = ['set -x']
    record_benchmark = ('python3 record_benchmark.py --template template.json '
                        f'--job {job} --instance {instance} '
                        f'--model models/test --record {record}')

    # One benchmark + record round per awscurl variant; stale result files
    # are removed first so each round records only its own output.
    # NOTE(review): unlike the single-command script this replaces, output is
    # no longer tee'd to benchmark.log — confirm record_benchmark.py only
    # needs benchmark_result.json (written via awscurl --json-path).
    for key, value in job_template['awscurl'].items():
        benchmark_command.append("rm -rf benchmark_result.json benchmark.log")
        benchmark_command.append(value)
        benchmark_command.append(record_benchmark +
                                 f' --benchmark-vars "{key}"')

    bash_command = [
        'set -euo pipefail',
        'echo "Start Launching container..."',
        f"docker pull {container}",
        f"./launch_container.sh {container} $PWD/models {machine_translation(instance)}",
    ]
    bash_command.extend(benchmark_command)
    with open("instant_benchmark.sh", "w") as f:
        f.write('\n'.join(bash_command))

Expand All @@ -249,7 +279,8 @@ def build_running_script(template, job, instance):
command = f"echo \"template={json.dumps(json.dumps(json.dumps(result)))}\" >> $GITHUB_OUTPUT"
sp.call(command, shell=True)
elif args.template and args.job and args.instance:
build_running_script(args.template, args.job, args.instance)
build_running_script(args.template, args.job, args.instance,
args.record)
else:
parser.print_help()
raise ValueError("args not supported")
Loading
Loading