Skip to content

Commit

Permalink
[IB] support benchmark matrix (#1913)
Browse files: browse the repository at this point in the history
  • Loading branch information
Qing Lan authored May 16, 2024
1 parent 5837ddf commit a3e1252
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 92 deletions.
37 changes: 23 additions & 14 deletions .github/workflows/instant_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@ on:
- g5.2xlarge
- g5.12xlarge
- g5.48xlarge
- g6.2xlarge
- g6.12xlarge
- g6.48xlarge
- g4dn.12xlarge
- g4dn.2xlarge
- p4d.24xlarge
- p4de.24xlarge
- p5.24xlarge
- inf2.8xlarge
- inf2.24xlarge
- trn1.2xlarge
Expand All @@ -34,6 +39,11 @@ on:
- none
- table
- cloudwatch
repo:
description: '[Do not change] The repo for runner registration'
required: false
type: string
default: 'djl-serving'
workflow_call:
inputs:
running_template:
Expand All @@ -54,6 +64,11 @@ on:
required: false
type: string
default: 'none'
repo:
description: 'The repo for runner registration'
required: false
type: string
default: 'djl-serving'

permissions:
id-token: write
Expand All @@ -68,10 +83,10 @@ jobs:
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
https://api.github.com/repos/deepjavalibrary/${{ inputs.repo }}/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_ib_${{ inputs.instance }} $token djl-serving
./start_instance.sh action_ib_${{ inputs.instance }} $token ${{ inputs.repo }}
outputs:
gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }}

Expand Down Expand Up @@ -127,26 +142,20 @@ jobs:
run: |
wget https://publish.djl.ai/awscurl/awscurl
chmod +x awscurl
- name: Run benchmark job
working-directory: tests/integration
run: |
echo "${{ needs.environment-setup.outputs.template }}" >> template.json
python3 instant_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ inputs.instance }}
bash instant_benchmark.sh
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving
aws-region: us-east-1
- name: Record benchmark job
if: ${{ inputs.record == 'table' || inputs.record == 'cloudwatch' }}
- name: Run benchmark job
working-directory: tests/integration
run: |
python3 record_benchmark.py --template template.json \
echo "${{ needs.environment-setup.outputs.template }}" >> template.json
python3 instant_benchmark.py --template template.json \
--job ${{ matrix.job }} --instance ${{ inputs.instance }} \
--model models/test --record ${{ inputs.record }}
--record ${{ inputs.record }}
bash instant_benchmark.sh
- name: Get serving logs
if: always()
working-directory: tests/integration
Expand Down
3 changes: 2 additions & 1 deletion tests/integration/benchmark/nightly/g5-12xl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ option.tensor_parallel_degree=max
TOKENIZER=TheBloke/Llama-2-7B-fp16 ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
--json-path benchmark_result.json \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
[test_name]
Expand All @@ -22,13 +23,13 @@ ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=s3://djl-llm/llama-3-8b-hf/
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=TheBloke/Llama-2-13B-fp16 ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--json-path benchmark_result.json \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
6 changes: 4 additions & 2 deletions tests/integration/benchmark/nightly/g5-2xl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@
mistral
[vars]
ENGINE={vllm,lmi-dist}
[benchmark_vars]
CONCURRENCY={1,2,4,8}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B
option.tensor_parallel_degree=max
option.max_model_len=8192
[aws_curl]
TOKENIZER=TheBloke/Mistral-7B-Instruct-v0.2-AWQ ./awscurl -c 32 -N 10 \
TOKENIZER=TheBloke/Mistral-7B-Instruct-v0.2-AWQ ./awscurl -c 32 -N $CONCURRENCY \
-X POST http://127.0.0.1:8080/invocations \
--connect-timeout 60 -H "Content-type: application/json" \
--json-path benchmark_result.json \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
2 changes: 1 addition & 1 deletion tests/integration/benchmark/nightly/g5-48xl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ ENGINE={vllm,lmi-dist}
[container]
deepjavalibrary/djl-serving:lmi-nightly
[serving_properties]
engine=Python
option.rolling_batch=$ENGINE
option.model_id=s3://djl-llm/mixtral-8x7b
option.tensor_parallel_degree=max
[aws_curl]
TOKENIZER=NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO ./awscurl -c 32 -N 10 \
-X POST http://127.0.0.1:8080/invocations \
--json-path benchmark_result.json \
--connect-timeout 60 -H "Content-type: application/json" \
-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \
-t -o /tmp/output.txt
47 changes: 39 additions & 8 deletions tests/integration/instant_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@
required=False,
type=str,
help="The current instance name")
parser.add_argument("--record",
required=False,
type=str,
help="Place to record metrics")

parser.add_argument("--job", required=False, type=str, help="The job string")
args = parser.parse_args()
Expand Down Expand Up @@ -110,6 +114,7 @@ def parse_raw_template(url, override_container):
commandline = []
requirements = []
vars = []
benchmark_vars = []
info = None
while iterator < len(lines):
if '[test_name]' == lines[iterator]:
Expand Down Expand Up @@ -148,6 +153,12 @@ def parse_raw_template(url, override_container):
lines[iterator]):
vars.append(lines[iterator])
iterator += 1
elif '[benchmark_vars]' == lines[iterator]:
iterator += 1
while iterator < len(lines) and not is_square_bracket(
lines[iterator]):
benchmark_vars.append(lines[iterator])
iterator += 1
elif '[info]' == lines[iterator]:
info = []
iterator += 1
Expand All @@ -174,13 +185,20 @@ def parse_raw_template(url, override_container):
if info is not None:
cur_result['info'] = info
mul_results = multiply_template_with_vars(name, cur_result, vars)
# each of the replicated deployment options
for r in mul_results.values():
r['awscurl'] = r['awscurl'].encode().hex()
replicated_awscurl = multiply_template_with_vars(
'', {'awscurl': cur_result['awscurl']}, benchmark_vars)
for option in replicated_awscurl.keys():
replicated_awscurl[option] = replicated_awscurl[option][
'awscurl'].encode().hex()
r['awscurl'] = replicated_awscurl
final_result.update(mul_results)
name = ''
container = None
properties = []
env = []
benchmark_vars = []
commandline = []
requirements = []
vars = []
Expand Down Expand Up @@ -219,23 +237,35 @@ def machine_translation(machine_name: str):
return "lmi"


def build_running_script(template, job, instance):
def build_running_script(template, job, instance, record):
with open(template) as f:
template = json.load(f)
job_template = template[job]
job_template['awscurl'] = bytes.fromhex(
job_template['awscurl']).decode("utf-8")
for key in job_template['awscurl'].keys():
job_template['awscurl'][key] = bytes.fromhex(
job_template['awscurl'][key]).decode("utf-8")
write_model_artifacts(job_template['properties'],
job_template['requirements'], job_template['env'])

container = job_template['container']

benchmark_command = ['set -x']
record_benchmark = ('python3 record_benchmark.py --template template.json '
f'--job {job} --instance {instance} '
f'--model models/test --record {record}')

for key, value in job_template['awscurl'].items():
benchmark_command.append("rm -rf benchmark_result.json benchmark.log")
benchmark_command.append(value)
benchmark_command.append(record_benchmark +
f' --benchmark-vars "{key}"')

bash_command = [
'set -euo pipefail', 'echo "Start Launching container..."',
'set -euo pipefail',
'echo "Start Launching container..."',
f"docker pull {container}",
f"./launch_container.sh {container} $PWD/models {machine_translation(instance)}",
job_template['awscurl'] + " | tee benchmark.log"
]
bash_command.extend(benchmark_command)
with open("instant_benchmark.sh", "w") as f:
f.write('\n'.join(bash_command))

Expand All @@ -249,7 +279,8 @@ def build_running_script(template, job, instance):
command = f"echo \"template={json.dumps(json.dumps(json.dumps(result)))}\" >> $GITHUB_OUTPUT"
sp.call(command, shell=True)
elif args.template and args.job and args.instance:
build_running_script(args.template, args.job, args.instance)
build_running_script(args.template, args.job, args.instance,
args.record)
else:
parser.print_help()
raise ValueError("args not supported")
Loading

0 comments on commit a3e1252

Please sign in to comment.