From 49501d61be4f1a6f023be8fa2d93271815ac3088 Mon Sep 17 00:00:00 2001 From: Zach Kimberg Date: Thu, 18 Apr 2024 15:39:43 -0700 Subject: [PATCH] Debug --- .github/workflows/benchmark-nightly.yml | 2 +- .github/workflows/instant_benchmark.yml | 18 +++++++++--------- .../benchmark/nightly/g5-2xl.txt | 5 +++-- 3 files changed, 13 insertions(+), 12 deletions(-) rename tests/{ => integration}/benchmark/nightly/g5-2xl.txt (71%) diff --git a/.github/workflows/benchmark-nightly.yml b/.github/workflows/benchmark-nightly.yml index ff49bc9706..982cc451ee 100644 --- a/.github/workflows/benchmark-nightly.yml +++ b/.github/workflows/benchmark-nightly.yml @@ -14,6 +14,6 @@ jobs: uses: ./.github/workflows/instant_benchmark.yml secrets: inherit with: - running_template: tests/benchmark/nightly/g5-2xl.txt + running_template: ./benchmark/nightly/g5-2xl.txt instance: g5.2xlarge record: cloudwatch \ No newline at end of file diff --git a/.github/workflows/instant_benchmark.yml b/.github/workflows/instant_benchmark.yml index 37177499f9..4677851d4b 100644 --- a/.github/workflows/instant_benchmark.yml +++ b/.github/workflows/instant_benchmark.yml @@ -71,12 +71,12 @@ jobs: https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ --fail \ | jq '.token' | tr -d '"' ) - ./start_instance.sh action_ib_${{ github.event.inputs.instance }} $token djl-serving + ./start_instance.sh action_ib_${{ inputs.instance }} $token djl-serving outputs: gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }} environment-setup: - runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] + runs-on: [ self-hosted, "${{ inputs.instance }}" ] timeout-minutes: 15 needs: [ create-runners ] steps: @@ -102,14 +102,14 @@ jobs: working-directory: tests/integration id: generate_matrix run: | - python3 instant_benchmark.py --parse ${{ github.event.inputs.running_template }} \ - --container "${{ github.event.inputs.container }}" + python3 instant_benchmark.py --parse ${{ inputs.running_template }} \ + --container "${{ inputs.container }}" outputs: jobs: ${{ steps.generate_matrix.outputs.jobs }} template: ${{ steps.generate_matrix.outputs.template }} benchmark_run: - runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] + runs-on: [ self-hosted, "${{ inputs.instance }}" ] timeout-minutes: 30 needs: [ environment-setup ] strategy: @@ -136,7 +136,7 @@ jobs: run: | echo "${{ needs.environment-setup.outputs.template }}" >> template.json python3 instant_benchmark.py --template template.json \ - --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} + --job ${{ matrix.job }} --instance ${{ inputs.instance }} bash instant_benchmark.sh - name: Configure AWS Credentials @@ -145,12 +145,12 @@ jobs: role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving aws-region: us-east-1 - name: Record benchmark job - if: ${{ github.event.inputs.record == 'table' || github.event.inputs.record == 'cloudwatch' }} + if: ${{ inputs.record == 'table' || inputs.record == 'cloudwatch' }} working-directory: tests/integration run: | python3 record_benchmark.py --template template.json \ - --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \ - --model models/test --record ${{ github.event.inputs.record }} + --job ${{ matrix.job }} --instance ${{ inputs.instance }} \ + --model models/test --record ${{ inputs.record }} - name: Get serving logs if: always() working-directory: tests/integration diff --git a/tests/benchmark/nightly/g5-2xl.txt b/tests/integration/benchmark/nightly/g5-2xl.txt similarity index 71% rename from tests/benchmark/nightly/g5-2xl.txt rename to tests/integration/benchmark/nightly/g5-2xl.txt index 25d37a3b91..2f82bf5836 100644 --- a/tests/benchmark/nightly/g5-2xl.txt +++ b/tests/integration/benchmark/nightly/g5-2xl.txt @@ -7,9 +7,10 @@ CONCURRENCY={1,16,32} [serving_properties] engine=Python option.rolling_batch=vllm -option.model_id=mistralai/Mistral-7B-v0.1 +option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B +option.tensor_parallel_degree=max [aws_curl] -TOKENIZER=mistralai/Mistral-7B-v0.1 ./awscurl -c $CONCURRENCY -N 10 \ +TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c $CONCURRENCY -N 10 \ -X POST http://127.0.0.1:8080/invocations \ --connect-timeout 60 -H "Content-type: application/json" \ -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \