From ca03586cc37fc8d44cfb0e11bc6fbd64d973c3b2 Mon Sep 17 00:00:00 2001 From: Zach Kimberg Date: Thu, 18 Apr 2024 15:39:43 -0700 Subject: [PATCH] Debug --- .github/workflows/benchmark-nightly.yml | 2 +- .github/workflows/instant_benchmark.yml | 18 +++++++-------- .../benchmark/nightly/g5-2xl.txt | 12 +++++----- tests/integration/record_benchmark.py | 23 ++++++++++++------- 4 files changed, 31 insertions(+), 24 deletions(-) rename tests/{ => integration}/benchmark/nightly/g5-2xl.txt (58%) diff --git a/.github/workflows/benchmark-nightly.yml b/.github/workflows/benchmark-nightly.yml index ff49bc9706..982cc451ee 100644 --- a/.github/workflows/benchmark-nightly.yml +++ b/.github/workflows/benchmark-nightly.yml @@ -14,6 +14,6 @@ jobs: uses: ./.github/workflows/instant_benchmark.yml secrets: inherit with: - running_template: tests/benchmark/nightly/g5-2xl.txt + running_template: ./benchmark/nightly/g5-2xl.txt instance: g5.2xlarge record: cloudwatch \ No newline at end of file diff --git a/.github/workflows/instant_benchmark.yml b/.github/workflows/instant_benchmark.yml index 37177499f9..4677851d4b 100644 --- a/.github/workflows/instant_benchmark.yml +++ b/.github/workflows/instant_benchmark.yml @@ -71,12 +71,12 @@ jobs: https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ --fail \ | jq '.token' | tr -d '"' ) - ./start_instance.sh action_ib_${{ github.event.inputs.instance }} $token djl-serving + ./start_instance.sh action_ib_${{ inputs.instance }} $token djl-serving outputs: gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }} environment-setup: - runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] + runs-on: [ self-hosted, "${{ inputs.instance }}" ] timeout-minutes: 15 needs: [ create-runners ] steps: @@ -102,14 +102,14 @@ jobs: working-directory: tests/integration id: generate_matrix run: | - python3 instant_benchmark.py --parse ${{ github.event.inputs.running_template }} \ - --container "${{ github.event.inputs.container }}" + python3 instant_benchmark.py --parse ${{ inputs.running_template }} \ + --container "${{ inputs.container }}" outputs: jobs: ${{ steps.generate_matrix.outputs.jobs }} template: ${{ steps.generate_matrix.outputs.template }} benchmark_run: - runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] + runs-on: [ self-hosted, "${{ inputs.instance }}" ] timeout-minutes: 30 needs: [ environment-setup ] strategy: @@ -136,7 +136,7 @@ jobs: run: | echo "${{ needs.environment-setup.outputs.template }}" >> template.json python3 instant_benchmark.py --template template.json \ - --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} + --job ${{ matrix.job }} --instance ${{ inputs.instance }} bash instant_benchmark.sh - name: Configure AWS Credentials @@ -145,12 +145,12 @@ jobs: role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving aws-region: us-east-1 - name: Record benchmark job - if: ${{ github.event.inputs.record == 'table' || github.event.inputs.record == 'cloudwatch' }} + if: ${{ inputs.record == 'table' || inputs.record == 'cloudwatch' }} working-directory: tests/integration run: | python3 record_benchmark.py --template template.json \ - --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \ - --model models/test --record ${{ github.event.inputs.record }} + --job ${{ matrix.job }} --instance ${{ inputs.instance }} \ + --model models/test --record ${{ inputs.record }} - name: Get serving logs if: always() working-directory: tests/integration diff --git a/tests/benchmark/nightly/g5-2xl.txt b/tests/integration/benchmark/nightly/g5-2xl.txt similarity index 58% rename from tests/benchmark/nightly/g5-2xl.txt rename to tests/integration/benchmark/nightly/g5-2xl.txt index 25d37a3b91..c181ff5fdb 100644 --- a/tests/benchmark/nightly/g5-2xl.txt +++ b/tests/integration/benchmark/nightly/g5-2xl.txt @@ -1,15 +1,15 @@ [test_name] -mistral +mistral-vllm [container] -deepjavalibrary/djl-serving:0.27.0-deepspeed -[vars] -CONCURRENCY={1,16,32} +deepjavalibrary/djl-serving:deepspeed-nightly [serving_properties] engine=Python option.rolling_batch=vllm -option.model_id=mistralai/Mistral-7B-v0.1 +option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B +option.tensor_parallel_degree=max +option.max_model_len=8192 [aws_curl] -TOKENIZER=mistralai/Mistral-7B-v0.1 ./awscurl -c $CONCURRENCY -N 10 \ +TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \ -X POST http://127.0.0.1:8080/invocations \ --connect-timeout 60 -H "Content-type: application/json" \ -d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \ diff --git a/tests/integration/record_benchmark.py b/tests/integration/record_benchmark.py index f08aa644a9..8a94da546a 100755 --- a/tests/integration/record_benchmark.py +++ b/tests/integration/record_benchmark.py @@ -64,8 +64,10 @@ def record_table(): def record_cloudwatch(): - esc = lambda n: n.replace("/", "-").replace(".", "-").strip(' -') - metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}_{esc(data['modelId'])}_{n}" + esc = lambda n: n.replace("/", "-").replace(".", "-").replace("=", "-" + ).strip(' -') + job_name = "" if "job" not in data else "_" + data["job"] + metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}{esc(job_name)}_{esc(data['modelId'])}_{n}" metric_data = [ { 'MetricName': metric_name("throughput"), @@ -139,12 +141,16 @@ def data_container(): container = data["container"] if container.startswith("deepjavalibrary/djl-serving:"): container = container[len("deepjavalibrary/djl-serving:"):] - split = container.split("-", 1) - data["djlVersion"] = split[0] - if len(split) > 1: - data["image"] = split[1] - else: - data["image"] = "cpu" + if container[0] == "0": # Release build + split = container.split("-", 1) + data["djlVersion"] = split[0] + if len(split) > 1: + data["image"] = split[1] + else: + data["image"] = "cpu" + else: # Nightly build + data["djlNightly"] = "true" + data["image"] = container[:-len("-nightly")] if "text-generation-inference" in container: data["modelServer"] = "TGI" version = container.split(":")[1] @@ -210,6 +216,7 @@ def data_from_template(): with open(args.template, "r") as f: template = json.load(f) job_template = template[args.job] + data["job"] = args.job data["awscurl"] = bytes.fromhex( job_template['awscurl']).decode("utf-8") if "container" not in data and "container" in job_template: