diff --git a/.github/workflows/benchmark-nightly.yml b/.github/workflows/benchmark-nightly.yml new file mode 100644 index 000000000..8031e486d --- /dev/null +++ b/.github/workflows/benchmark-nightly.yml @@ -0,0 +1,18 @@ +name: Benchmark Nightly + +on: + schedule: + - cron: '0 1 * * *' + +permissions: + id-token: write + contents: read + +jobs: + g5-2xl: + uses: ./.github/workflows/instant_benchmark.yml + secrets: inherit + with: + running_template: ./benchmark/nightly/g5-2xl.txt + instance: g5.2xlarge + record: cloudwatch \ No newline at end of file diff --git a/.github/workflows/instant_benchmark.yml b/.github/workflows/instant_benchmark.yml index 06317fa13..4677851d4 100644 --- a/.github/workflows/instant_benchmark.yml +++ b/.github/workflows/instant_benchmark.yml @@ -34,6 +34,26 @@ on: - none - table - cloudwatch + workflow_call: + inputs: + running_template: + description: 'A json file that contains benchmark plans' + required: true + type: string + instance: + description: 'Instance used for benchmark' + required: true + type: string + container: + description: 'The container used to run benchmark (overrides the template). Should be a full docker path such as deepjavalibrary/djl-serving:0.27.0-deepspeed' + required: false + type: string + default: '' + record: + description: 'Whether to record the results' + required: false + type: string + default: 'none' permissions: id-token: write @@ -51,12 +71,12 @@ jobs: https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \ --fail \ | jq '.token' | tr -d '"' ) - ./start_instance.sh action_ib_${{ github.event.inputs.instance }} $token djl-serving + ./start_instance.sh action_ib_${{ inputs.instance }} $token djl-serving outputs: gpu_instance_id: ${{ steps.create_instance.outputs.action_ib_instance_id }} environment-setup: - runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] + runs-on: [ self-hosted, "${{ inputs.instance }}" ] timeout-minutes: 15 needs: [ create-runners ] steps: @@ -82,14 +102,14 @@ jobs: working-directory: tests/integration id: generate_matrix run: | - python3 instant_benchmark.py --parse ${{ github.event.inputs.running_template }} \ - --container "${{ github.event.inputs.container }}" + python3 instant_benchmark.py --parse ${{ inputs.running_template }} \ + --container "${{ inputs.container }}" outputs: jobs: ${{ steps.generate_matrix.outputs.jobs }} template: ${{ steps.generate_matrix.outputs.template }} benchmark_run: - runs-on: [ self-hosted, "${{ github.event.inputs.instance }}" ] + runs-on: [ self-hosted, "${{ inputs.instance }}" ] timeout-minutes: 30 needs: [ environment-setup ] strategy: @@ -116,7 +136,7 @@ jobs: run: | echo "${{ needs.environment-setup.outputs.template }}" >> template.json python3 instant_benchmark.py --template template.json \ - --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} + --job ${{ matrix.job }} --instance ${{ inputs.instance }} bash instant_benchmark.sh - name: Configure AWS Credentials @@ -125,12 +145,12 @@ jobs: role-to-assume: arn:aws:iam::185921645874:role/github-actions-djl-serving aws-region: us-east-1 - name: Record benchmark job - if: ${{ github.event.inputs.record == 'table' || github.event.inputs.record == 'cloudwatch' }} + if: ${{ inputs.record == 'table' || inputs.record == 'cloudwatch' }} working-directory: tests/integration run: | python3 record_benchmark.py --template template.json \ - --job ${{ matrix.job }} --instance ${{ github.event.inputs.instance }} \ - --model models/test --record ${{ github.event.inputs.record }} + --job ${{ matrix.job }} --instance ${{ inputs.instance }} \ + --model models/test --record ${{ inputs.record }} - name: Get serving logs if: always() working-directory: tests/integration diff --git a/tests/integration/benchmark/nightly/g5-2xl.txt b/tests/integration/benchmark/nightly/g5-2xl.txt new file mode 100644 index 000000000..c181ff5fd --- /dev/null +++ b/tests/integration/benchmark/nightly/g5-2xl.txt @@ -0,0 +1,16 @@ +[test_name] +mistral-vllm +[container] +deepjavalibrary/djl-serving:deepspeed-nightly +[serving_properties] +engine=Python +option.rolling_batch=vllm +option.model_id=NousResearch/Hermes-2-Pro-Mistral-7B +option.tensor_parallel_degree=max +option.max_model_len=8192 +[aws_curl] +TOKENIZER=NousResearch/Hermes-2-Pro-Mistral-7B ./awscurl -c 32 -N 10 \ +-X POST http://127.0.0.1:8080/invocations \ +--connect-timeout 60 -H "Content-type: application/json" \ +-d '{"inputs":"The new movie that got Oscar this year","parameters":{"max_new_tokens":256, "do_sample":true}}' \ +-t -o /tmp/output.txt \ No newline at end of file diff --git a/tests/integration/record_benchmark.py b/tests/integration/record_benchmark.py index f08aa644a..8a94da546 100755 --- a/tests/integration/record_benchmark.py +++ b/tests/integration/record_benchmark.py @@ -64,8 +64,10 @@ def record_table(): def record_cloudwatch(): - esc = lambda n: n.replace("/", "-").replace(".", "-").strip(' -') - metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}_{esc(data['modelId'])}_{n}" + esc = lambda n: n.replace("/", "-").replace(".", "-").replace("=", "-" + ).strip(' -') + job_name = "" if "job" not in data else "_" + data["job"] + metric_name = lambda n: f"lmi_{data['instance']}_{esc(data['image'])}{esc(job_name)}_{esc(data['modelId'])}_{n}" metric_data = [ { 'MetricName': metric_name("throughput"), @@ -139,12 +141,16 @@ def data_container(): container = data["container"] if container.startswith("deepjavalibrary/djl-serving:"): container = container[len("deepjavalibrary/djl-serving:"):] - split = container.split("-", 1) - data["djlVersion"] = split[0] - if len(split) > 1: - data["image"] = split[1] - else: - data["image"] = "cpu" + if container[0] == "0": # Release build + split = container.split("-", 1) + data["djlVersion"] = split[0] + if len(split) > 1: + data["image"] = split[1] + else: + data["image"] = "cpu" + else: # Nightly build + data["djlNightly"] = "true" + data["image"] = container[:-len("-nightly")] if "text-generation-inference" in container: data["modelServer"] = "TGI" version = container.split(":")[1] @@ -210,6 +216,7 @@ def data_from_template(): with open(args.template, "r") as f: template = json.load(f) job_template = template[args.job] + data["job"] = args.job data["awscurl"] = bytes.fromhex( job_template['awscurl']).decode("utf-8") if "container" not in data and "container" in job_template: