From 5d02634128c487fb16673cb18b7f4861128d204c Mon Sep 17 00:00:00 2001
From: Zach Kimberg
Date: Wed, 12 Jun 2024 11:54:37 -0700
Subject: [PATCH 1/2] [CI] Inferentia tests through pytest

This moves the inferentia2 integration tests into the merged suite along
with the rest of the llm integration tests.
---
 .github/workflows/client-test.yml          |   2 +-
 .github/workflows/llm_inf2_integration.yml | 463 ---------------------
 .github/workflows/llm_integration.yml      | 125 +++++-
 tests/{ => integration}/test_client.py     |   7 +-
 tests/integration/tests.py                 | 192 ++++++++-
 5 files changed, 303 insertions(+), 486 deletions(-)
 delete mode 100644 .github/workflows/llm_inf2_integration.yml
 rename tests/{ => integration}/test_client.py (94%)

diff --git a/.github/workflows/client-test.yml b/.github/workflows/client-test.yml
index 188924814..1362636ae 100644
--- a/.github/workflows/client-test.yml
+++ b/.github/workflows/client-test.yml
@@ -62,7 +62,7 @@ jobs:
           cd tests
           djl-serving -m test::Python=file://$PWD/python &> output.log &
           sleep 15
-          python test_client.py
+          python integration/test_client.py
           jobs
           kill %1
       - name: On failure step
diff --git a/.github/workflows/llm_inf2_integration.yml b/.github/workflows/llm_inf2_integration.yml
deleted file mode 100644
index f46a893b0..000000000
--- a/.github/workflows/llm_inf2_integration.yml
+++ /dev/null
@@ -1,463 +0,0 @@
-name: Inferentia2 integration tests
-
-on:
-  workflow_dispatch:
-    inputs:
-      djl-version:
-        description: 'The released version of DJL'
-        required: false
-        default: ''
-  schedule:
-    - cron: '0 15 * * *'
-
-
-jobs:
-  create-runners:
-    runs-on: [self-hosted, scheduler]
-    steps:
-      - name: Create new Inf2.24xl instance
-        id: create_inf2
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_inf2 $token djl-serving
-      - name: Create new Inf2.24xl instance
-        id: create_inf2_2
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
-          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
-          --fail \
-          | jq '.token' | tr -d '"' )
-          ./start_instance.sh action_inf2 $token djl-serving
-    outputs:
-      inf2_instance_id_1: ${{ steps.create_inf2.outputs.action_inf2_instance_id }}
-      inf2_instance_id_2: ${{ steps.create_inf2_2.outputs.action_inf2_instance_id }}
-
-  transformers-neuronx-container-unit-tests:
-    runs-on: [ self-hosted, inf2 ]
-    timeout-minutes: 15
-    needs: create-runners
-    steps:
-      - uses: actions/checkout@v4
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: Install pip dependencies
-        run: pip3 install requests "numpy<2" pillow wheel
-      - name: Build container name
-        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
-      - name: Download models and dockers
-        run: |
-          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
-      - name: Run djl_python unit/integration tests on container
-        working-directory: engines/python/setup
-        run: |
-          # Setup
-          pip install setuptools
-          python3 -m setup bdist_wheel
-          mkdir logs
-          docker run -t --rm --network="host" \
-          --name neuron-test \
-          -v $PWD/:/opt/ml/model/ \
-          -w /opt/ml/model \
-          --device=/dev/neuron0:/dev/neuron0 \
-          deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \
-          /bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \
-          pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log"
-
-          # Cleanup
-          sudo rm -rf TinyLlama .pytest_cache djl_python
-
-          # Fail on failed tests
-          if grep -F "failed" logs/results.log &>/dev/null; then exit 1; fi
-      - name: On fail step
-        if: ${{ failure() }}
-        working-directory: engines/python/setup
-        run: |
-          cat logs/results.log
-      - name: Upload test logs
-        uses: actions/upload-artifact@v3
-        with:
-          name: transformers-neuronx-${{ matrix.arch }}-logs
-          path: engines/python/setup/logs/
-
-  transformers-neuronx-test-1:
-    runs-on: [ self-hosted, inf2 ]
-    timeout-minutes: 90
-    needs: create-runners
-    steps:
-      - uses: actions/checkout@v4
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: Install pip dependencies
-        run: pip3 install requests "numpy<2" pillow
-      - name: Build container name
-        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
-      - name: Download models and dockers
-        working-directory: tests/integration
-        run: |
-          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
-          mkdir logs
-          ./download_models.sh pytorch-inf2
-      - name: Test Pytorch model
-        working-directory: tests/integration
-        run: |
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
-          serve -m test::PyTorch:nc0=file:/opt/ml/model/resnet18_inf2_2_4.tar.gz
-          ./test_client.sh image/jpg models/kitten.jpg
-          docker rm -f $(docker ps -aq)
-      - name: Test Python mode
-        working-directory: tests/integration
-        run: |
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
-          serve -m test::Python:nc0=file:/opt/ml/model/resnet18_no_reqs_inf2_2_4.tar.gz
-          ./test_client.sh image/jpg models/kitten.jpg
-          docker rm -f $(docker ps -aq)
-      - name: Test transformers-neuronx gpt2 with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx gpt2
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx gpt2
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx gpt2 quantization with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx gpt2-quantize
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx gpt2-quantize
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx opt-1.3b with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx opt-1.3b
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx opt-1.3b
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx gpt-j-6b with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx gpt-j-6b
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx gpt-j-6b
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx pythia-2.8b with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx pythia-2.8b
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx pythia-2.8b
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx bloom-7b1 with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx bloom-7b1
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx bloom-7b1
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test gpt2 partition
-        working-directory: tests/integration
-        run: |
-          sudo rm -rf models
-          python3 llm/prepare.py transformers_neuronx_aot gpt2
-          # To test the requirements.txt download.
-          echo "dummy_test" >> $PWD/models/test/requirements.txt
-
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
-          partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
-
-          # checking if neff files are generated.
-          sudo mv $PWD/models/test/partition-test $PWD/models/
-          if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
-          then echo "compiled files generated"; else exit 1; fi
-
-          # checking whether requirements.txt download is successful
-          if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
-          then echo "requirements.txt install was successful"; else exit 1; fi
-          if [ -d models ]; then sudo rm -rf models; fi
-      - name: Test gpt2-quantize partition
-        working-directory: tests/integration
-        run: |
-          sudo rm -rf models
-          python3 llm/prepare.py transformers_neuronx_aot gpt2-quantize
-          # To test the requirements.txt download.
-          echo "dummy_test" >> $PWD/models/test/requirements.txt
-
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
-          partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
-
-          # checking if neff files are generated.
-          sudo mv $PWD/models/test/partition-test $PWD/models/
-          if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
-          then echo "compiled files generated"; else exit 1; fi
-
-          # checking whether requirements.txt download is successful
-          if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
-          then echo "requirements.txt install was successful"; else exit 1; fi
-          if [ -d models ]; then sudo rm -rf models; fi
-      - name: On fail step
-        if: ${{ failure() }}
-        working-directory: tests/integration
-        run: |
-          if [ -d models ]; then sudo rm -rf models; fi
-          cat logs/serving.log
-      - name: Upload test logs
-        uses: actions/upload-artifact@v3
-        with:
-          name: transformers-neuronx-${{ matrix.arch }}-logs
-          path: tests/integration/logs/
-
-  transformers-neuronx-test-2:
-    runs-on: [ self-hosted, inf2 ]
-    timeout-minutes: 90
-    needs: create-runners
-    steps:
-      - uses: actions/checkout@v4
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: Install pip dependencies
-        run: pip3 install requests "numpy<2" pillow
-      - name: Build container name
-        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
-      - name: Download models and dockers
-        working-directory: tests/integration
-        run: |
-          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
-          mkdir logs
-          ./download_models.sh pytorch-inf2
-      - name: Test streaming transformers-neuronx opt-1.3b with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx opt-1.3b-streaming
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx opt-1.3b-streaming
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test stable diffusion 1.5 with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx stable-diffusion-1.5-neuron
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py neuron-stable-diffusion stable-diffusion-1.5-neuron
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test stable diffusion bf16 with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx stable-diffusion-2.1-neuron
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py neuron-stable-diffusion stable-diffusion-2.1-neuron
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test stable diffusion xl with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx stable-diffusion-xl-neuron
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py neuron-stable-diffusion stable-diffusion-xl-neuron
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test mistral 7B with handler
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx mistral-7b
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx mistral-7b
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: On fail step
-        if: ${{ failure() }}
-        working-directory: tests/integration
-        run: |
-          if [ -d models ]; then sudo rm -rf models; fi
-          cat logs/serving.log
-      - name: Upload test logs
-        uses: actions/upload-artifact@v3
-        with:
-          name: transformers-neuronx-${{ matrix.arch }}-logs
-          path: tests/integration/logs/
-
-  transformers-neuronx-rolling-batch:
-    runs-on: [ self-hosted, inf2 ]
-    timeout-minutes: 90
-    needs: create-runners
-    steps:
-      - uses: actions/checkout@v4
-      - name: Clean env
-        run: |
-          yes | docker system prune -a --volumes
-          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
-          echo "wait dpkg lock..."
-          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
-      - name: Set up JDK 17
-        uses: actions/setup-java@v4
-        with:
-          distribution: 'corretto'
-          java-version: 17
-      - name: Set up Python3
-        uses: actions/setup-python@v5
-        with:
-          python-version: '3.10.x'
-      - name: Install pip dependencies
-        run: pip3 install requests "numpy<2" pillow
-      - name: Build container name
-        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
-      - name: Download models and dockers
-        working-directory: tests/integration
-        run: |
-          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
-          mkdir logs
-          ./download_models.sh pytorch-inf2
-      - name: Test transformers-neuronx llama-7b rolling batch
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx llama-7b-rb
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx_rolling_batch llama-7b-rb
-          docker rm -f $(docker ps -aq)
-      - name: Test transformers-neuronx tiny-llama vllm model load and rolling batch
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx tiny-llama-rb-vllm
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx_rolling_batch tiny-llama-rb-vllm
-          docker rm -f $(docker ps -aq)
-      - name: Test transformers-neuronx llama-3-8b vllm rolling batch
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx llama-3-8b-rb-vllm
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-4 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx_rolling_batch llama-3-8b-rb-vllm
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx mixtral-8x-7b rolling batch
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx mixtral-8x7b-rb
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-4 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx_rolling_batch mixtral-8x7b-rb
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx llama-2-13b-speculative-rb rolling batch
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx llama-speculative-rb
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx_rolling_batch llama-speculative-rb
-          docker rm -f $(docker ps -aq)
-          sudo rm -rf models
-      - name: Test transformers-neuronx llama-2-13b-speculative-rb compiled draft model rolling batch
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py transformers_neuronx llama-speculative-compiled-rb
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
-          serve
-          curl http://127.0.0.1:8080/models
-          python3 llm/client.py transformers_neuronx_rolling_batch llama-speculative-compiled-rb
-          docker rm -f $(docker ps -aq)
-      - name: On fail step
-        if: ${{ failure() }}
-        working-directory: tests/integration
-        run: |
-          if [ -d models ]; then sudo rm -rf models; fi
-          cat logs/serving.log
-      - name: Upload test logs
-        uses: actions/upload-artifact@v3
-        with:
-          name: transformers-neuronx-${{ matrix.arch }}-logs
-          path: tests/integration/logs/
-
-  stop-runners:
-    if: always()
-    runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners, transformers-neuronx-container-unit-tests, transformers-neuronx-test-1, transformers-neuronx-test-2, transformers-neuronx-rolling-batch ]
-    steps:
-      - name: Stop all instances
-        run: |
-          cd /home/ubuntu/djl_benchmark_script/scripts
-          instance_id=${{ needs.create-runners.outputs.inf2_instance_id_1 }}
-          ./stop_instance.sh $instance_id
-          instance_id=${{ needs.create-runners.outputs.inf2_instance_id_2 }}
-          ./stop_instance.sh $instance_id
diff --git a/.github/workflows/llm_integration.yml b/.github/workflows/llm_integration.yml
index a74879060..d31503595 100644
--- a/.github/workflows/llm_integration.yml
+++ b/.github/workflows/llm_integration.yml
@@ -42,29 +42,65 @@ jobs:
           --fail \
           | jq '.token' | tr -d '"' )
           ./start_instance.sh action_g6 $token djl-serving
+      - name: Create new Inf2.24xl instance
+        id: create_inf2
+        run: |
+          cd /home/ubuntu/djl_benchmark_script/scripts
+          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
+          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
+          --fail \
+          | jq '.token' | tr -d '"' )
+          ./start_instance.sh action_inf2 $token djl-serving
+      - name: Create new Inf2.24xl instance
+        id: create_inf2_2
+        run: |
+          cd /home/ubuntu/djl_benchmark_script/scripts
+          token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
+          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
+          --fail \
+          | jq '.token' | tr -d '"' )
+          ./start_instance.sh action_inf2 $token djl-serving
     outputs:
       gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g6_instance_id }}
       gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g6_instance_id }}
       gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g6_instance_id }}
+      inf2_instance_id_1: ${{ steps.create_inf2.outputs.action_inf2_instance_id }}
+      inf2_instance_id_2: ${{ steps.create_inf2_2.outputs.action_inf2_instance_id }}
 
   test:
-    runs-on: [ self-hosted, g6 ]
+    runs-on: [ self-hosted, "${{ matrix.test.instance }}" ]
     timeout-minutes: 60
     needs: create-runners
     strategy:
       fail-fast: false
      matrix:
         test:
-          - TestHfHandler
-          - TestTrtLlmHandler1
-          - TestTrtLlmHandler2
-          - TestSchedulerSingleGPU
-          - TestSchedulerMultiGPU
-          - TestLmiDist1
-          - TestLmiDist2
-          - TestVllm1
-          - TestVllmLora
-          - TestLmiDistLora
+          - test: TestHfHandler
+            instance: g6
+          - test: TestTrtLlmHandler1
+            instance: g6
+          - test: TestTrtLlmHandler2
+            instance: g6
+          - test: TestSchedulerSingleGPU
+            instance: g6
+          - test: TestSchedulerMultiGPU
+            instance: g6
+          - test: TestLmiDist1
+            instance: g6
+          - test: TestLmiDist2
+            instance: g6
+          - test: TestVllm1
+            instance: g6
+          - test: TestVllmLora
+            instance: g6
+          - test: TestLmiDistLora
+            instance: g6
+          - test: TestNeuronx1
+            instance: inf2
+          - test: TestNeuronx2
+            instance: inf2
+          - test: TestNeuronxRollingBatch
+            instance: inf2
     steps:
       - uses: actions/checkout@v4
       - name: Clean env
@@ -78,7 +114,7 @@ jobs:
         with:
           python-version: '3.10.x'
       - name: Install pip dependencies
-        run: pip3 install pytest requests "numpy<2" huggingface_hub
+        run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
       - name: Install awscurl
         working-directory: tests/integration
         run: |
@@ -90,7 +126,7 @@ jobs:
         env:
           TEST_DJL_VERSION: ${{ inputs.djl-version }}
         run: |
-          pytest -k ${{ matrix.test }} tests.py
+          pytest -k ${{ matrix.test.test }} tests.py
       - name: Cleanup
         working-directory: tests/integration
         run: |
@@ -108,13 +144,68 @@ jobs:
         if: ${{ always() }}
         uses: actions/upload-artifact@v3
         with:
-          name: test-${{ matrix.test }}-logs
+          name: test-${{ matrix.test.test }}-logs
           path: tests/integration/all_logs/
 
+  transformers-neuronx-container-unit-tests:
+    runs-on: [ self-hosted, inf2 ]
+    timeout-minutes: 15
+    needs: create-runners
+    steps:
+      - uses: actions/checkout@v4
+      - name: Clean env
+        run: |
+          yes | docker system prune -a --volumes
+          sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
+          echo "wait dpkg lock..."
+          while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
+      - name: Set up Python3
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10.x'
+      - name: Install pip dependencies
+        run: pip3 install requests numpy pillow wheel
+      - name: Build container name
+        run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
+      - name: Download models and dockers
+        run: |
+          docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
+      - name: Run djl_python unit/integration tests on container
+        working-directory: engines/python/setup
+        run: |
+          # Setup
+          pip install setuptools
+          python3 -m setup bdist_wheel
+          mkdir logs
+          docker run -t --rm --network="host" \
+          --name neuron-test \
+          -v $PWD/:/opt/ml/model/ \
+          -w /opt/ml/model \
+          --device=/dev/neuron0:/dev/neuron0 \
+          deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \
+          /bin/bash -c "'pip install /opt/ml/model/dist/*.whl pytest' && \
+          pytest djl_python/tests/neuron_test_scripts/ | tee logs/results.log"
+
+          # Cleanup
+          sudo rm -rf TinyLlama .pytest_cache djl_python
+
+          # Fail on failed tests
+          if grep -F "failed" logs/results.log &>/dev/null; then exit 1; fi
+      - name: On fail step
+        if: ${{ failure() }}
+        working-directory: engines/python/setup
+        run: |
+          cat logs/results.log
+      - name: Upload test logs
+        uses: actions/upload-artifact@v3
+        with:
+          name: transformers-neuronx-${{ matrix.arch }}-logs
+          path: engines/python/setup/logs/
+
   stop-runners:
     if: always()
     runs-on: [ self-hosted, scheduler ]
-    needs: [ create-runners, test]
+    needs: [ create-runners, test, transformers-neuronx-container-unit-tests]
     steps:
       - name: Stop all instances
         run: |
@@ -125,3 +216,7 @@ jobs:
           ./stop_instance.sh $instance_id
           instance_id=${{ needs.create-runners.outputs.gpu_instance_id_3 }}
           ./stop_instance.sh $instance_id
+          instance_id=${{ needs.create-runners.outputs.inf2_instance_id_1 }}
+          ./stop_instance.sh $instance_id
+          instance_id=${{ needs.create-runners.outputs.inf2_instance_id_2 }}
+          ./stop_instance.sh $instance_id
diff --git a/tests/test_client.py b/tests/integration/test_client.py
similarity index 94%
rename from tests/test_client.py
rename to tests/integration/test_client.py
index f5da2480f..ac8ebe869 100644
--- a/tests/test_client.py
+++ b/tests/integration/test_client.py
@@ -67,5 +67,10 @@ def test_image(self):
         self.assertEqual(res.content, img.tobytes())
 
 
+def run():
+    suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestInputOutput)
+    unittest.TextTestRunner().run(suite)
+
+
 if __name__ == '__main__':
-    unittest.main(verbosity=2)
+    run()
diff --git a/tests/integration/tests.py b/tests/integration/tests.py
index 93ec1c450..c8efa5c93 100644
--- a/tests/integration/tests.py
+++ b/tests/integration/tests.py
@@ -2,16 +2,18 @@
 import os
 import subprocess
 
+import pytest
 import llm.prepare as prepare
 import llm.client as client
 import rb_client as rb_client
+import test_client
 
 djl_version = os.environ.get('TEST_DJL_VERSION', '').strip()
 
 
 class Runner:
 
-    def __init__(self, container, test_name=None):
+    def __init__(self, container, test_name=None, download=False):
         self.container = container
         self.test_name = test_name
@@ -26,9 +28,13 @@ def __init__(self, container, test_name=None):
 
         self.image = f"deepjavalibrary/djl-serving:{flavor}"
 
-    def __enter__(self):
         # os.system(f'docker pull {self.image}')
         os.system('rm -rf models')
+
+        if download:
+            os.system(f"./download_models.sh {self.container}")
+
+    def __enter__(self):
         return self
 
     def __exit__(self, *args):
@@ -39,7 +45,7 @@ def __exit__(self, *args):
             subprocess.run(["./remove_container.sh"], check=True)
             os.system("cat logs/serving.log")
 
-    def launch(self, env_vars=None, cmd=None):
+    def launch(self, env_vars=None, container=None, cmd=None):
         if env_vars is not None:
             with open("docker_env", "w") as f:
                 f.write(env_vars)
@@ -47,14 +53,18 @@ def launch(self, env_vars=None, cmd=None):
             if os.path.isfile("docker_env"):
                 os.remove("docker_env")
 
+        if container is None:
+            container = self.container
+
         if cmd is None:
             cmd = 'serve -m test=file:/opt/ml/model/test/'
 
         model_dir = os.path.join(os.getcwd(), 'models')
-        subprocess.run(
-            f'./launch_container.sh {self.image} {model_dir} {self.container} {cmd}'
+        return subprocess.run(
+            f'./launch_container.sh {self.image} {model_dir} {container} {cmd}'
             .split(),
-            check=True)
+            check=True,
+            capture_output=True)
 
 
 class TestHfHandler:
@@ -430,3 +440,173 @@ def test_lora_llama3_8b(self):
             prepare.build_lmi_dist_model("llama3-8b-unmerged-lora")
             r.launch()
             client.run("lmi_dist_adapters llama3-8b-unmerged-lora".split())
+
+
+class TestNeuronx1:
+    # Runs on inf2.24xl
+
+    def test_python_mode(self):
+        with Runner('pytorch-inf2', 'test_python_mode', download=True) as r:
+            r.launch(
+                cmd=
+                'serve -m test::PyTorch:nc0=file:/opt/ml/model/resnet18_inf2_2_4.tar.gz',
+                container='pytorch-inf2-1')
+            test_client.run()
+
+    def test_gpt2(self):
+        with Runner('pytorch-inf2', 'gpt2') as r:
+            prepare.build_transformers_neuronx_handler_model("gpt2")
+            r.launch(container='pytorch-inf2-1')
+            client.run("transformers_neuronx gpt2".split())
+
+    def test_gpt2_quantize(self):
+        with Runner('pytorch-inf2', 'gpt2-quantize') as r:
+            prepare.build_transformers_neuronx_handler_model("gpt2-quantize")
+            r.launch(container='pytorch-inf2-1')
+            client.run("transformers_neuronx gpt2-quantize".split())
+
+    def test_opt_1_3b(self):
+        with Runner('pytorch-inf2', 'opt-1.3b') as r:
+            prepare.build_transformers_neuronx_handler_model("opt-1.3b")
+            r.launch(container='pytorch-inf2-6')
+            client.run("transformers_neuronx opt-1.3b".split())
+
+    def test_gpt_j_6b(self):
+        with Runner('pytorch-inf2', 'gpt-j-6b') as r:
+            prepare.build_transformers_neuronx_handler_model("gpt-j-6b")
+            r.launch(container='pytorch-inf2-6')
+            client.run("transformers_neuronx gpt-j-6b".split())
+
+    def test_pythia(self):
+        with Runner('pytorch-inf2', 'pythia-2.8b') as r:
+            prepare.build_transformers_neuronx_handler_model("pythia-2.8b")
+            r.launch(container='pytorch-inf2-2')
+            client.run("transformers_neuronx pythia-2.8b".split())
+
+    def test_bloom(self):
+        with Runner('pytorch-inf2', 'bloom-7b1') as r:
+            prepare.build_transformers_neuronx_handler_model("bloom-7b1")
+            r.launch(container='pytorch-inf2-2')
+            client.run("transformers_neuronx bloom-7b1".split())
+
+    @pytest.mark.parametrize("model", ["gpt2", "gpt2-quantize"])
+    def test_partition(self, model):
+        try:
+            with Runner('pytorch-inf2', f'partition-{model}') as r:
+                prepare.build_transformers_neuronx_handler_model(model)
+                with open("models/test/requirements.txt", "a") as f:
+                    f.write("dummy_test")
+                partition_output = r.launch(
+                    container="pytorch-inf2-1",
+                    cmd=
+                    'partition --model-dir /opt/ml/input/data/training/ --save-mp-checkpoint-path /opt/ml/input/data/training/partition --skip-copy'
+                )
+
+                # Check if neff files are generated
+                if len([
+                        fn
+                        for fn in os.listdir("models/test/partition/compiled")
+                        if fn.endswith(".neff")
+                ]) == 0:
+                    raise Exception("Failed to generate any .neff files")
+
+                # Check whether requirements.txt download is sufficient
+                if 'pip install requirements succeed!' not in partition_output.stdout.decode(
+                        "utf-8"):
+                    raise Exception(
+                        "Requirements.txt not installed successfully")
+        finally:
+            os.system('sudo rm -rf models')
+
+
+class TestNeuronx2:
+    # Runs on inf2.24xl
+
+    def test_stream_opt(self):
+        with Runner('pytorch-inf2', 'opt-1.3b-streaming') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "opt-1.3b-streaming")
+            r.launch(container='pytorch-inf2-6')
+            client.run("transformers_neuronx opt-1.3b-streaming".split())
+
+    def test_mistral(self):
+        with Runner('pytorch-inf2', 'mistral-7b') as r:
+            prepare.build_transformers_neuronx_handler_model("mistral-7b")
+            r.launch(container='pytorch-inf2-2')
+            client.run("transformers_neuronx mistral-7b".split())
+
+    def test_stable_diffusion_1_5(self):
+        with Runner('pytorch-inf2', 'stable-diffusion-1.5-neuron') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "stable-diffusion-1.5-neuron")
+            r.launch(container='pytorch-inf2-2')
+            client.run(
+                "neuron-stable-diffusion stable-diffusion-1.5-neuron".split())
+
+    def test_stable_diffusion_2_1(self):
+        with Runner('pytorch-inf2', 'stable-diffusion-2.1-neuron') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "stable-diffusion-2.1-neuron")
+            r.launch(container='pytorch-inf2-2')
+            client.run(
+                "neuron-stable-diffusion stable-diffusion-2.1-neuron".split())
+
+    def test_stable_diffusion_xl(self):
+        with Runner('pytorch-inf2', 'stable-diffusion-xl-neuron') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "stable-diffusion-xl-neuron")
+            r.launch(container='pytorch-inf2-2')
+            client.run(
+                "neuron-stable-diffusion stable-diffusion-xl-neuron".split())
+
+
+class TestNeuronxRollingBatch:
+    # Runs on inf2.24xl
+
+    def test_llama_7b(self):
+        with Runner('pytorch-inf2', 'llama-7b-rb') as r:
+            prepare.build_transformers_neuronx_handler_model("llama-7b-rb")
+            r.launch(container='pytorch-inf2-2')
+            client.run(
+                "transformers_neuronx_rolling_batch llama-7b-rb".split())
+
+    def test_tiny_llama_vllm(self):
+        with Runner('pytorch-inf2', 'tiny-llama-rb-vllm') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "tiny-llama-rb-vllm")
+            r.launch(container='pytorch-inf2-1')
+            client.run("transformers_neuronx_rolling_batch tiny-llama-rb-vllm".
+                       split())
+
+    def test_llama3_vllm(self):
+        with Runner('pytorch-inf2', 'llama-3-8b-rb-vllm') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "llama-3-8b-rb-vllm")
+            r.launch(container='pytorch-inf2-4')
+            client.run("transformers_neuronx_rolling_batch llama-3-8b-rb-vllm".
+                       split())
+
+    def test_mixtral(self):
+        with Runner('pytorch-inf2', 'mixtral-8x7b-rb') as r:
+            prepare.build_transformers_neuronx_handler_model("mixtral-8x7b-rb")
+            r.launch(container='pytorch-inf2-4')
+            client.run(
+                "transformers_neuronx_rolling_batch mixtral-8x7b-rb".split())
+
+    def test_llama_speculative(self):
+        with Runner('pytorch-inf2', 'llama-speculative-rb') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "llama-speculative-rb")
+            r.launch(container='pytorch-inf2-6')
+            client.run(
+                "transformers_neuronx_rolling_batch llama-speculative-rb".
+                split())
+
+    def test_llama_speculative_compiled(self):
+        with Runner('pytorch-inf2', 'llama-speculative-compiled-rb') as r:
+            prepare.build_transformers_neuronx_handler_model(
+                "llama-speculative-compiled-rb")
+            r.launch(container='pytorch-inf2-6')
+            client.run(
+                "transformers_neuronx_rolling_batch llama-speculative-compiled-rb"
+                .split())

From dda02c228bc54e210d091d3cb09ea6b499b46718 Mon Sep 17 00:00:00 2001
From: Zach Kimberg
Date: Tue, 25 Jun 2024 15:41:42 -0700
Subject: [PATCH 2/2] Fix

---
 .github/workflows/client-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/client-test.yml b/.github/workflows/client-test.yml
index 1362636ae..3502fd244 100644
--- a/.github/workflows/client-test.yml
+++ b/.github/workflows/client-test.yml
@@ -109,7 +109,7 @@ jobs:
           ./gradlew --stop
           ./gradlew :serving:run --args="-m test::Python=file:$(pwd -W)/tests/python" &> output.log &
           sleep 30
-          cd tests/ && python test_client.py
+          cd tests/ && python integration/test_client.py
       - name: On failure step
         if: ${{ failure() }}
         shell: bash