[tnx] refactor neuron testing
tosterberg committed May 25, 2024
1 parent f30c88a commit 8db75ca
Showing 1 changed file with 6 additions and 323 deletions.
329 changes: 6 additions & 323 deletions .github/workflows/llm_inf2_integration.yml
@@ -24,18 +24,8 @@ jobs:
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_inf2 $token djl-serving
- name: Create new Inf2.24xl instance
id: create_inf2_2
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_inf2 $token djl-serving
outputs:
inf2_instance_id_1: ${{ steps.create_inf2.outputs.action_inf2_instance_id }}
inf2_instance_id_2: ${{ steps.create_inf2_2.outputs.action_inf2_instance_id }}

transformers-neuronx-container-unit-tests:
runs-on: [ self-hosted, inf2 ]
@@ -62,8 +52,11 @@ jobs:
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
- name: Run djl_python unit/integration tests on container
run: |
# setup
# Setup
mkdir logs
if [[ "$UID" == "1000" ]]; then uid_mapping="-u djl"; fi
# Run test suite
docker run -t --rm --network="host" \
--name neuron-test \
-v $PWD/:/opt/ml/model/ \
@@ -73,9 +66,6 @@ jobs:
/bin/bash -c "'pip install /opt/ml/model/engines/python/setup/. pytest' && \
pytest engines/python/setup/djl_python/tests/neuron_test_scripts/ | tee logs/results.log"
# Cleanup
sudo rm -rf TinyLlama engines/python/setup/.pytest_cache
# Fail on failed tests
if grep -F "failed" logs/results.log &>/dev/null; then exit 1; fi
- name: On fail step
Expand All @@ -91,7 +81,7 @@ jobs:
transformers-neuronx-test-1:
runs-on: [ self-hosted, inf2 ]
timeout-minutes: 90
needs: create-runners
needs: [ create-runners, transformers-neuronx-container-unit-tests ]
steps:
- uses: actions/checkout@v4
- name: Clean env
@@ -128,311 +118,6 @@ jobs:
serve -m test::Python:nc0=file:/opt/ml/model/resnet18_no_reqs_inf2_2_4.tar.gz
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test transformers-neuronx gpt2 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx gpt2
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx gpt2
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx gpt2 quantization with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx gpt2-quantize
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx gpt2-quantize
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx opt-1.3b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx opt-1.3b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx opt-1.3b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx gpt-j-6b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx gpt-j-6b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx gpt-j-6b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx pythia-2.8b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx pythia-2.8b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx pythia-2.8b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx bloom-7b1 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx bloom-7b1
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx bloom-7b1
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test gpt2 partition
working-directory: tests/integration
run: |
sudo rm -rf models
python3 llm/prepare.py transformers_neuronx_aot gpt2
# To test the requirements.txt download.
echo "dummy_test" >> $PWD/models/test/requirements.txt
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
# checking if neff files are generated.
sudo mv $PWD/models/test/partition-test $PWD/models/
if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
then echo "compiled files generated"; else exit 1; fi
# checking whether requirements.txt download is successful
if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
then echo "requirements.txt install was successful"; else exit 1; fi
if [ -d models ]; then sudo rm -rf models; fi
- name: Test gpt2-quantize partition
working-directory: tests/integration
run: |
sudo rm -rf models
python3 llm/prepare.py transformers_neuronx_aot gpt2-quantize
# To test the requirements.txt download.
echo "dummy_test" >> $PWD/models/test/requirements.txt
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
# checking if neff files are generated.
sudo mv $PWD/models/test/partition-test $PWD/models/
if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
then echo "compiled files generated"; else exit 1; fi
# checking whether requirements.txt download is successful
if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
then echo "requirements.txt install was successful"; else exit 1; fi
if [ -d models ]; then sudo rm -rf models; fi
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
run: |
if [ -d models ]; then sudo rm -rf models; fi
cat logs/serving.log
- name: Upload test logs
uses: actions/upload-artifact@v3
with:
name: transformers-neuronx-${{ matrix.arch }}-logs
path: tests/integration/logs/

transformers-neuronx-test-2:
runs-on: [ self-hosted, inf2 ]
timeout-minutes: 90
needs: create-runners
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up Python3
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests numpy pillow
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh pytorch-inf2
- name: Test streaming transformers-neuronx opt-1.3b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx opt-1.3b-streaming
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx opt-1.3b-streaming
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test stable diffusion 1.5 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx stable-diffusion-1.5-neuron
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py neuron-stable-diffusion stable-diffusion-1.5-neuron
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test stable diffusion bf16 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx stable-diffusion-2.1-neuron
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py neuron-stable-diffusion stable-diffusion-2.1-neuron
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test stable diffusion xl with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx stable-diffusion-xl-neuron
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py neuron-stable-diffusion stable-diffusion-xl-neuron
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test mistral 7B with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx mistral-7b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx mistral-7b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
run: |
if [ -d models ]; then sudo rm -rf models; fi
cat logs/serving.log
- name: Upload test logs
uses: actions/upload-artifact@v3
with:
name: transformers-neuronx-${{ matrix.arch }}-logs
path: tests/integration/logs/

transformers-neuronx-rolling-batch:
runs-on: [ self-hosted, inf2 ]
timeout-minutes: 90
needs: create-runners
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: 'corretto'
java-version: 17
- name: Set up Python3
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests numpy pillow
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh pytorch-inf2
- name: Test transformers-neuronx llama-7b rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-7b-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-7b-rb
docker rm -f $(docker ps -aq)
- name: Test transformers-neuronx tiny-llama vllm model load and rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx tiny-llama-rb-vllm
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch tiny-llama-rb-vllm
docker rm -f $(docker ps -aq)
- name: Test transformers-neuronx llama-3-8b vllm rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-3-8b-rb-vllm
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-4 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-3-8b-rb-vllm
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx mixtral-8x-7b rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx mixtral-8x7b-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-4 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch mixtral-8x7b-rb
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx llama-2-13b-speculative-rb rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-speculative-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-speculative-rb
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx llama-2-13b-speculative-rb compiled draft model rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-speculative-compiled-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-speculative-compiled-rb
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
@@ -448,12 +133,10 @@ jobs:
stop-runners:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [ create-runners, transformers-neuronx-container-unit-tests, transformers-neuronx-test-1, transformers-neuronx-test-2, transformers-neuronx-rolling-batch ]
needs: [ create-runners, transformers-neuronx-container-unit-tests, transformers-neuronx-test-1 ]
steps:
- name: Stop all instances
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-runners.outputs.inf2_instance_id_1 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.inf2_instance_id_2 }}
./stop_instance.sh $instance_id
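
A note on the consolidated unit-test step this commit keeps: the pipeline "pytest ... | tee logs/results.log" exits with tee's status rather than pytest's, which is why the workflow scans the captured log and fails the step by hand. Below is a minimal sketch of that guard, followed by a "set -o pipefail" variant that would surface pytest's own exit code; the pipefail form is an alternative sketch, not what the workflow uses.

    # Pattern from the workflow: the pipe returns tee's (zero) exit status,
    # so scan the saved log for a pytest failure marker and fail explicitly.
    pytest engines/python/setup/djl_python/tests/neuron_test_scripts/ | tee logs/results.log
    if grep -F "failed" logs/results.log &>/dev/null; then exit 1; fi

    # Alternative sketch: make the pipeline return the rightmost non-zero
    # status, so a pytest failure propagates without scanning the log.
    set -o pipefail
    pytest engines/python/setup/djl_python/tests/neuron_test_scripts/ | tee logs/results.log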
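The uid_mapping line added in the unit-test step only sets a variable; the docker run invocation that consumes it falls in a collapsed region of this diff, so the wiring below is an assumption for illustration. The idea is that the variable is expanded unquoted, so an empty value contributes no arguments while "-u djl" word-splits into the flag and its value.

    # Hypothetical consumer of the new uid_mapping variable (the actual
    # docker run line is collapsed in the diff above); the expansion is
    # deliberately left unquoted so it can contribute zero or two arguments.
    if [[ "$UID" == "1000" ]]; then uid_mapping="-u djl"; fi
    docker run -t --rm --network="host" \
      ${uid_mapping:-} \
      -v $PWD/:/opt/ml/model/ \
      deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \
      /bin/bash -c "id -un"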
