[tnx] refactor neuron testing
tosterberg committed May 25, 2024
1 parent f30c88a commit 8db75ca
Showing 1 changed file with 6 additions and 323 deletions.
329 changes: 6 additions & 323 deletions .github/workflows/llm_inf2_integration.yml
@@ -24,18 +24,8 @@ jobs:
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_inf2 $token djl-serving
- name: Create new Inf2.24xl instance
id: create_inf2_2
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_inf2 $token djl-serving
outputs:
inf2_instance_id_1: ${{ steps.create_inf2.outputs.action_inf2_instance_id }}
inf2_instance_id_2: ${{ steps.create_inf2_2.outputs.action_inf2_instance_id }}

transformers-neuronx-container-unit-tests:
runs-on: [ self-hosted, inf2 ]
@@ -62,8 +52,11 @@ jobs:
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
- name: Run djl_python unit/integration tests on container
run: |
# setup
# Setup
mkdir logs
if [[ "$UID" == "1000" ]]; then uid_mapping="-u djl"; fi
# Run test suite
docker run -t --rm --network="host" \
--name neuron-test \
-v $PWD/:/opt/ml/model/ \
@@ -73,9 +66,6 @@ jobs:
/bin/bash -c "'pip install /opt/ml/model/engines/python/setup/. pytest' && \
pytest engines/python/setup/djl_python/tests/neuron_test_scripts/ | tee logs/results.log"
# Cleanup
sudo rm -rf TinyLlama engines/python/setup/.pytest_cache
# Fail on failed tests
if grep -F "failed" logs/results.log &>/dev/null; then exit 1; fi
- name: On fail step
Expand All @@ -91,7 +81,7 @@ jobs:
transformers-neuronx-test-1:
runs-on: [ self-hosted, inf2 ]
timeout-minutes: 90
needs: create-runners
needs: [ create-runners, transformers-neuronx-container-unit-tests ]
steps:
- uses: actions/checkout@v4
- name: Clean env
@@ -128,311 +118,6 @@ jobs:
serve -m test::Python:nc0=file:/opt/ml/model/resnet18_no_reqs_inf2_2_4.tar.gz
./test_client.sh image/jpg models/kitten.jpg
docker rm -f $(docker ps -aq)
- name: Test transformers-neuronx gpt2 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx gpt2
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx gpt2
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx gpt2 quantization with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx gpt2-quantize
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx gpt2-quantize
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx opt-1.3b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx opt-1.3b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx opt-1.3b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx gpt-j-6b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx gpt-j-6b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx gpt-j-6b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx pythia-2.8b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx pythia-2.8b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx pythia-2.8b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx bloom-7b1 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx bloom-7b1
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx bloom-7b1
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test gpt2 partition
working-directory: tests/integration
run: |
sudo rm -rf models
python3 llm/prepare.py transformers_neuronx_aot gpt2
# To test the requirements.txt download.
echo "dummy_test" >> $PWD/models/test/requirements.txt
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
# checking if neff files are generated.
sudo mv $PWD/models/test/partition-test $PWD/models/
if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
then echo "compiled files generated"; else exit 1; fi
# checking whether requirements.txt download is successful
if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
then echo "requirements.txt install was successful"; else exit 1; fi
if [ -d models ]; then sudo rm -rf models; fi
- name: Test gpt2-quantize partition
working-directory: tests/integration
run: |
sudo rm -rf models
python3 llm/prepare.py transformers_neuronx_aot gpt2-quantize
# To test the requirements.txt download.
echo "dummy_test" >> $PWD/models/test/requirements.txt
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
partition --model-dir /opt/ml/input/data/training/ --skip-copy | tee partition_output.log
# checking if neff files are generated.
sudo mv $PWD/models/test/partition-test $PWD/models/
if ls $PWD/models/partition-test/compiled/*.neff &>/dev/null; \
then echo "compiled files generated"; else exit 1; fi
# checking whether requirements.txt download is successful
if grep -F "pip install requirements succeed!" partition_output.log &>/dev/null; \
then echo "requirements.txt install was successful"; else exit 1; fi
if [ -d models ]; then sudo rm -rf models; fi
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
run: |
if [ -d models ]; then sudo rm -rf models; fi
cat logs/serving.log
- name: Upload test logs
uses: actions/upload-artifact@v3
with:
name: transformers-neuronx-${{ matrix.arch }}-logs
path: tests/integration/logs/

transformers-neuronx-test-2:
runs-on: [ self-hosted, inf2 ]
timeout-minutes: 90
needs: create-runners
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up Python3
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests numpy pillow
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh pytorch-inf2
- name: Test streaming transformers-neuronx opt-1.3b with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx opt-1.3b-streaming
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx opt-1.3b-streaming
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test stable diffusion 1.5 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx stable-diffusion-1.5-neuron
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py neuron-stable-diffusion stable-diffusion-1.5-neuron
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test stable diffusion bf16 with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx stable-diffusion-2.1-neuron
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py neuron-stable-diffusion stable-diffusion-2.1-neuron
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test stable diffusion xl with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx stable-diffusion-xl-neuron
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py neuron-stable-diffusion stable-diffusion-xl-neuron
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test mistral 7B with handler
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx mistral-7b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx mistral-7b
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
run: |
if [ -d models ]; then sudo rm -rf models; fi
cat logs/serving.log
- name: Upload test logs
uses: actions/upload-artifact@v3
with:
name: transformers-neuronx-${{ matrix.arch }}-logs
path: tests/integration/logs/

transformers-neuronx-rolling-batch:
runs-on: [ self-hosted, inf2 ]
timeout-minutes: 90
needs: create-runners
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up JDK 17
uses: actions/setup-java@v4
with:
distribution: 'corretto'
java-version: 17
- name: Set up Python3
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests numpy pillow
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh pytorch-inf2 ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
mkdir logs
./download_models.sh pytorch-inf2
- name: Test transformers-neuronx llama-7b rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-7b-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-2 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-7b-rb
docker rm -f $(docker ps -aq)
- name: Test transformers-neuronx tiny-llama vllm model load and rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx tiny-llama-rb-vllm
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-1 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch tiny-llama-rb-vllm
docker rm -f $(docker ps -aq)
- name: Test transformers-neuronx llama-3-8b vllm rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-3-8b-rb-vllm
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-4 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-3-8b-rb-vllm
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx mixtral-8x-7b rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx mixtral-8x7b-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-4 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch mixtral-8x7b-rb
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx llama-2-13b-speculative-rb rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-speculative-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-speculative-rb
docker rm -f $(docker ps -aq)
sudo rm -rf models
- name: Test transformers-neuronx llama-2-13b-speculative-rb compiled draft model rolling batch
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py transformers_neuronx llama-speculative-compiled-rb
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models pytorch-inf2-6 \
serve
curl http://127.0.0.1:8080/models
python3 llm/client.py transformers_neuronx_rolling_batch llama-speculative-compiled-rb
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
@@ -448,12 +133,10 @@ jobs:
stop-runners:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [ create-runners, transformers-neuronx-container-unit-tests, transformers-neuronx-test-1, transformers-neuronx-test-2, transformers-neuronx-rolling-batch ]
needs: [ create-runners, transformers-neuronx-container-unit-tests, transformers-neuronx-test-1 ]
steps:
- name: Stop all instances
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-runners.outputs.inf2_instance_id_1 }}
./stop_instance.sh $instance_id
instance_id=${{ needs.create-runners.outputs.inf2_instance_id_2 }}
./stop_instance.sh $instance_id
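
A note on the consolidated unit-test step this commit keeps: the pipeline "pytest ... | tee logs/results.log" exits with tee's status rather than pytest's, which is why the workflow scans the captured log and fails the step by hand. Below is a minimal sketch of that guard, followed by a "set -o pipefail" variant that would surface pytest's own exit code; the pipefail form is an alternative sketch, not what the workflow uses.

    # Pattern from the workflow: the pipe returns tee's (zero) exit status,
    # so scan the saved log for a pytest failure marker and fail explicitly.
    pytest engines/python/setup/djl_python/tests/neuron_test_scripts/ | tee logs/results.log
    if grep -F "failed" logs/results.log &>/dev/null; then exit 1; fi

    # Alternative sketch: make the pipeline return the rightmost non-zero
    # status, so a pytest failure propagates without scanning the log.
    set -o pipefail
    pytest engines/python/setup/djl_python/tests/neuron_test_scripts/ | tee logs/results.log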
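The uid_mapping line added in the unit-test step only sets a variable; the docker run invocation that consumes it falls in a collapsed region of this diff, so the wiring below is an assumption for illustration. The idea is that the variable is expanded unquoted, so an empty value contributes no arguments while "-u djl" word-splits into the flag and its value.

    # Hypothetical consumer of the new uid_mapping variable (the actual
    # docker run line is collapsed in the diff above); the expansion is
    # deliberately left unquoted so it can contribute zero or two arguments.
    if [[ "$UID" == "1000" ]]; then uid_mapping="-u djl"; fi
    docker run -t --rm --network="host" \
      ${uid_mapping:-} \
      -v $PWD/:/opt/ml/model/ \
      deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG \
      /bin/bash -c "id -un"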
