Skip to content

Commit

Permalink
[CI] add trtllm P4D tests (#1455)
Browse files Browse the repository at this point in the history
  • Loading branch information
Qing Lan authored Jan 5, 2024
1 parent dc94256 commit da91aa1
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 4 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Large model integration tests with P4D and compiler optimizations

on:
workflow_dispatch:
Expand Down Expand Up @@ -49,7 +49,7 @@ jobs:
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests numpy
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh deepspeed ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
Expand Down Expand Up @@ -118,10 +118,69 @@ jobs:
name: lmi-dist-aiccl-logs
path: tests/integration/logs/

# TensorRT-LLM integration tests on a self-hosted P4D runner (8 GPUs, TP8).
# Pulls the tensorrt-llm serving image, then runs llama-2-70b and mixtral-8x7b
# end-to-end (prepare model artifacts -> launch container -> client smoke test).
trtllm-test:
  runs-on: [ self-hosted, p4d ]
  timeout-minutes: 120
  needs: create-runners-p4d
  steps:
    - uses: actions/checkout@v3
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v4
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests numpy
    - name: Build container name
      # Resolves $DJLSERVING_DOCKER_TAG for the tensorrt-llm image variant.
      run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test llama-2-70B with TP8
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py trtllm llama2-70b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
        serve
        python3 llm/client.py trtllm llama2-70b
        docker rm -f $(docker ps -aq)
    # NOTE: fixed duplicated word in the original step name ("with with TP8").
    - name: Test mixtral-8x7b with TP8
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py trtllm mixtral-8x7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
        serve
        python3 llm/client.py trtllm mixtral-8x7b
        docker rm -f $(docker ps -aq)
    - name: Remove models dir
      working-directory: tests/integration
      run: |
        sudo rm -rf models
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        sudo rm -rf models
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      uses: actions/upload-artifact@v3
      with:
        name: trtllm-logs
        path: tests/integration/logs/

stop-runners-p4d:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [ create-runners-p4d, lmi-dist-aiccl-test, trtllm-test ]
steps:
- name: Stop all instances
run: |
Expand Down
12 changes: 12 additions & 0 deletions tests/integration/llm/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,18 @@ def get_model_name():
"batch_size": [1, 4],
"seq_length": [256],
"tokenizer": "Qwen/Qwen-7B"
},
"llama2-70b": {
"max_memory_per_gpu": [40.0],
"batch_size": [1, 8],
"seq_length": [256],
"tokenizer": "TheBloke/Llama-2-13B-fp16"
},
"mixtral-8x7b": {
"max_memory_per_gpu": [40.0],
"batch_size": [1, 8],
"seq_length": [256],
"tokenizer": "mistralai/Mixtral-8x7B-v0.1"
}
}

Expand Down
16 changes: 15 additions & 1 deletion tests/integration/llm/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,20 @@
"option.tensor_parallel_degree": 4,
"option.trust_remote_code": True,
"option.output_formatter": "jsonlines"
},
"llama2-70b": {
"option.model_id": "s3://djl-llm/llama-2-70b-hf/",
"option.tensor_parallel_degree": 8,
"option.use_custom_all_reduce": True,
"option.max_rolling_batch_size": 32,
"option.output_formatter": "jsonlines"
},
"mixtral-8x7b": {
"option.model_id": "s3://djl-llm/mixtral-8x7b/",
"option.tensor_parallel_degree": 8,
"option.use_custom_all_reduce": True,
"option.max_rolling_batch_size": 32,
"option.output_formatter": "jsonlines"
}
}

Expand Down Expand Up @@ -987,7 +1001,7 @@ def build_lmi_dist_aiccl_model(model):
options["option.tensor_parallel_degree"] = 8
options["option.rolling_batch"] = "lmi-dist"
options["option.output_formatter"] = "jsonlines"
options["option.max_rolling_batch_size"] = 16
write_model_artifacts(options)


Expand Down

0 comments on commit da91aa1

Please sign in to comment.