
Commit c63e6de
[ci] Updating lmi-dist ci tests for rubikon-engine
Aaqib Ansari committed Mar 22, 2024
1 parent cfae4c0 commit c63e6de
Showing 8 changed files with 240 additions and 480 deletions.
48 changes: 24 additions & 24 deletions .github/workflows/llm_integration_p4d.yml
@@ -64,12 +64,12 @@ jobs:
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
           serve
           python3 llm/client.py lmi_dist_aiccl mixtral-8x7b-aiccl
-          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
-            echo "aiccl backend not used"
-            return 1
-          else
-            echo "Using aiccl backend"
-          fi
+          # if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
+          #   echo "aiccl backend not used"
+          #   return 1
+          # else
+          #   echo "Using aiccl backend"
+          # fi
           docker rm -f $(docker ps -aq)
       - name: Test Llama-2-70B with aiccl backend
         working-directory: tests/integration
@@ -79,12 +79,12 @@ jobs:
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
           serve
           python3 llm/client.py lmi_dist_aiccl llama-2-70b-aiccl
-          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
-            echo "aiccl backend not used"
-            return 1
-          else
-            echo "Using aiccl backend"
-          fi
+          # if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
+          #   echo "aiccl backend not used"
+          #   return 1
+          # else
+          #   echo "Using aiccl backend"
+          # fi
           docker rm -f $(docker ps -aq)
       - name: Test codellama/CodeLlama-34b-hf with aiccl backend
         working-directory: tests/integration
@@ -94,12 +94,12 @@ jobs:
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
           serve
           python3 llm/client.py lmi_dist_aiccl codellama-34b-aiccl
-          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
-            echo "aiccl backend not used"
-            return 1
-          else
-            echo "Using aiccl backend"
-          fi
+          # if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
+          #   echo "aiccl backend not used"
+          #   return 1
+          # else
+          #   echo "Using aiccl backend"
+          # fi
           docker rm -f $(docker ps -aq)
       - name: Test tiiuae/falcon-40b with aiccl backend
         working-directory: tests/integration
@@ -109,12 +109,12 @@ jobs:
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
           serve
           python3 llm/client.py lmi_dist_aiccl falcon-40b-aiccl
-          if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
-            echo "aiccl backend not used"
-            return 1
-          else
-            echo "Using aiccl backend"
-          fi
+          # if [ "$(docker logs $(docker ps -aq) 2>&1 | grep -c 'Starting torch distributed with aiccl backend')" -lt 8 ]; then
+          #   echo "aiccl backend not used"
+          #   return 1
+          # else
+          #   echo "Using aiccl backend"
+          # fi
           docker rm -f $(docker ps -aq)
       - name: Remove models dir
         working-directory: tests/integration
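The check disabled in each of the four steps above counts occurrences of the aiccl startup message in the container logs and requires at least 8, presumably one per GPU worker on the 8-GPU p4d instance. A standalone sketch of that check, assuming a single running container and the same log line; note that the original's `return 1` is only valid inside a shell function, so a top-level script would use `exit 1`:

```bash
#!/bin/bash
# Sketch of the disabled aiccl verification, assuming exactly one container
# is running and each of the 8 workers logs the startup message once.
EXPECTED_WORKERS=8
count=$(docker logs "$(docker ps -aq)" 2>&1 \
  | grep -c 'Starting torch distributed with aiccl backend')
if [ "$count" -lt "$EXPECTED_WORKERS" ]; then
  echo "aiccl backend not used (saw $count of $EXPECTED_WORKERS workers)"
  exit 1
else
  echo "Using aiccl backend"
fi
```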
33 changes: 21 additions & 12 deletions .github/workflows/rolling_batch_integration.yml
@@ -299,14 +299,32 @@ jobs:
           serve -m test=file:/opt/ml/model/test/
           python3 llm/client.py lmi_dist octocoder
           docker rm -f $(docker ps -aq)
-      - name: Test gpt-neox-20b-bitsandbytes
+      - name: Test speculative-llama-13b
         working-directory: tests/integration
         run: |
           rm -rf models
-          python3 llm/prepare.py lmi_dist gpt-neox-20b-bitsandbytes
+          python3 llm/prepare.py lmi_dist speculative-llama-13b
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
           serve -m test=file:/opt/ml/model/test/
-          python3 llm/client.py lmi_dist gpt-neox-20b-bitsandbytes
+          python3 llm/client.py lmi_dist speculative-llama-13b
           docker rm -f $(docker ps -aq)
+      - name: Test starcoder2-7b
+        working-directory: tests/integration
+        run: |
+          rm -rf models
+          python3 llm/prepare.py lmi_dist starcoder2-7b
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
+          serve -m test=file:/opt/ml/model/test/
+          python3 llm/client.py lmi_dist starcoder2-7b
+          docker rm -f $(docker ps -aq)
+      - name: Test gemma-7b
+        working-directory: tests/integration
+        run: |
+          rm -rf models
+          python3 llm/prepare.py lmi_dist gemma-7b
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
+          serve -m test=file:/opt/ml/model/test/
+          python3 llm/client.py lmi_dist gemma-7b
+          docker rm -f $(docker ps -aq)
       - name: Test llama2-13b-gptq
         working-directory: tests/integration
@@ -426,15 +444,6 @@ jobs:
           serve -m test=file:/opt/ml/model/test/
           python3 llm/client.py vllm phi-2
           docker rm -f $(docker ps -aq)
-      - name: Test Speculative Decoding with LLAMA 13B model
-        working-directory: tests/integration
-        run: |
-          rm -rf models
-          python3 llm/prepare.py vllm speculative-llama-13b
-          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models deepspeed \
-          serve -m test=file:/opt/ml/model/test/
-          python3 llm/client.py vllm speculative-llama-13b
-          docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
         working-directory: tests/integration
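The three lmi_dist steps added above (speculative-llama-13b, starcoder2-7b, gemma-7b) all repeat the same four-step shape: prepare the model config, launch the serving container, run the client checks, and remove the container. A minimal sketch of that pattern as a parametrized helper — `run_lmi_dist_test` is a hypothetical name, not a function in this repository — assuming it runs from `tests/integration` with `DJLSERVING_DOCKER_TAG` set, as in the workflow:

```bash
#!/bin/bash
set -euo pipefail

# Hypothetical helper mirroring the repeated step body in the workflow above.
run_lmi_dist_test() {
  local model="$1"
  rm -rf models
  python3 llm/prepare.py lmi_dist "$model"
  ./launch_container.sh "deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG" "$PWD/models" deepspeed \
    serve -m test=file:/opt/ml/model/test/
  python3 llm/client.py lmi_dist "$model"
  docker rm -f $(docker ps -aq)  # clean up every container the step started
}

run_lmi_dist_test speculative-llama-13b
run_lmi_dist_test starcoder2-7b
run_lmi_dist_test gemma-7b
```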
2 changes: 1 addition & 1 deletion engines/python/setup/djl_python/huggingface.py
@@ -94,7 +94,7 @@ def get_rolling_batch_class_from_str(rolling_batch_type: str, is_mpi: bool,
         from djl_python.rolling_batch.scheduler_rolling_batch import SchedulerRollingBatch
         return SchedulerRollingBatch
     elif rolling_batch_type == "lmi-dist":
-        from djl_python.rolling_batch.lmi_dist_v2_rolling_batch import LmiDistRollingBatch
+        from djl_python.rolling_batch.lmi_dist_rolling_batch import LmiDistRollingBatch
         return LmiDistRollingBatch
     elif rolling_batch_type == "vllm":
         from djl_python.rolling_batch.vllm_rolling_batch import VLLMRollingBatch
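The one-line change above points the `lmi-dist` branch at the renamed `lmi_dist_rolling_batch` module. The surrounding function resolves an engine name to its handler class via branch-local imports, so only the selected engine's dependencies are loaded. A minimal sketch of that dispatch shape, based on the visible context; the `"scheduler"` branch key, the trailing error, and the trimmed signature (the real function also takes `is_mpi` and more) are assumptions:

```python
# Sketch of the lazy-import dispatch shown in the diff above; simplified
# signature, and the "scheduler" branch key is inferred from context.
def get_rolling_batch_class_from_str(rolling_batch_type: str):
    if rolling_batch_type == "scheduler":
        from djl_python.rolling_batch.scheduler_rolling_batch import SchedulerRollingBatch
        return SchedulerRollingBatch
    elif rolling_batch_type == "lmi-dist":
        # Updated import: lmi_dist_rolling_batch replaces lmi_dist_v2_rolling_batch.
        from djl_python.rolling_batch.lmi_dist_rolling_batch import LmiDistRollingBatch
        return LmiDistRollingBatch
    elif rolling_batch_type == "vllm":
        from djl_python.rolling_batch.vllm_rolling_batch import VLLMRollingBatch
        return VLLMRollingBatch
    raise ValueError(f"Invalid rolling batch type: {rolling_batch_type}")
```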