# Patch for .github/workflows/lmi-dist-deps-build.yml (two hunks).
#
# Hunk 1 (@@ -79,6 +79,15 @@): adds a workflow step named
# "Build vllm 0.3.1 speculative decoding", placed after the lmi_vllm wheel
# build and before the "Build awq kernels" step. The step's script:
#   1. sources ./venv/bin/activate;
#   2. clones https://github.com/ymwangg/vllm at branch specdec_v0.3.1;
#   3. changes into the vllm checkout;
#   4. exports TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0+PTX" and
#      VLLM_INSTALL_PUNICA_KERNELS=1 before building
#      (presumably read by the vllm build to choose CUDA targets and to
#      enable the Punica kernels -- TODO confirm against that branch's setup);
#   5. runs `pip wheel . --no-deps` to build a wheel for the checkout only;
#   6. copies vllm-*.whl into ../build_artifacts.
#
# Hunk 2 (@@ -114,6 +123,7 @@): adds one upload command,
# `aws s3 cp vllm*.whl s3://djl-ai-staging/publish/vllm/cu121-pt211/`,
# between the existing lmi_vllm and awq uploads. The glob starts with "vllm",
# so it does not match the lmi_vllm*.whl files uploaded on the previous line.
#
# NOTE(review): the clone follows a branch name on a fork rather than a fixed
# commit, so successive runs may build different sources -- confirm this is
# intended.
# NOTE(review): this patch appears to have had its line breaks collapsed onto
# a single line; restore the original line structure before trying to apply it.
diff --git a/.github/workflows/lmi-dist-deps-build.yml b/.github/workflows/lmi-dist-deps-build.yml index c8721f416..996e2fb59 100644 --- a/.github/workflows/lmi-dist-deps-build.yml +++ b/.github/workflows/lmi-dist-deps-build.yml @@ -79,6 +79,15 @@ jobs: cd lmi_vllm pip wheel . --no-deps cp lmi_vllm-*.whl ../build_artifacts + - name: Build vllm 0.3.1 speculative decoding + run: | + . ./venv/bin/activate + git clone https://github.com/ymwangg/vllm -b specdec_v0.3.1 + cd vllm + export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0+PTX" + export VLLM_INSTALL_PUNICA_KERNELS=1 + pip wheel . --no-deps + cp vllm-*.whl ../build_artifacts - name: Build awq kernels run: | . ./venv/bin/activate @@ -114,6 +123,7 @@ jobs: aws s3 cp dropout_layer_norm*.whl s3://djl-ai-staging/publish/flash_attn/cu121-pt211/ aws s3 cp rotary_emb*.whl s3://djl-ai-staging/publish/flash_attn/cu121-pt211/ aws s3 cp lmi_vllm*.whl s3://djl-ai-staging/publish/lmi_vllm/cu121-pt211/ + aws s3 cp vllm*.whl s3://djl-ai-staging/publish/vllm/cu121-pt211/ aws s3 cp awq*.whl s3://djl-ai-staging/publish/awq/cu121-pt211/ stop-runners-p4d: