Skip to content

Commit

Permalink
Mirror various upstream updates to Dockerfile.ubi
Browse files Browse the repository at this point in the history
And use kernels from latest 0.4.0.post1 wheels
  • Loading branch information
njhill committed Apr 3, 2024
1 parent 2961da7 commit ea47979
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,10 @@ jobs:
cache-to: ${{ env.CACHE_TO }}
push: ${{ github.event_name != 'pull_request' }}
file: Dockerfile.ubi


- name: "List docker images"
run: docker images

- name: "Cleanup old cache images"
uses: actions/delete-package-versions@v5
if: ${{ github.event_name == 'push' }}
Expand All @@ -118,9 +121,6 @@ jobs:
package-type: container
delete-only-untagged-versions: true

- name: "List docker images"
run: docker images

- name: "Check disk usage"
shell: bash
run: |
Expand Down
18 changes: 13 additions & 5 deletions Dockerfile.ubi
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ RUN microdnf install -y \

ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"

# Workaround for https://github.com/openai/triton/issues/2507 and
# https://github.com/pytorch/pytorch/issues/107960: add the CUDA compat
# directory to the dynamic linker cache so that the libraries it contains
# can be resolved at runtime.
# NOTE(review): the path hard-codes CUDA 12.2 -- presumably it has to match
# the CUDA version installed in this stage; confirm and bump them together.
# Hopefully this won't be needed for future versions of this docker image
# or future versions of triton.
RUN ldconfig /usr/local/cuda-12.2/compat/

## Development #################################################################
FROM cuda-devel AS dev
Expand Down Expand Up @@ -181,8 +186,8 @@ RUN microdnf install -y \
&& microdnf clean all

ARG PYTHON_VERSION
# 0.3.3 is built for CUDA 12.1 and PyTorch 2.1.2
ARG VLLM_WHEEL_VERSION=0.3.3
# 0.4.0.post1 is built for CUDA 12.1 and PyTorch 2.1.2
ARG VLLM_WHEEL_VERSION=0.4.0.post1

RUN curl -Lo vllm.whl https://github.com/vllm-project/vllm/releases/download/v${VLLM_WHEEL_VERSION}/vllm-${VLLM_WHEEL_VERSION}-cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}-manylinux1_x86_64.whl \
&& unzip vllm.whl \
Expand Down Expand Up @@ -263,7 +268,7 @@ RUN umask 002 \
## Release #####################################################################
# Note from the non-UBI Dockerfile:
# We used the base CUDA image because PyTorch installs its own CUDA libraries.
# However cupy depends on cuda libraries so we had to switch to the runtime image
# However, pynccl depends on the CUDA libraries, so we had to switch to the runtime image.
# In the future it would be nice to have a container with PyTorch and CUDA that does not duplicate the CUDA libraries.
FROM cuda-runtime AS vllm-openai

Expand All @@ -280,7 +285,9 @@ RUN --mount=type=cache,target=/root/.cache/pip \
# additional dependencies for the TGIS gRPC server
grpcio-tools==1.62.1 \
# additional dependencies for openai api_server
accelerate==0.28.0
accelerate==0.28.0 \
# hf_transfer for faster HF hub downloads
hf_transfer==0.1.6

# Install flash attention (from pre-built wheel)
RUN --mount=type=bind,from=flash-attn-builder,src=/usr/src/flash-attention-v2,target=/usr/src/flash-attention-v2 \
Expand All @@ -296,7 +303,8 @@ RUN microdnf install -y gcc \
ENV HF_HUB_OFFLINE=1 \
PORT=8000 \
GRPC_PORT=8033 \
HOME=/home/vllm
HOME=/home/vllm \
VLLM_USAGE_SOURCE=production-docker-image

# setup non-root user for OpenShift
RUN microdnf install -y shadow-utils \
Expand Down

0 comments on commit ea47979

Please sign in to comment.