Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
commit 82d2261
Author: Prashant Gupta <prashantgupta@us.ibm.com>
Date:   Wed Apr 17 15:44:35 2024 -0700

    ♻️ update dockerfile.ubi with vllm wheel installation

    Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com>

commit 15076fa
Author: Nick Hill <nickhill@us.ibm.com>
Date:   Fri Apr 12 00:50:25 2024 +0100

    Compile kernels and fix build (#17)

    These Dockerfile changes:
    - Update the release stage to work with the recently refactored
    `requirements-common.txt` / `requirements-cuda.txt` split
    - Fix up the kernel compilation in the `build` stage so that it correctly
    picks up CUDA
    - Install the kernels from this Docker build rather than pulling a
    precompiled wheel. We can swap that back once a new wheel is available
    with the correct PyTorch version + updated interfaces

    ---------

    Signed-off-by: Nick Hill <nickhill@us.ibm.com>
    Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
    Co-authored-by: Joe Runde <Joseph.Runde@ibm.com>

Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com>
  • Loading branch information
prashantgupta24 committed Apr 17, 2024
1 parent 4e3ff78 commit 38dbc92
Showing 1 changed file with 32 additions and 12 deletions.
44 changes: 32 additions & 12 deletions Dockerfile.ubi
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ COPY CMakeLists.txt CMakeLists.txt
COPY requirements-common.txt requirements-common.txt
COPY requirements-cuda.txt requirements-cuda.txt
COPY pyproject.toml pyproject.toml
COPY vllm/__init__.py vllm/__init__.py
COPY vllm vllm

ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
Expand All @@ -179,7 +179,21 @@ ENV VLLM_INSTALL_PUNICA_KERNELS=1
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

RUN python3 setup.py build_ext --inplace
# Make the CUDA binaries and libraries visible to the build (same values as the ENV lines just above). Ref: https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/build.sh#L6-L8
ENV PATH=/usr/local/cuda/bin:$PATH
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH

ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/pip \
python3 setup.py bdist_wheel --dist-dir=dist

# The `vllm_nccl` package must be installed from its source distribution.
# pip, however, stores the wheel it builds in its cache, and other CI jobs
# would then install that cached wheel directly, which is not what we want.
# We therefore remove it from the cache manually.
RUN --mount=type=cache,target=/root/.cache/pip \
pip cache remove vllm_nccl*


## Extension Cache #############################################################
Expand Down Expand Up @@ -253,23 +267,28 @@ RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=proto,target=proto \
make gen-protos

## vLLM Library Files ##########################################################
# Little extra stage to gather files and manage permissions on them without any
# duplication in the release layer due to permission changes
## vLLM installation IMAGE ##########################################################
# image with vLLM installed
FROM base AS vllm

WORKDIR /vllm-staging
WORKDIR /vllm-workspace

# install vllm wheel first, so that torch etc will be installed
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
--mount=type=cache,target=/root/.cache/pip \
pip install dist/*.whl --verbose

# COPY files from various places into a staging directory
COPY --link vllm vllm
COPY --from=build --link /workspace/vllm/*.so vllm/
COPY --from=prebuilt-wheel --link /workspace/vllm/*.so vllm/
COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb

# custom COPY command to use umask to control permissions and grant permissions
# to the group
RUN umask 002 \
&& cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
# not strictly needed, but .so files typically have executable bits
&& chmod +x /workspace/vllm/*.so
# RUN umask 002 \
# && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
# # not strictly needed, but .so files typically have executable bits
# && chmod +x /workspace/vllm/*.so

## Release #####################################################################
# Note from the non-UBI Dockerfile:
Expand All @@ -286,8 +305,9 @@ ENV PATH=/opt/vllm/bin/:$PATH

RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
--mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
--mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda-cuda.txt \
pip3 install \
# requirements-cuda.txt itself pulls in requirements-common.txt
-r requirements-cuda.txt \
# additional dependencies for the TGIS gRPC server
grpcio-tools==1.62.1 \
Expand Down

0 comments on commit 38dbc92

Please sign in to comment.