From 38dbc9249ec5790037eac8f3d5d4132cca804c95 Mon Sep 17 00:00:00 2001 From: Prashant Gupta Date: Wed, 17 Apr 2024 15:47:57 -0700 Subject: [PATCH] Squashed commit of the following: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 82d22616ddfb1b041ce3af8ade66498d0bc99025 Author: Prashant Gupta Date: Wed Apr 17 15:44:35 2024 -0700 ♻️ update dockerfile.ubi with vllm wheel installation Signed-off-by: Prashant Gupta commit 15076fabb90a76188ec77d1162e63f866d8c6c75 Author: Nick Hill Date: Fri Apr 12 00:50:25 2024 +0100 Compile kernels and fix build (#17) These Dockerfile changes: - Update the release stage to work with the recently refactored `requirements-common.txt` / `requirements-cuda.txt` split - Fixup the kernel compilation in the `build` stage to correctly pick up cuda - Install the kernels from this docker build rather than pulling a precompiled wheel. We can swap that back once a new wheel is available with the correct pytorch version + updated interfaces --------- Signed-off-by: Nick Hill Signed-off-by: Joe Runde Co-authored-by: Joe Runde Signed-off-by: Prashant Gupta --- Dockerfile.ubi | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index d80691e56..dfe6dfa90 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -161,7 +161,7 @@ COPY CMakeLists.txt CMakeLists.txt COPY requirements-common.txt requirements-common.txt COPY requirements-cuda.txt requirements-cuda.txt COPY pyproject.toml pyproject.toml -COPY vllm/__init__.py vllm/__init__.py +COPY vllm vllm ARG TORCH_CUDA_ARCH_LIST ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST @@ -179,7 +179,21 @@ ENV VLLM_INSTALL_PUNICA_KERNELS=1 ENV PATH=/usr/local/cuda/bin:$PATH ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH -RUN python3 setup.py build_ext --inplace +# Prepend the CUDA bin and lib64 directories to the search paths.
Ref: https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/build.sh#L6-L8 +ENV PATH=/usr/local/cuda/bin:$PATH +ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH + +ENV CCACHE_DIR=/root/.cache/ccache +RUN --mount=type=cache,target=/root/.cache/ccache \ + --mount=type=cache,target=/root/.cache/pip \ + python3 setup.py bdist_wheel --dist-dir=dist + +# the `vllm_nccl` package must be installed from source distribution +# pip is too smart to store a wheel in the cache, and other CI jobs +# will directly use the wheel from the cache, which is not what we want. +# we need to remove it manually +RUN --mount=type=cache,target=/root/.cache/pip \ + pip cache remove "vllm_nccl*" ## Extension Cache ############################################################# @@ -253,23 +267,28 @@ RUN --mount=type=cache,target=/root/.cache/pip \ --mount=type=bind,source=proto,target=proto \ make gen-protos -## vLLM Library Files ########################################################## -# Little extra stage to gather files and manage permissions on them without any -# duplication in the release layer due to permission changes +## vLLM installation IMAGE ########################################################## +# image with vLLM installed FROM base AS vllm -WORKDIR /vllm-staging +WORKDIR /vllm-workspace + +# install vllm wheel first, so that torch etc will be installed +RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \ + --mount=type=cache,target=/root/.cache/pip \ + pip install dist/*.whl --verbose + # COPY files from various places into a staging directory COPY --link vllm vllm -COPY --from=build --link /workspace/vllm/*.so vllm/ +COPY --from=prebuilt-wheel --link /workspace/vllm/*.so vllm/ COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb # custom COPY command to use umask to control permissions and grant permissions # to the group -RUN umask 002 \ - && cp --recursive --no-preserve=all
/vllm-staging/vllm /workspace/vllm \ - # not strictly needed, but .so files typically have executable bits - && chmod +x /workspace/vllm/*.so +# RUN umask 002 \ +# && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \ +# # not strictly needed, but .so files typically have executable bits +# && chmod +x /workspace/vllm/*.so ## Release ##################################################################### # Note from the non-UBI Dockerfile: @@ -286,8 +305,9 @@ ENV PATH=/opt/vllm/bin/:$PATH RUN --mount=type=cache,target=/root/.cache/pip \ --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \ --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \ pip3 install \ + # requirements-cuda installs the requirements-common from within -r requirements-cuda.txt \ # additional dependencies for the TGIS gRPC server grpcio-tools==1.62.1 \