Squashed commit of the following:

commit 82d2261 Author: Prashant Gupta <prashantgupta@us.ibm.com> Date: Wed Apr 17 15:44:35 2024 -0700 ♻️ update dockerfile.ubi with vllm wheel installation Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com> commit 15076fa Author: Nick Hill <nickhill@us.ibm.com> Date: Fri Apr 12 00:50:25 2024 +0100 Compile kernels and fix build (#17) These Dockerfile changes: - Update the release stage to work with the recently refactored `requirements-common.txt` / `requirements-cuda.txt` split - Fixup the kernel compilation in the `build` stage to correctly pick up cuda - Install the kernels from this docker build rather than pulling a precompiled wheel. We can swap that back once a new wheel is available with the correct pytorch version + updated interfaces --------- Signed-off-by: Nick Hill <nickhill@us.ibm.com> Signed-off-by: Joe Runde <Joseph.Runde@ibm.com> Co-authored-by: Joe Runde <Joseph.Runde@ibm.com> Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com>
IBM · Apr 17, 2024 · 38dbc92 · 38dbc92
1 parent 4e3ff78
commit 38dbc92
Showing 1 changed file with 32 additions and 12 deletions.
diff --git a/Dockerfile.ubi b/Dockerfile.ubi
@@ -161,7 +161,7 @@ COPY CMakeLists.txt CMakeLists.txt
 COPY requirements-common.txt requirements-common.txt
 COPY requirements-cuda.txt requirements-cuda.txt
 COPY pyproject.toml pyproject.toml
-COPY vllm/__init__.py vllm/__init__.py
+COPY vllm vllm
 
 ARG TORCH_CUDA_ARCH_LIST
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
@@ -179,7 +179,21 @@ ENV VLLM_INSTALL_PUNICA_KERNELS=1
 ENV PATH=/usr/local/cuda/bin:$PATH
 ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 
-RUN python3 setup.py build_ext --inplace
+# Put the CUDA toolchain on PATH / LD_LIBRARY_PATH for the wheel build (repeats the ENV lines just above). Ref: https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/build.sh#L6-L8
+ENV PATH=/usr/local/cuda/bin:$PATH
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+
+ENV CCACHE_DIR=/root/.cache/ccache
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    python3 setup.py bdist_wheel --dist-dir=dist
+
+# The `vllm_nccl` package must be installed from its source distribution.
+# pip stores the wheel it builds in the shared cache, and other CI jobs would
+# then install that cached wheel directly, which is not what we want, so it is
+# removed manually. The pattern is quoted so the shell passes it to pip as-is.
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip cache remove 'vllm_nccl*'
 
 
 ## Extension Cache #############################################################
@@ -253,23 +267,28 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=proto,target=proto \
     make gen-protos
 
-## vLLM Library Files ##########################################################
-# Little extra stage to gather files and manage permissions on them without any
-# duplication in the release layer due to permission changes
+## vLLM Installation ###########################################################
+# Stage with the vLLM wheel produced by the `build` stage installed
 FROM base AS vllm
 
-WORKDIR /vllm-staging
+WORKDIR /vllm-workspace
+
+# install vllm wheel first, so that torch etc will be installed
+RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
+    --mount=type=cache,target=/root/.cache/pip \
+    pip install dist/*.whl --verbose
+
 # COPY files from various places into a staging directory
 COPY --link vllm vllm
-COPY --from=build --link /workspace/vllm/*.so vllm/
+COPY --from=prebuilt-wheel --link /workspace/vllm/*.so vllm/
 COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb
 
 # custom COPY command to use umask to control permissions and grant permissions
 # to the group
-RUN umask 002 \
-    && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
-    # not strictly needed, but .so files typically have executable bits
-    && chmod +x /workspace/vllm/*.so
+# RUN umask 002 \
+#     && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
+#     # not strictly needed, but .so files typically have executable bits
+#     && chmod +x /workspace/vllm/*.so
 
 ## Release #####################################################################
 # Note from the non-UBI Dockerfile:
@@ -286,8 +305,9 @@ ENV PATH=/opt/vllm/bin/:$PATH
 
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
-    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
+    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
     pip3 install \
+        # requirements-cuda.txt itself pulls in requirements-common.txt
         -r requirements-cuda.txt \
         # additional dependencies for the TGIS gRPC server
         grpcio-tools==1.62.1 \