From 38dbc9249ec5790037eac8f3d5d4132cca804c95 Mon Sep 17 00:00:00 2001
From: Prashant Gupta <prashantgupta@us.ibm.com>
Date: Wed, 17 Apr 2024 15:47:57 -0700
Subject: [PATCH] Squashed commit of the following:
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 82d22616ddfb1b041ce3af8ade66498d0bc99025
Author: Prashant Gupta <prashantgupta@us.ibm.com>
Date:   Wed Apr 17 15:44:35 2024 -0700

    ♻️ update dockerfile.ubi with vllm wheel installation

    Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com>

commit 15076fabb90a76188ec77d1162e63f866d8c6c75
Author: Nick Hill <nickhill@us.ibm.com>
Date:   Fri Apr 12 00:50:25 2024 +0100

    Compile kernels and fix build (#17)

    These Dockerfile changes:
    - Update the release stage to work with the recently refactored
    `requirements-common.txt` / `requirements-cuda.txt` split
    - Fixup the kernel compilation in the `build` stage to correctly pick up
    cuda
    - Install the kernels from this docker build rather than pulling a
    precompiled wheel. We can swap that back once a new wheel is available
    with the correct pytorch version + updated interfaces

    ---------

    Signed-off-by: Nick Hill <nickhill@us.ibm.com>
    Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
    Co-authored-by: Joe Runde <Joseph.Runde@ibm.com>

Signed-off-by: Prashant Gupta <prashantgupta@us.ibm.com>
---
 Dockerfile.ubi | 44 ++++++++++++++++++++++++++++++++------------
 1 file changed, 32 insertions(+), 12 deletions(-)

diff --git a/Dockerfile.ubi b/Dockerfile.ubi
index d80691e56..dfe6dfa90 100644
--- a/Dockerfile.ubi
+++ b/Dockerfile.ubi
@@ -161,7 +161,7 @@ COPY CMakeLists.txt CMakeLists.txt
 COPY requirements-common.txt requirements-common.txt
 COPY requirements-cuda.txt requirements-cuda.txt
 COPY pyproject.toml pyproject.toml
-COPY vllm/__init__.py vllm/__init__.py
+COPY vllm vllm
 
 ARG TORCH_CUDA_ARCH_LIST
 ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
@@ -179,7 +179,21 @@ ENV VLLM_INSTALL_PUNICA_KERNELS=1
 ENV PATH=/usr/local/cuda/bin:$PATH
 ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
 
-RUN python3 setup.py build_ext --inplace
+# Put the CUDA binaries and libraries on the search paths (NOTE(review): repeats the two ENV lines directly above). Ref: https://github.com/vllm-project/vllm/blob/main/.github/workflows/scripts/build.sh#L6-L8
+ENV PATH=/usr/local/cuda/bin:$PATH
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
+
+ENV CCACHE_DIR=/root/.cache/ccache
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    python3 setup.py bdist_wheel --dist-dir=dist
+
+# The `vllm_nccl` package must be installed from its source distribution.
+# pip caches the wheel it builds from that sdist, and other CI jobs would
+# then install the cached wheel directly, which is not what we want, so
+# drop it from the cache (pattern quoted so the shell cannot expand it).
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip cache remove "vllm_nccl*"
 
 
 ## Extension Cache #############################################################
@@ -253,23 +267,28 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=proto,target=proto \
     make gen-protos
 
-## vLLM Library Files ##########################################################
-# Little extra stage to gather files and manage permissions on them without any
-# duplication in the release layer due to permission changes
+## vLLM Installation Image #####################################################
+# Stage with the locally built vLLM wheel installed
 FROM base AS vllm
 
-WORKDIR /vllm-staging
+WORKDIR /vllm-workspace
+
+# install vllm wheel first, so that torch etc will be installed
+RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
+    --mount=type=cache,target=/root/.cache/pip \
+    pip install dist/*.whl --verbose
+
 # COPY files from various places into a staging directory
 COPY --link vllm vllm
-COPY --from=build --link /workspace/vllm/*.so vllm/
+COPY --from=prebuilt-wheel --link /workspace/vllm/*.so vllm/
 COPY --from=gen-protos --link /workspace/vllm/entrypoints/grpc/pb vllm/entrypoints/grpc/pb
 
 # custom COPY command to use umask to control permissions and grant permissions
 # to the group
-RUN umask 002 \
-    && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
-    # not strictly needed, but .so files typically have executable bits
-    && chmod +x /workspace/vllm/*.so
+# RUN umask 002 \
+#     && cp --recursive --no-preserve=all /vllm-staging/vllm /workspace/vllm \
+#     # not strictly needed, but .so files typically have executable bits
+#     && chmod +x /workspace/vllm/*.so
 
 ## Release #####################################################################
 # Note from the non-UBI Dockerfile:
@@ -286,8 +305,9 @@ ENV PATH=/opt/vllm/bin/:$PATH
 
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
-    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
+    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
     pip3 install \
+        # requirements-cuda.txt itself includes requirements-common.txt
         -r requirements-cuda.txt \
         # additional dependencies for the TGIS gRPC server
         grpcio-tools==1.62.1 \