From bcbcf3a801e8bb3a1dd0662b2ef3ed4109049c6c Mon Sep 17 00:00:00 2001
From: Qing Lan
Date: Tue, 14 May 2024 17:54:18 -0700
Subject: [PATCH] [TRTLLM] add gemma model support (#1906)

---
 .github/workflows/lmi-no-code.yml      | 1 -
 serving/docker/tensorrt-llm.Dockerfile | 8 ++------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/lmi-no-code.yml b/.github/workflows/lmi-no-code.yml
index 748482f70..536f8ba99 100644
--- a/.github/workflows/lmi-no-code.yml
+++ b/.github/workflows/lmi-no-code.yml
@@ -211,7 +211,6 @@ jobs:
           python3 llm/client.py no_code llama-13b
           ./remove_container.sh
       - name: Gemma-7b lmi container
-        if: ${{ matrix.container == 'lmi' }}
         working-directory: tests/integration
         run: |
           rm -rf models
diff --git a/serving/docker/tensorrt-llm.Dockerfile b/serving/docker/tensorrt-llm.Dockerfile
index 4ad483c3b..3e1bc8fe4 100644
--- a/serving/docker/tensorrt-llm.Dockerfile
+++ b/serving/docker/tensorrt-llm.Dockerfile
@@ -9,7 +9,7 @@
 # or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
-ARG version=12.2.2-devel-ubuntu22.04
+ARG version=12.2.2-cudnn8-devel-ubuntu22.04
 FROM nvidia/cuda:$version
 ARG cuda_version=cu122
 ARG python_version=3.10
@@ -70,15 +70,11 @@ RUN apt-get update && apt-get install -y g++ wget unzip openmpi-bin libopenmpi-d
     pip3 cache purge && \
     apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
-# Install latest CUDNN8
-RUN apt-get update && apt-get install -y libcudnn8 && \
-    apt-get clean -y && rm -rf /var/lib/apt/lists/*
-
 # Install PyTorch
 # Qwen needs transformers_stream_generator, tiktoken and einops
 RUN pip install torch==${TORCH_VERSION} transformers==${transformers_version} accelerate==${accelerate_version} peft==${peft_version} sentencepiece \
     mpi4py cuda-python==${cuda_python_version} onnx polygraphy pynvml==${pynvml_verison} datasets pydantic==${pydantic_version} scipy torchprofile bitsandbytes ninja \
-    transformers_stream_generator einops tiktoken jinja2 graphviz blobfile colored h5py strenum pulp && \
+    transformers_stream_generator einops tiktoken jinja2 graphviz blobfile colored h5py strenum pulp flax easydict && \
     pip3 cache purge
 
 # Install TensorRT and TRT-LLM Deps