[TRTLLM] add gemma model support (#1906)

deepjavalibrary · May 15, 2024
commit bcbcf3a · 1 parent 97f0951

Showing 2 changed files with 2 additions and 7 deletions.
diff --git a/.github/workflows/lmi-no-code.yml b/.github/workflows/lmi-no-code.yml
@@ -211,7 +211,6 @@ jobs:
           python3 llm/client.py no_code llama-13b
           ./remove_container.sh
       - name: Gemma-7b lmi container
-        if: ${{ matrix.container  == 'lmi' }}
         working-directory: tests/integration
         run: |
           rm -rf models

diff --git a/serving/docker/tensorrt-llm.Dockerfile b/serving/docker/tensorrt-llm.Dockerfile
@@ -9,7 +9,7 @@
 # or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS"
 # BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for
 # the specific language governing permissions and limitations under the License.
-ARG version=12.2.2-devel-ubuntu22.04
+ARG version=12.2.2-cudnn8-devel-ubuntu22.04
 FROM nvidia/cuda:$version
 ARG cuda_version=cu122
 ARG python_version=3.10
@@ -70,15 +70,11 @@ RUN apt-get update && apt-get install -y g++ wget unzip openmpi-bin libopenmpi-d
     pip3 cache purge && \
     apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
-# Install latest CUDNN8
-RUN apt-get update && apt-get install -y libcudnn8 && \
-    apt-get clean -y && rm -rf /var/lib/apt/lists/*
-
 # Install PyTorch
 # Qwen needs transformers_stream_generator, tiktoken and einops
 RUN pip install torch==${TORCH_VERSION} transformers==${transformers_version} accelerate==${accelerate_version} peft==${peft_version} sentencepiece \
     mpi4py cuda-python==${cuda_python_version} onnx polygraphy pynvml==${pynvml_verison} datasets pydantic==${pydantic_version} scipy torchprofile bitsandbytes ninja \
-    transformers_stream_generator einops tiktoken jinja2 graphviz blobfile colored h5py strenum pulp && \
+    transformers_stream_generator einops tiktoken jinja2 graphviz blobfile colored h5py strenum pulp flax easydict && \
     pip3 cache purge
 
 # Install TensorRT and TRT-LLM Deps