Merge pull request #1581 from pytorch/dockerfile_update

chore: Update dockerfile
pytorch · Jan 23, 2023 · 2525f67 · 2525f67
2 parents d92e409 + fd72efb
commit 2525f67
Show file tree

Hide file tree

Showing 7 changed files with 83 additions and 166 deletions.
diff --git a/docker/Dockerfile b/docker/Dockerfile
@@ -1,62 +1,79 @@
-ARG BASE=22.04
-ARG BASE_IMG=nvcr.io/nvidia/tensorrt:${BASE}-py3
+# Base image starts with CUDA
+ARG BASE_IMG=nvidia/cuda:11.7.1-devel-ubuntu18.04
 FROM ${BASE_IMG} as base
 
-FROM base as torch-tensorrt-builder-base
+# Install basic dependencies
+RUN apt-get update
+RUN apt install -y build-essential manpages-dev wget zlib1g software-properties-common git
+RUN add-apt-repository ppa:deadsnakes/ppa
+RUN apt install -y python3.8 python3.8-distutils python3.8-dev
+RUN wget https://bootstrap.pypa.io/get-pip.py
+RUN ln -s /usr/bin/python3.8 /usr/bin/python
+RUN python get-pip.py
+RUN pip3 install wheel
+
+# Install Pytorch
+RUN pip3 install torch==2.0.0.dev20230103+cu117 torchvision==0.15.0.dev20230103+cu117 --extra-index-url https://download.pytorch.org/whl/nightly/cu117
+
+# Install CUDNN + TensorRT
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
+RUN mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 536F8F1DE80F6A35
+RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A4B469963BF863CC
+RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
+RUN apt-get update
+RUN apt-get install -y libcudnn8=8.5.0* libcudnn8-dev=8.5.0*
+
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+RUN add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
+RUN apt-get update
+
+RUN apt-get install -y libnvinfer8=8.5.1* libnvinfer-plugin8=8.5.1* libnvinfer-dev=8.5.1* libnvinfer-plugin-dev=8.5.1* libnvonnxparsers8=8.5.1-1* libnvonnxparsers-dev=8.5.1-1* libnvparsers8=8.5.1-1*  libnvparsers-dev=8.5.1-1*
+
+# Setup Bazel
+ARG BAZEL_VERSION=5.2.0
+RUN wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-x86_64 -O /usr/bin/bazel \
+ && chmod a+x /usr/bin/bazel
+
+# Remove cuda symlink to avoid bazel circle symlink errors
+RUN rm /usr/local/cuda-11.7/cuda-11.7
 
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt /usr/bin/bazel
+# Build Torch-TensorRT in an auxiliary container
+FROM base as torch-tensorrt-builder-base
 
 ARG ARCH="x86_64"
 ARG TARGETARCH="amd64"
-ARG BAZEL_VERSION=5.2.0
-
-RUN [[ "$TARGETARCH" == "amd64" ]] && ARCH="x86_64" || ARCH="${TARGETARCH}" \
- && wget -q https://github.com/bazelbuild/bazel/releases/download/${BAZEL_VERSION}/bazel-${BAZEL_VERSION}-linux-${ARCH} -O /usr/bin/bazel \
- && chmod a+x /usr/bin/bazel
 
-# Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container
-RUN touch /usr/lib/$HOSTTYPE-linux-gnu/libnvinfer_static.a
-
-RUN rm -rf /usr/local/cuda/lib* /usr/local/cuda/include \
-  && ln -sf /usr/local/cuda/targets/$HOSTTYPE-linux/lib /usr/local/cuda/lib64 \
-  && ln -sf /usr/local/cuda/targets/$HOSTTYPE-linux/include /usr/local/cuda/include
+RUN apt-get install -y python3-setuptools
+RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
+RUN  apt-get update
 
 RUN apt-get update && apt-get install -y --no-install-recommends locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8
 
 FROM torch-tensorrt-builder-base as torch-tensorrt-builder
 
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt
-
 COPY . /workspace/torch_tensorrt/src
 WORKDIR /workspace/torch_tensorrt/src
 RUN cp ./docker/WORKSPACE.docker WORKSPACE
 
 # This script builds both libtorchtrt bin/lib/include tarball and the Python wheel, in dist/
 RUN ./docker/dist-build.sh
 
+# Copy and install Torch-TRT into the main container
 FROM base as torch-tensorrt
 
-# Removing any bazel or torch-tensorrt pre-installed from the base image
-RUN rm -rf /opt/pytorch/torch_tensorrt
-
-# copy source repo
-COPY . /workspace/torch_tensorrt
+COPY . /opt/torch_tensorrt
 COPY --from=torch-tensorrt-builder  /workspace/torch_tensorrt/src/py/dist/ .
 
-RUN pip3 install ipywidgets --trusted-host pypi.org --trusted-host pypi.python.org --trusted-host=files.pythonhosted.org
-RUN jupyter nbextension enable --py widgetsnbextension
-
+RUN cp /opt/torch_tensorrt/docker/WORKSPACE.docker /opt/torch_tensorrt/WORKSPACE
 RUN pip3 install *.whl && rm -fr /workspace/torch_tensorrt/py/dist/* *.whl
 
-ENV LD_LIBRARY_PATH /usr/local/lib/python3.8/dist-packages/torch/lib:/usr/local/lib/python3.8/dist-packages/torch_tensorrt/lib:${LD_LIBRARY_PATH}
+# Install native tensorrt python package required by torch_tensorrt whl file
+RUN pip install tensorrt==8.5.1.7
+
+WORKDIR /opt/torch_tensorrt
+ENV LD_LIBRARY_PATH /usr/local/lib/python3.8/dist-packages/torch/lib:/usr/local/lib/python3.8/dist-packages/torch_tensorrt/lib:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
 ENV PATH /usr/local/lib/python3.8/dist-packages/torch_tensorrt/bin:${PATH}
-#
-WORKDIR /workspace
-RUN mv /workspace/torch_tensorrt /opt/torch_tensorrt
-RUN cp /opt/torch_tensorrt/docker/WORKSPACE.docker /opt/torch_tensorrt/WORKSPACE
-RUN mkdir torch_tensorrt
-RUN ln -s /opt/torch_tensorrt/notebooks /workspace/torch_tensorrt/notebooks
 
-CMD /bin/bash
+CMD /bin/bash
diff --git a/docker/Dockerfile.ngc b/docker/Dockerfile.ngc
diff --git a/docker/README.md b/docker/README.md
@@ -1,19 +1,41 @@
 # Building a Torch-TensorRT container
 
-### Install Docker and NVIDIA Container Toolkit
+* Use `Dockerfile` to build a container which provides the exact development environment that our master branch is usually tested against.
 
-https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
+* `Dockerfile` currently uses the exact library versions (Torch, CUDA, CUDNN, TensorRT) listed in <a href="https://github.com/pytorch/TensorRT#dependencies">dependencies</a> to build Torch-TensorRT.
 
-### Build Container
+* This `Dockerfile` installs `pre-cxx11-abi` versions of Pytorch and builds Torch-TRT using `pre-cxx11-abi` libtorch as well.
+Note: To install the `cxx11_abi` version of Torch-TensorRT, enable the `USE_CXX11=1` flag so that `dist-build.sh` can build it accordingly.
+
+### Dependencies
+
+* Install nvidia-docker by following https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker
+
+### Instructions
 
 > From root of Torch-TensorRT repo
 
+Build:
+```
+DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile -t torch_tensorrt:latest .
 ```
-# Build:
-DOCKER_BUILDKIT=1 docker build --build-arg BASE={TensorRT Base Container Version} -f docker/Dockerfile -t torch_tensorrt1.0:latest .
 
-# Run:
-docker run --gpus all -it \
-	--shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" \
-	--name=torch_tensorrt1.0 --ipc=host --net=host torch_tensorrt1.0:latest
+Run:
 ```
+nvidia-docker run --gpus all -it --shm-size=8gb --env="DISPLAY" --volume="/tmp/.X11-unix:/tmp/.X11-unix:rw" --name=torch_tensorrt --ipc=host --net=host torch_tensorrt:latest
+```
+
+Test:
+
+
+You can run any converter test to verify that Torch-TRT built successfully inside the container. Once you launch the container, you can run
+```
+bazel test //tests/core/conversion/converters:test_activation --compilation_mode=opt --test_output=summary --config use_precompiled_torchtrt --config pre_cxx11_abi
+```
+
+* `--config use_precompiled_torchtrt` : Tells bazel to use the pre-installed Torch-TRT library to test an application.
+* `--config pre_cxx11_abi` : This flag ensures `bazel test` uses `pre_cxx11_abi` version of `libtorch`. Use this flag corresponding to the ABI format of your Torch-TensorRT installation.
+
+### Pytorch NGC containers
+
+We also ship Torch-TensorRT in <a href="https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch">Pytorch NGC containers</a>. Release notes for these containers can be found <a href="https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/index.html">here</a>. Check out `release/ngc/23.XX` branch of Torch-TensorRT for source code that gets shipped with `23.XX` version of Pytorch NGC container.
diff --git a/docker/dist-test.sh b/docker/dist-test.sh
diff --git a/docker/mha.patch b/docker/mha.patch
diff --git a/docker/qat.patch b/docker/qat.patch
diff --git a/docker/setup_nox.sh b/docker/setup_nox.sh