Skip to content

Commit

Permalink
Merge pull request #487 from sony/feature/20230922-dockerfile-rocky
Browse files Browse the repository at this point in the history
Replace CentOS 7 with Rocky Linux 8
  • Loading branch information
YukioOobuchi authored Oct 26, 2023
2 parents 8557098 + 736487d commit 10c5562
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 263 deletions.
7 changes: 4 additions & 3 deletions build-tools/make/build-with-docker.mk
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ docker_image_build_cuda:
--build-arg CUDNN_VERSION=$(CUDNN_VERSION) \
--build-arg PYTHON_VERSION_MAJOR=$(PYTHON_VERSION_MAJOR) \
--build-arg PYTHON_VERSION_MINOR=$(PYTHON_VERSION_MINOR) \
--build-arg HPCX_URL_centos=$(HPCX_URL_centos_$(OMPI_VERSION)) \
--build-arg HPCX_URL_rhel=$(HPCX_URL_rhel_$(OMPI_VERSION)) \
-t $(DOCKER_IMAGE_BUILD_NNABLA_EXT_CUDA) \
-f docker/development/Dockerfile.build-mpi$(ARCH_SUFFIX) \
.
Expand All @@ -77,7 +77,7 @@ docker_image_build_cuda_test:
--build-arg PYTHON_VERSION_MAJOR=$(PYTHON_VERSION_MAJOR) \
--build-arg PYTHON_VERSION_MINOR=$(PYTHON_VERSION_MINOR) \
--build-arg BUILD_WITH_CUTENSOR=False \
--build-arg HPCX_URL_centos=$(HPCX_URL_centos_$(OMPI_VERSION)) \
--build-arg HPCX_URL_rhel=$(HPCX_URL_rhel_$(OMPI_VERSION)) \
-t $(DOCKER_IMAGE_TEST_NNABLA_EXT_CUDA) \
-f docker/development/Dockerfile.build-mpi$(ARCH_SUFFIX) \
.
Expand Down Expand Up @@ -174,9 +174,10 @@ docker_image_cuda_cudnn_lib_in_wheel:
cd $(NNABLA_EXT_CUDA_DIRECTORY) \
&& docker build $(DOCKER_BUILD_ARGS) \
--build-arg CUDA_VERSION_MAJOR=$(CUDA_VERSION_MAJOR) \
--build-arg CUDA_VERSION_MINOR=$(CUDA_VERSION_MINOR) \
--build-arg PYTHON_VER=3.$(PYTHON_VERSION_MINOR) \
--build-arg MPIVER=$(OMPI_VERSION) \
--build-arg HPCX_URL_centos=$(HPCX_URL_centos_$(OMPI_VERSION)) \
--build-arg HPCX_URL_rhel=$(HPCX_URL_rhel_$(OMPI_VERSION)) \
-f $(DOCKERFILE_PATH_LIB_IN_WHEEL) . -t nnabla-ext-cuda-lib-in-whl-py3$(PYTHON_VERSION_MINOR)-cuda$(CUDA_SUFFIX)-mpi$(OMPI_VERSION):$(DOCKER_IMAGE_ID_NNABLA_EXT_CUDA_LIB_IN_WHEEL)

########################################################################################################################
Expand Down
3 changes: 1 addition & 2 deletions build-tools/make/hpcx_url.mk
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,5 @@
.SILENT:

# Map specific openmpi version to HPCX download url

export HPCX_URL_ubuntu_4.1.5='https://content.mellanox.com/hpc/hpc-x/v2.12/hpcx-v2.12-gcc-MLNX_OFED_LINUX-5-ubuntu20.04-cuda11-gdrcopy2-nccl2.12-x86_64.tbz'
export HPCX_URL_centos_4.1.5='https://content.mellanox.com/hpc/hpc-x/v2.12/hpcx-v2.12-gcc-MLNX_OFED_LINUX-5-redhat7-cuda11-gdrcopy2-nccl2.12-x86_64.tbz'
export HPCX_URL_rhel_4.1.5='https://content.mellanox.com/hpc/hpc-x/v2.12/hpcx-v2.12-gcc-MLNX_OFED_LINUX-5-redhat8-cuda11-gdrcopy2-nccl2.12-x86_64.tbz'
174 changes: 87 additions & 87 deletions docker/development/Dockerfile.build-mpi
Original file line number Diff line number Diff line change
Expand Up @@ -19,52 +19,54 @@ ARG CUDA_VERSION_MINOR=0.3
ARG CUDNN_VERSION=8

############################################################ OpenMPI
# CentOS7: 1.10.7-1 (12)
# Ubuntu16: 1.10.2-1 (12)
# Ubuntu18: 2.1.1-1 (20)
# Ubuntu20: 4.0.3-1 (40)
# CentOS7: 3.1.3-1 (40)
# ABCI: 2.1.6-1 (20)
FROM centos:7 as openmpi
# CentOS7: 1.10.7-1 (12)
# Ubuntu16: 1.10.2-1 (12)
# Ubuntu18: 2.1.1-1 (20)
# CentOS7: 3.1.6-1 (30)
# RockyLinux8: 3.1.6-1 (30)
# Ubuntu20: 4.1.5-1 (40)
# ABCI: 4.1.5-1 (40)

FROM rockylinux:8 as openmpi

ARG PIP_INS_OPTS
ARG PYTHONWARNINGS
ARG CURL_OPTS
ARG WGET_OPTS
ARG YUM_OPTS
ARG DNF_OPTS
ARG MPIVER=3.1.6
ARG HPCX_URL_centos
ARG HPCX_URL_rhel

ENV LC_ALL C
ENV LANG C
ENV LANGUAGE C

RUN eval ${YUM_OPTS} \
&& yum install -y \
RUN eval ${DNF_OPTS} \
&& dnf install -y \
epel-release \
yum-utils \
&& yum install -y \
curl \
rpm-build \
dnf-plugins-core \
&& dnf install -y \
curl \
rpm-build \
&& if [ $(echo "${MPIVER}" | awk -F. '{ printf("%d%02d%02d\n", $1,$2,$3); }') -gt 30106 ]; then \
yum install -y libibverbs \
dnf install -y libibverbs \
librdmacm \
rdma-core-devel \
numactl-libs \
numactl-devel \
binutils-devel \
patch; \
fi \
&& yum group install -y "Development Tools" \
&& yum clean all
fi \
&& dnf group install -y "Development Tools" \
&& dnf clean all

COPY docker/release/hpcx-init.patch /tmp/hpcx-init.patch
COPY docker/release/hpcx-ompi-etc.patch /tmp/hpcx-ompi-etc.patch

RUN if [ $(echo "${MPIVER}" | awk -F. '{ printf("%d%02d%02d\n", $1,$2,$3); }') -gt 30106 ]; then \
mkdir /root/openmpi-hpcx \
&& cd /root/openmpi-hpcx \
&& curl ${CURL_OPTS} -LO ${HPCX_URL_centos} \
&& curl ${CURL_OPTS} -LO ${HPCX_URL_rhel} \
&& tar -xvf hpcx*.tbz \
&& rm -f hpcx*.tbz \
&& mv hpcx* hpcx \
Expand All @@ -87,78 +89,77 @@ RUN if [ $(echo "${MPIVER}" | awk -F. '{ printf("%d%02d%02d\n", $1,$2,$3); }') -
&& mv /root/rpmbuild/RPMS/x86_64/openmpi-${MPIVER}-1.*.rpm /root; \
fi

FROM nvidia/cuda:${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}-cudnn${CUDNN_VERSION}-devel-centos7
FROM nvidia/cuda:${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}-cudnn${CUDNN_VERSION}-devel-rockylinux8

ARG PIP_INS_OPTS
ARG PYTHONWARNINGS
ARG CURL_OPTS
ARG WGET_OPTS
ARG YUM_OPTS
ARG DNF_OPTS
ARG BUILD_WITH_CUTENSOR=True

ENV LC_ALL C
ENV LANG C
ENV LANGUAGE C

RUN eval ${YUM_OPTS} \
&& curl ${CURL_OPTS} -L https://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/D42D0685.pub -o D42D0685.pub \
RUN eval ${DNF_OPTS} \
&& curl ${CURL_OPTS} -L https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/D42D0685.pub -o D42D0685.pub \
&& rpm --import D42D0685.pub \
&& yum install -y \
&& dnf install -y \
epel-release \
yum-utils \
&& yum install -y \
ca-certificates \
curl \
freetype-devel \
git \
hdf5 \
hdf5-devel \
lapack-devel \
libjpeg-devel \
xz-devel \
libpng-devel \
redhat-lsb-core \
rpm-build \
unzip \
wget \
which \
zip \
zlib-static \
libmpc-devel \
mpfr-devel \
gmp-devel \
openssl-devel \
bzip2-devel \
libffi-devel \
nsight-systems-2021.1.3 \
libsndfile \
python-backports-lzma \
openssl11 \
openssl11-devel \
zlib-devel \
bzip2 bzip2-devel \
readline-devel \
sqlite \
sqlite-devel \
tk-devel \
numactl-libs \
&& if [ "${BUILD_WITH_CUTENSOR}" == "True" ]; then yum install -y libcutensor-devel; fi \
&& yum group install -y "Development Tools" \
&& yum clean all
dnf-plugins-core \
&& dnf config-manager --set-enabled powertools \
&& dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo \
&& dnf install -y \
ca-certificates \
curl \
freetype-devel \
git \
hdf5 \
hdf5-devel \
lapack-devel \
libjpeg-devel \
xz-devel \
libpng-devel \
redhat-lsb-core \
rpm-build \
unzip \
wget \
which \
zip \
zlib-static \
libmpc-devel \
mpfr-devel \
gmp-devel \
openssl-devel \
bzip2-devel \
libffi-devel \
nsight-systems-2023.2.3 \
libsndfile \
zlib-devel \
bzip2 bzip2-devel \
readline-devel \
sqlite \
sqlite-devel \
tk-devel \
numactl-libs \
&& if [ "${BUILD_WITH_CUTENSOR}" == "True" ]; then dnf install -y libcutensor-devel; fi \
&& dnf group install -y "Development Tools" \
&& dnf clean all

################################################### nvidia
ARG CUDA_VERSION_MAJOR=10
ARG CUDA_VERSION_MINOR=0

RUN mkdir /tmp/deps \
&& cd /tmp/deps \
&& curl ${CURL_OPTS} -L https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm -o nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm \
&& yum install -y nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm \
&& yum clean all \
&& curl ${CURL_OPTS} -L https://developer.download.nvidia.com/compute/machine-learning/repos/rhel8/x86_64/nvidia-machine-learning-repo-rhel8-1.0.0-1.x86_64.rpm -o nvidia-machine-learning-repo-rhel8-1.0.0-1.x86_64.rpm \
&& dnf install -y nvidia-machine-learning-repo-rhel8-1.0.0-1.x86_64.rpm \
&& dnf clean all \
&& cd / \
&& rm -rf /tmp/*

RUN yum install -y libnccl-*+cuda${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR%.?} libnccl-devel-*+cuda${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR%.?}
RUN dnf install -y libnccl-*-*+cuda${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR%.?} libnccl-devel-*-*+cuda${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR%.?}

################################################### gcc
RUN mkdir /tmp/deps \
Expand All @@ -176,7 +177,7 @@ RUN mkdir /tmp/deps \
ARG CMAKEVER=3.18.4
RUN mkdir /tmp/deps \
&& cd /tmp/deps \
&& yum install -y cmake3 openssl-devel \
&& dnf install -y cmake3 openssl-devel \
&& curl ${CURL_OPTS} -L https://github.com/Kitware/CMake/releases/download/v${CMAKEVER}/cmake-${CMAKEVER}.tar.gz -o cmake-${CMAKEVER}.tar.gz \
&& tar xf cmake-${CMAKEVER}.tar.gz \
&& cd cmake-${CMAKEVER} \
Expand All @@ -185,9 +186,9 @@ RUN mkdir /tmp/deps \
&& cmake3 -DBUILD_TESTING=FALSE .. \
&& make -j8 \
&& make install \
&& yum remove -y cmake3 \
&& yum clean all \
&& rm -rf /var/cache/yum/* \
&& dnf remove -y cmake3 \
&& dnf clean all \
&& rm -rf /var/cache/dnf/* \
&& cd / \
&& rm -rf /tmp/*

Expand All @@ -201,11 +202,11 @@ RUN mkdir /tmp/deps \
&& mkdir build \
&& cd build \
&& cmake \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-Dprotobuf_BUILD_TESTS=OFF \
-DCMAKE_CXX_STANDARD=14 \
-D CMAKE_C_COMPILER=gcc CMAKE_CXX_COMPILER=g++ /usr/bin/gcc \
../cmake \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
-Dprotobuf_BUILD_TESTS=OFF \
-DCMAKE_CXX_STANDARD=14 \
-D CMAKE_C_COMPILER=gcc CMAKE_CXX_COMPILER=g++ /usr/bin/gcc \
../cmake \
&& make -j8 \
&& make install \
&& cd / \
Expand All @@ -220,13 +221,13 @@ RUN mkdir /tmp/deps \
&& mkdir libarchive-build \
&& cd libarchive-build \
&& cmake \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DENABLE_NETTLE=FALSE -DENABLE_OPENSSL=FALSE \
-DENABLE_LZO=FALSE -DENABLE_LZMA=FALSE -DENABLE_BZip2=FALSE \
-DENABLE_LIBXML2=FALSE -DENABLE_EXPAT=FALSE -DENABLE_PCREPOSIX=FALSE -DENABLE_LibGCC=FALSE \
-DENABLE_CNG=FALSE -DENABLE_TAR=FALSE -DENABLE_TAR_SHARED=FALSE -DENABLE_CPIO=FALSE \
-DENABLE_CPIO_SHARED=FALSE -DENABLE_CAT=FALSE -DENABLE_CAT_SHARED=FALSE -DENABLE_XATTR=FALSE \
-DENABLE_ACL=FALSE -DENABLE_ICONV=FALSE -DENABLE_TEST=FALSE \
../libarchive-${LIBARCHIVEVER} \
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DENABLE_NETTLE=FALSE -DENABLE_OPENSSL=FALSE \
-DENABLE_LZO=FALSE -DENABLE_LZMA=FALSE -DENABLE_BZip2=FALSE \
-DENABLE_LIBXML2=FALSE -DENABLE_EXPAT=FALSE -DENABLE_PCREPOSIX=FALSE -DENABLE_LibGCC=FALSE \
-DENABLE_CNG=FALSE -DENABLE_TAR=FALSE -DENABLE_TAR_SHARED=FALSE -DENABLE_CPIO=FALSE \
-DENABLE_CPIO_SHARED=FALSE -DENABLE_CAT=FALSE -DENABLE_CAT_SHARED=FALSE -DENABLE_XATTR=FALSE \
-DENABLE_ACL=FALSE -DENABLE_ICONV=FALSE -DENABLE_TEST=FALSE \
../libarchive-${LIBARCHIVEVER} \
&& make -j8 \
&& make install \
&& cd / \
Expand Down Expand Up @@ -262,7 +263,6 @@ RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv \
&& export PYTHON_BUILD_CURL_OPTS="${CURL_OPTS}" \
&& export PYTHON_BUILD_WGET_OPTS="${WGET_OPTS}" \
&& export PYTHON_CONFIGURE_OPTS=--disable-shared \
&& if [ ${PYTHON_VERSION_MINOR} -ge 10 ]; then export CPPFLAGS=-I/usr/include/openssl11 && export LDFLAGS=-L/usr/lib64/openssl11; fi \
&& eval "$(pyenv init -)" \
&& python-build `pyenv latest -k ${PYVERNAME}` /usr/local \
&& pyenv global system \
Expand All @@ -275,7 +275,7 @@ RUN git clone https://github.com/pyenv/pyenv.git ~/.pyenv \
&& rm -rf ~/.pyenv/.git /tmp/*

RUN rm -f /usr/lib64/libstdc++.so.6
ENV PATH /tmp/.local/bin:/opt/nvidia/nsight-systems/2021.1.3/host-linux-x64/:$PATH
ENV PATH /tmp/.local/bin:/opt/nvidia/nsight-systems/2023.2.3/host-linux-x64/:$PATH
ENV LD_LIBRARY_PATH /usr/local/lib64:$LD_LIBRARY_PATH
ENV CC /usr/local/bin/gcc
ENV CXX /usr/local/bin/g++
Expand Down
Loading

0 comments on commit 10c5562

Please sign in to comment.