From ef89b278f8e04b459cd7539fd16754d6cdc77a2d Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 13 Sep 2023 10:19:12 +0800 Subject: [PATCH] [SPARK-45096][INFRA] Optimize apt-get install in Dockerfile ### What changes were proposed in this pull request? Follow the [Best practices for writing Dockerfiles](https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#apt-get): > Always combine RUN apt-get update with apt-get install in the same RUN statement. ### Why are the changes needed? 1. To address https://github.com/apache/spark/pull/42253#discussion_r1280479837 2. When I attempted to change the apt-get install in https://github.com/apache/spark/pull/41918, the behavior was confusing. By following the best practices, further changes should take effect immediately. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? CI. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42842 from zhengruifeng/infra_docker_file_opt. 
Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng --- dev/infra/Dockerfile | 50 +++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index b69e682f239c8..60204dcc49ece 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -24,19 +24,44 @@ ENV FULL_REFRESH_DATE 20221118 ENV DEBIAN_FRONTEND noninteractive ENV DEBCONF_NONINTERACTIVE_SEEN true -ARG APT_INSTALL="apt-get install --no-install-recommends -y" - -RUN apt-get clean -RUN apt-get update -RUN $APT_INSTALL software-properties-common git libxml2-dev pkg-config curl wget openjdk-8-jdk libpython3-dev python3-pip python3-setuptools python3.8 python3.9 -RUN update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java +RUN apt-get update && apt-get install -y \ + software-properties-common \ + git \ + pkg-config \ + curl \ + wget \ + openjdk-8-jdk \ + gfortran \ + libopenblas-dev \ + liblapack-dev \ + build-essential \ + gnupg \ + ca-certificates \ + pandoc \ + libpython3-dev \ + python3-pip \ + python3-setuptools \ + python3.8 \ + python3.9 \ + r-base \ + libcurl4-openssl-dev \ + qpdf \ + zlib1g-dev \ + libssl-dev \ + libpng-dev \ + libharfbuzz-dev \ + libfribidi-dev \ + libtiff5-dev \ + libgit2-dev \ + libxml2-dev \ + libjpeg-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + && rm -rf /var/lib/apt/lists/* RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 RUN add-apt-repository ppa:pypy/ppa -RUN apt update -RUN $APT_INSTALL gfortran libopenblas-dev liblapack-dev -RUN $APT_INSTALL build-essential RUN mkdir -p /usr/local/pypy/pypy3.8 && \ curl -sqL https://downloads.python.org/pypy/pypy3.8-v7.3.11-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.8 --strip-components=1 && \ @@ -45,19 +70,14 @@ RUN mkdir -p /usr/local/pypy/pypy3.8 && \ RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 -RUN $APT_INSTALL gnupg ca-certificates pandoc RUN echo 'deb 
https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list RUN gpg --keyserver hkps://keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9 RUN gpg -a --export E084DAB9 | apt-key add - RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' -RUN apt update -RUN $APT_INSTALL r-base libcurl4-openssl-dev qpdf libssl-dev zlib1g-dev + RUN Rscript -e "install.packages(c('knitr', 'markdown', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2', 'xml2'), repos='https://cloud.r-project.org/')" # See more in SPARK-39959, roxygen2 < 7.2.1 -RUN apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \ - libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev \ - libtiff5-dev libjpeg-dev RUN Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" RUN Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='https://cloud.r-project.org')"