From 87a3fe685da9034e73c8c638332b86c12bc79a1d Mon Sep 17 00:00:00 2001 From: ihiverlet Date: Fri, 18 Oct 2024 11:37:44 +0200 Subject: [PATCH] upgrade spark 3.5.3 & jdbc postgres driver --- .github/workflows/main-workflow.yml | 10 +++++----- scripts/install-spark-hadoop-hive.sh | 9 ++++++--- spark/Dockerfile | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/.github/workflows/main-workflow.yml b/.github/workflows/main-workflow.yml index 3a5e1487..529b38c0 100644 --- a/.github/workflows/main-workflow.yml +++ b/.github/workflows/main-workflow.yml @@ -103,7 +103,7 @@ jobs: base_image: python-minimal python_version_1: 3.12.6 python_version_2: 3.11.10 - spark_version: 3.5.2 + spark_version: 3.5.3 build_gpu: false secrets: inherit jupyter-pyspark: @@ -115,7 +115,7 @@ jobs: base_image: pyspark python_version_1: 3.12.6 python_version_2: 3.11.10 - spark_version: 3.5.2 + spark_version: 3.5.3 build_gpu: false secrets: inherit vscode-pyspark: @@ -127,7 +127,7 @@ jobs: base_image: pyspark python_version_1: 3.12.6 python_version_2: 3.11.10 - spark_version: 3.5.2 + spark_version: 3.5.3 build_gpu: false secrets: inherit jupyter-pytorch: @@ -189,7 +189,7 @@ jobs: base_image: r-minimal r_version_1: 4.4.1 r_version_2: 4.3.3 - spark_version: 3.5.2 + spark_version: 3.5.3 build_gpu: false secrets: inherit rstudio-sparkr: @@ -201,7 +201,7 @@ jobs: base_image: sparkr r_version_1: 4.4.1 r_version_2: 4.3.3 - spark_version: 3.5.2 + spark_version: 3.5.3 build_gpu: false secrets: inherit jupyter-r: diff --git a/scripts/install-spark-hadoop-hive.sh b/scripts/install-spark-hadoop-hive.sh index 7656bed1..7771b6fd 100644 --- a/scripts/install-spark-hadoop-hive.sh +++ b/scripts/install-spark-hadoop-hive.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -HADOOP_VERSION="3.3.6" +HADOOP_VERSION="3.4.0" HIVE_VERSION="2.3.9" HIVE_LISTENER_VERSION="0.0.3" @@ -37,8 +37,8 @@ wget -q ${SPARK_BUILD_S3_BUCKET}/hive-listener/${HIVE_LISTENER_JAR} mv ${HIVE_LISTENER_JAR} ${HIVE_HOME}/lib/hive-listener.jar # Add postgreSQL support to Hive -wget -q https://jdbc.postgresql.org/download/postgresql-42.2.18.jar -mv postgresql-42.2.18.jar ${HIVE_HOME}/lib/postgresql-jdbc.jar +wget -q https://jdbc.postgresql.org/download/postgresql-42.7.3.jar +mv postgresql-42.7.3.jar ${HIVE_HOME}/lib/postgresql-jdbc.jar # Fix versions inconsistencies of some binaries between Hadoop & Hive distributions rm ${HIVE_HOME}/lib/guava-14.0.1.jar @@ -46,3 +46,6 @@ cp ${HADOOP_HOME}/share/hadoop/common/lib/guava-27.0-jre.jar ${HIVE_HOME}/lib/ wget -q https://repo1.maven.org/maven2/jline/jline/2.14.6/jline-2.14.6.jar mv jline-2.14.6.jar ${HIVE_HOME}/lib/ rm ${HIVE_HOME}/lib/jline-2.12.jar + +# Fix multiple bindings +rm ${HADOOP_HOME}/share/hadoop/tools/lib/bundle-2.23.19.jar diff --git a/spark/Dockerfile b/spark/Dockerfile index 11cbbec0..0d095be9 100644 --- a/spark/Dockerfile +++ b/spark/Dockerfile @@ -5,7 +5,7 @@ LABEL maintainer="InseeFrLab " ARG BASE_IMAGE -ARG SPARK_VERSION="3.5.1" +ARG SPARK_VERSION="3.5.3" ENV SPARK_VERSION=${SPARK_VERSION} ENV JAVA_VERSION="17"