[SPARK-21708][BUILD] Migrate build to sbt 1.x
Migrate the sbt-launcher URL to download the sbt 1.x launcher.
Update plugin versions where required by the sbt update.
Change the sbt version in use to the latest release at the moment, 1.3.13.
Adjust build settings according to the plugin and sbt changes.
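
For reference, the sbt launcher version lives in project/build.properties (the awk lookup in build/sbt-launch-lib.bash below reads it from there), so after this change that file is expected to contain the single pinned line shown here (a minimal sketch, assuming the standard sbt layout):

    sbt.version=1.3.13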

Migration to sbt 1.x:
1. improves the developer experience
2. updates build plugins to pick up their new features and bug fixes
3. improves build performance on the sbt side
4. eases the move to Scala 3 / Dotty

No user-facing changes.

All existing tests passed, both on Jenkins and via GitHub Actions, as well as manually for the Scala 2.13 profile.

Closes apache#29286 from gemelen/feature/sbt-1.x.

Authored-by: Denis Pyshev <git@gemelen.net>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
gemelen authored and Lorenzo Martini committed Apr 20, 2021
1 parent 919028c commit e45a2f8
Showing 11 changed files with 144 additions and 93 deletions.
20 changes: 1 addition & 19 deletions .circleci/config.yml
@@ -28,9 +28,6 @@ all-branches-and-tags: &all-branches-and-tags
# Step templates

step_templates:
restore-build-binaries-cache: &restore-build-binaries-cache
restore_cache:
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
restore-ivy-cache: &restore-ivy-cache
restore_cache:
keys:
@@ -136,20 +133,11 @@ jobs:
- maven-dependency-cache-{{ checksum "pom.xml" }}
# Fallback - see https://circleci.com/docs/2.0/configuration-reference/#example-2
- maven-dependency-cache-
# Given the build-maven cache, this is superfluous, but leave it in in case we will want to remove the former
- restore_cache:
keys:
- build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
- build-binaries-
- run:
command: ./build/mvn -DskipTests -Psparkr -Phadoop-palantir install
no_output_timeout: 20m
# Get sbt to run trivially, ensures its launcher is downloaded under build/
- run: ./build/sbt -h || true
- save_cache:
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
paths:
- ./build
- save_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
paths:
@@ -165,7 +153,6 @@ jobs:
# Failed to execute goal on project spark-assembly_2.11: Could not resolve dependencies for project org.apache.spark:spark-assembly_2.11:pom:2.4.0-SNAPSHOT
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- *restore-build-binaries-cache
- run:
name: Run style tests
command: dev/run-style-tests.py
@@ -181,7 +168,6 @@ jobs:
# key: build-maven-{{ .Branch }}-{{ .BuildNum }}
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- *restore-build-binaries-cache
- run: |
dev/run-build-tests.py | tee /tmp/run-build-tests.log
- store_artifacts:
@@ -206,7 +192,6 @@ jobs:
fi
- *restore-ivy-cache
- *restore-home-sbt-cache
- *restore-build-binaries-cache
- run:
name: Download all external dependencies for the test configuration (which extends compile) and ensure we update first
command: dev/sbt test:externalDependencyClasspath oldDeps/test:externalDependencyClasspath
@@ -251,7 +236,6 @@ jobs:
- attach_workspace:
at: .
- *restore-ivy-cache
- *restore-build-binaries-cache
- *restore-home-sbt-cache
- run: |
dev/run-backcompat-tests.py | tee /tmp/run-backcompat-tests.log
@@ -305,7 +289,7 @@ jobs:
run-scala-tests:
<<: *test-defaults
# project/CirclePlugin.scala does its own test splitting in SBT based on CIRCLE_NODE_INDEX, CIRCLE_NODE_TOTAL
parallelism: 12
parallelism: 8
# Spark runs a lot of tests in parallel, we need 16 GB of RAM for this
resource_class: xlarge
steps:
@@ -320,7 +304,6 @@ jobs:
- *link-in-build-sbt-cache
# ---
- *restore-ivy-cache
- *restore-build-binaries-cache
- *restore-home-sbt-cache
- restore_cache:
keys:
@@ -407,7 +390,6 @@ jobs:
- *checkout-code
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- *restore-build-binaries-cache
- run:
command: dev/set_version_and_package.sh
no_output_timeout: 15m
17 changes: 17 additions & 0 deletions .sbtopts
@@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

-J-Xmx4G
-J-Xss4m
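
A note on these flags: the sbt runner reads .sbtopts from the project root, and the -J prefix forwards the rest of the option to the JVM that runs the build, so this file pins a 4 GiB maximum heap (-Xmx4G) and a 4 MiB thread stack (-Xss4m) for compilation. The same options could be passed ad hoc, e.g. ./build/sbt -J-Xmx4G -J-Xss4m compile (a sketch, assuming the launcher script handles -J options the usual way).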
2 changes: 1 addition & 1 deletion build/sbt-launch-lib.bash
@@ -39,7 +39,7 @@ dlog () {

acquire_sbt_jar () {
SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
-URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
+URL1=https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar
JAR=build/sbt-launch-${SBT_VERSION}.jar

sbt_jar=$JAR
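
Worked example of the new template for the version used in this migration (direct substitution of SBT_VERSION=1.3.13):

    https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.3.13/sbt-launch-1.3.13.jar

Unlike the old Bintray path, the jar name on Maven Central embeds the version, which is why the trailing segment changed as well.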
3 changes: 2 additions & 1 deletion dev/run-tests.py
@@ -390,7 +390,8 @@ def build_spark_assembly_sbt(extra_profiles, checkstyle=False):
if checkstyle:
run_java_style_checks(build_profiles)

-build_spark_unidoc_sbt(extra_profiles)
+# TODO(lmartini): removed because broken, checks generated classes
+# build_spark_unidoc_sbt(extra_profiles)


def build_apache_spark(build_tool, extra_profiles):
6 changes: 3 additions & 3 deletions project/CirclePlugin.scala
@@ -288,8 +288,8 @@ object CirclePlugin extends AutoPlugin {
}
},

-test := (test, copyTestReportsToCircle) { (test, copy) =>
-test.doFinally(copy.map(_ => ()))
-}.value
+test := (test andFinally Def.taskDyn {
+copyTestReportsToCircle
+}).value
))
}
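
The rewrite above is the sbt 1.x replacement for the old 0.13-style tuple syntax, which sbt 1 removed: andFinally chains a follow-up computation that runs whether or not the first task succeeds, matching the old doFinally behaviour. A minimal sketch of the pattern, with copyReports as a hypothetical stand-in for the plugin's copyTestReportsToCircle task:

    // Illustrative task key; the real plugin defines copyTestReportsToCircle.
    val copyReports = taskKey[Unit]("copy test reports to the CI artifact directory")

    test := (test andFinally Def.taskDyn {
      copyReports // runs after test, even when tests fail
    }).value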
17 changes: 9 additions & 8 deletions project/MimaBuild.scala
@@ -22,9 +22,7 @@ import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.MissingClassProblem
import com.typesafe.tools.mima.core.MissingTypesProblem
import com.typesafe.tools.mima.core.ProblemFilters._
-import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts}
-import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
-
+import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts, mimaFailOnNoPrevious}

object MimaBuild {

@@ -86,14 +84,17 @@
ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes(currentSparkVersion)
}

-def mimaSettings(sparkHome: File, projectRef: ProjectRef) = {
+def mimaSettings(sparkHome: File, projectRef: ProjectRef): Seq[Setting[_]] = {
val organization = "org.apache.spark"
-val previousSparkVersion = "2.4.0"
+val previousSparkVersion = "3.0.0"
val project = projectRef.project
val fullId = "spark-" + project + "_2.12"
-mimaDefaultSettings ++
-Seq(mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion),
-mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value))
+
+Seq(
+mimaFailOnNoPrevious := true,
+mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion),
+mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value)
+)
}

}
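
Two hedged notes on this change, based on sbt-mima-plugin's documented keys: newer plugin versions auto-enable as an AutoPlugin, which is why the mimaDefaultSettings mix-in disappears, and mimaFailOnNoPrevious := true makes the check fail loudly instead of passing silently when mimaPreviousArtifacts resolves to nothing. A module with no prior release to compare against would then opt out explicitly, for example:

    // Sketch: disable the comparison for a module with no previous artifact.
    mimaPreviousArtifacts := Set.empty
    mimaFailOnNoPrevious := false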
38 changes: 38 additions & 0 deletions project/MimaExcludes.scala
@@ -36,6 +36,44 @@ object MimaExcludes {

// Exclude rules for 3.0.x
lazy val v30excludes = v24excludes ++ Seq(
//[SPARK-21708][BUILD] Migrate build to sbt 1.x
// mima plugin update caused new incompatibilities to be detected
// core module
// TODO(lmartini): this group was originally on top of 3.1 but applied on 3.0 because we picked the above commit
// on top of 3.0
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),
// mllib module
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.totalIterations"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.$init$"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.labels"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.roc"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.areaUnderROC"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.pr"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.fMeasureByThreshold"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.precisionByThreshold"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.recallByThreshold"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.FMClassifier.trainImpl"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.FMRegressor.trainImpl"),
// TODO(lmartini): Additional excludes not in upstream but unique to palantir fork
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.initializeForcefully"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.initializeForcefully"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.broadcast.Broadcast.initializeForcefully"),

// [SPARK-23429][CORE] Add executor memory metrics to heartbeat and expose in executors REST API
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate.apply"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate.copy"),
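Each entry above has the same shape: a Mima problem type as the type parameter, and the fully qualified member whose incompatibility report it suppresses. An illustrative (hypothetical) filter outside this list:

    // Suppress a report about a method removed from a made-up class.
    ProblemFilters.exclude[DirectMissingMethodProblem]("org.example.SomeClass.removedMethod")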
(4 more changed files not shown)
