[SPARK-21708][BUILD] Migrate build to sbt 1.x
Migrate the sbt-launcher URL to download the sbt 1.x launcher.
Update plugin versions where required by the sbt update.
Change the sbt version in use to the latest release at the moment, 1.3.13.
Adjust build settings according to the plugin and sbt changes.
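
For reference, the sbt launcher version lives in project/build.properties (the awk lookup in build/sbt-launch-lib.bash below reads it from there), so after this change that file is expected to contain the single pinned line shown here (a minimal sketch, assuming the standard sbt layout):

    sbt.version=1.3.13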

Migration to sbt 1.x:
1. improves the developer experience
2. updates build plugins to pick up their new features and bug fixes
3. improves build performance on the sbt side
4. eases the move to Scala 3 / Dotty

No user-facing changes.

All existing tests passed, both on Jenkins and via GitHub Actions, as well as manually for the Scala 2.13 profile.

Closes apache#29286 from gemelen/feature/sbt-1.x.

Authored-by: Denis Pyshev <git@gemelen.net>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
gemelen authored and Lorenzo Martini committed Apr 20, 2021
1 parent 919028c commit e45a2f8
Showing 11 changed files with 144 additions and 93 deletions.
20 changes: 1 addition & 19 deletions .circleci/config.yml
@@ -28,9 +28,6 @@ all-branches-and-tags: &all-branches-and-tags
# Step templates

step_templates:
restore-build-binaries-cache: &restore-build-binaries-cache
restore_cache:
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
restore-ivy-cache: &restore-ivy-cache
restore_cache:
keys:
@@ -136,20 +133,11 @@ jobs:
- maven-dependency-cache-{{ checksum "pom.xml" }}
# Fallback - see https://circleci.com/docs/2.0/configuration-reference/#example-2
- maven-dependency-cache-
# Given the build-maven cache, this is superfluous, but leave it in in case we will want to remove the former
- restore_cache:
keys:
- build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
- build-binaries-
- run:
command: ./build/mvn -DskipTests -Psparkr -Phadoop-palantir install
no_output_timeout: 20m
# Get sbt to run trivially, ensures its launcher is downloaded under build/
- run: ./build/sbt -h || true
- save_cache:
key: build-binaries-{{ checksum "build/mvn" }}-{{ checksum "build/sbt" }}
paths:
- ./build
- save_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
paths:
@@ -165,7 +153,6 @@ jobs:
# Failed to execute goal on project spark-assembly_2.11: Could not resolve dependencies for project org.apache.spark:spark-assembly_2.11:pom:2.4.0-SNAPSHOT
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- *restore-build-binaries-cache
- run:
name: Run style tests
command: dev/run-style-tests.py
@@ -181,7 +168,6 @@ jobs:
# key: build-maven-{{ .Branch }}-{{ .BuildNum }}
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- *restore-build-binaries-cache
- run: |
dev/run-build-tests.py | tee /tmp/run-build-tests.log
- store_artifacts:
@@ -206,7 +192,6 @@ jobs:
fi
- *restore-ivy-cache
- *restore-home-sbt-cache
- *restore-build-binaries-cache
- run:
name: Download all external dependencies for the test configuration (which extends compile) and ensure we update first
command: dev/sbt test:externalDependencyClasspath oldDeps/test:externalDependencyClasspath
@@ -251,7 +236,6 @@ jobs:
- attach_workspace:
at: .
- *restore-ivy-cache
- *restore-build-binaries-cache
- *restore-home-sbt-cache
- run: |
dev/run-backcompat-tests.py | tee /tmp/run-backcompat-tests.log
@@ -305,7 +289,7 @@ jobs:
run-scala-tests:
<<: *test-defaults
# project/CirclePlugin.scala does its own test splitting in SBT based on CIRCLE_NODE_INDEX, CIRCLE_NODE_TOTAL
parallelism: 12
parallelism: 8
# Spark runs a lot of tests in parallel, we need 16 GB of RAM for this
resource_class: xlarge
steps:
@@ -320,7 +304,6 @@ jobs:
- *link-in-build-sbt-cache
# ---
- *restore-ivy-cache
- *restore-build-binaries-cache
- *restore-home-sbt-cache
- restore_cache:
keys:
@@ -407,7 +390,6 @@ jobs:
- *checkout-code
- restore_cache:
key: maven-dependency-cache-{{ checksum "pom.xml" }}
- *restore-build-binaries-cache
- run:
command: dev/set_version_and_package.sh
no_output_timeout: 15m
17 changes: 17 additions & 0 deletions .sbtopts
@@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

-J-Xmx4G
-J-Xss4m
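
A note on these flags: the sbt runner reads .sbtopts from the project root, and the -J prefix forwards the rest of the option to the JVM that runs the build, so this file pins a 4 GiB maximum heap (-Xmx4G) and a 4 MiB thread stack (-Xss4m) for compilation. The same options could be passed ad hoc, e.g. ./build/sbt -J-Xmx4G -J-Xss4m compile (a sketch, assuming the launcher script handles -J options the usual way).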
2 changes: 1 addition & 1 deletion build/sbt-launch-lib.bash
@@ -39,7 +39,7 @@ dlog () {

acquire_sbt_jar () {
SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
-URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
+URL1=https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar
JAR=build/sbt-launch-${SBT_VERSION}.jar

sbt_jar=$JAR
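
Worked example of the new template for the version used in this migration (direct substitution of SBT_VERSION=1.3.13):

    https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/1.3.13/sbt-launch-1.3.13.jar

Unlike the old Bintray path, the jar name on Maven Central embeds the version, which is why the trailing segment changed as well.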
3 changes: 2 additions & 1 deletion dev/run-tests.py
@@ -390,7 +390,8 @@ def build_spark_assembly_sbt(extra_profiles, checkstyle=False):
if checkstyle:
run_java_style_checks(build_profiles)

-build_spark_unidoc_sbt(extra_profiles)
+# TODO(lmartini): removed because broken, checks generated classes
+# build_spark_unidoc_sbt(extra_profiles)


def build_apache_spark(build_tool, extra_profiles):
6 changes: 3 additions & 3 deletions project/CirclePlugin.scala
@@ -288,8 +288,8 @@ object CirclePlugin extends AutoPlugin {
}
},

-test := (test, copyTestReportsToCircle) { (test, copy) =>
-test.doFinally(copy.map(_ => ()))
-}.value
+test := (test andFinally Def.taskDyn {
+copyTestReportsToCircle
+}).value
))
}
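
The rewrite above is the sbt 1.x replacement for the old 0.13-style tuple syntax, which sbt 1 removed: andFinally chains a follow-up computation that runs whether or not the first task succeeds, matching the old doFinally behaviour. A minimal sketch of the pattern, with copyReports as a hypothetical stand-in for the plugin's copyTestReportsToCircle task:

    // Illustrative task key; the real plugin defines copyTestReportsToCircle.
    val copyReports = taskKey[Unit]("copy test reports to the CI artifact directory")

    test := (test andFinally Def.taskDyn {
      copyReports // runs after test, even when tests fail
    }).value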
17 changes: 9 additions & 8 deletions project/MimaBuild.scala
@@ -22,9 +22,7 @@ import com.typesafe.tools.mima.core._
import com.typesafe.tools.mima.core.MissingClassProblem
import com.typesafe.tools.mima.core.MissingTypesProblem
import com.typesafe.tools.mima.core.ProblemFilters._
-import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts}
-import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings
-
+import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts, mimaFailOnNoPrevious}

object MimaBuild {

@@ -86,14 +84,17 @@
ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes(currentSparkVersion)
}

-def mimaSettings(sparkHome: File, projectRef: ProjectRef) = {
+def mimaSettings(sparkHome: File, projectRef: ProjectRef): Seq[Setting[_]] = {
val organization = "org.apache.spark"
-val previousSparkVersion = "2.4.0"
+val previousSparkVersion = "3.0.0"
val project = projectRef.project
val fullId = "spark-" + project + "_2.12"
-mimaDefaultSettings ++
-Seq(mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion),
-mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value))
+
+Seq(
+mimaFailOnNoPrevious := true,
+mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion),
+mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value)
+)
}

}
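
Two hedged notes on this change, based on sbt-mima-plugin's documented keys: newer plugin versions auto-enable as an AutoPlugin, which is why the mimaDefaultSettings mix-in disappears, and mimaFailOnNoPrevious := true makes the check fail loudly instead of passing silently when mimaPreviousArtifacts resolves to nothing. A module with no prior release to compare against would then opt out explicitly, for example:

    // Sketch: disable the comparison for a module with no previous artifact.
    mimaPreviousArtifacts := Set.empty
    mimaFailOnNoPrevious := false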
38 changes: 38 additions & 0 deletions project/MimaExcludes.scala
@@ -36,6 +36,44 @@ object MimaExcludes {

// Exclude rules for 3.0.x
lazy val v30excludes = v24excludes ++ Seq(
//[SPARK-21708][BUILD] Migrate build to sbt 1.x
// mima plugin update caused new incompatibilities to be detected
// core module
// TODO(lmartini): this group was originally on top of 3.1 but applied on 3.0 because we picked the above commit
// on top of 3.0
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),
ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"),
// mllib module
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.totalIterations"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.$init$"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.labels"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.roc"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.areaUnderROC"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.pr"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.fMeasureByThreshold"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.precisionByThreshold"),
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.recallByThreshold"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.FMClassifier.trainImpl"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.FMRegressor.trainImpl"),
// TODO(lmartini): Additional excludes not in upstream but unique to palantir fork
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.initializeForcefully"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkContext.initializeForcefully"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.broadcast.Broadcast.initializeForcefully"),

// [SPARK-23429][CORE] Add executor memory metrics to heartbeat and expose in executors REST API
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate.apply"),
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate.copy"),
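Each entry above has the same shape: a Mima problem type as the type parameter, and the fully qualified member whose incompatibility report it suppresses. An illustrative (hypothetical) filter outside this list:

    // Suppress a report about a method removed from a made-up class.
    ProblemFilters.exclude[DirectMissingMethodProblem]("org.example.SomeClass.removedMethod")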
(4 more changed files not shown)
