Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP][SPARK-35917][SHUFFLE][CORE] Disable push-based shuffle feature to prevent it from being used #33118

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ public boolean useOldFetchProtocol() {
* 'org.apache.spark.network.shuffle.ExternalBlockHandler$NoOpMergedShuffleFileManager'.
* To turn on push-based shuffle at a cluster level, set the configuration to
* 'org.apache.spark.network.shuffle.RemoteBlockPushResolver'.
*
* Push-based shuffle is not yet supported.
*/
public String mergedShuffleFileManagerImpl() {
return conf.get("spark.shuffle.server.mergedShuffleFileManagerImpl",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,10 @@ protected void serviceInit(Configuration externalConf) throws Exception {
TransportConf transportConf = new TransportConf("shuffle", new HadoopConfigProvider(_conf));
MergedShuffleFileManager shuffleMergeManager = newMergedShuffleFileManagerInstance(
transportConf);
if (!(shuffleMergeManager instanceof ExternalBlockHandler.NoOpMergedShuffleFileManager)) {
// TODO: Remove this once push-based shuffle is fully supported.
throw new UnsupportedOperationException("Push-based shuffle is not yet supported.");
}
blockHandler = new ExternalBlockHandler(
transportConf, registeredExecutorFile, shuffleMergeManager);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2079,7 +2079,7 @@ package object config {
"conjunction with the server side flag spark.shuffle.server.mergedShuffleFileManagerImpl " +
"which needs to be set with the appropriate " +
"org.apache.spark.network.shuffle.MergedShuffleFileManager implementation for push-based " +
"shuffle to be enabled")
"shuffle to be enabled. Push-based shuffle is not yet supported.")
.version("3.1.0")
.booleanConf
.createWithDefault(false)
Expand Down
7 changes: 6 additions & 1 deletion core/src/main/scala/org/apache/spark/util/Utils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2598,11 +2598,16 @@ private[spark] object Utils extends Logging {
* Support push based shuffle with multiple app attempts
*/
def isPushBasedShuffleEnabled(conf: SparkConf): Boolean = {
  // Read the testing flag once; it feeds both the enablement check and the guard below.
  val isTesting = conf.get(IS_TESTING).getOrElse(false)
  // Outside of tests the flag only takes effect when the external shuffle service is
  // enabled, the master is "yarn", and getYarnMaxAttempts(conf) is 1.
  val pushBasedShuffleEnabled = conf.get(PUSH_BASED_SHUFFLE_ENABLED) &&
    (isTesting ||
      (conf.get(SHUFFLE_SERVICE_ENABLED) &&
        conf.get(SparkLauncher.SPARK_MASTER, null) == "yarn" &&
        getYarnMaxAttempts(conf) == 1))
  if (pushBasedShuffleEnabled && !isTesting) {
    // TODO: Remove this once push-based shuffle is fully supported.
    // Fail fast so a non-test deployment cannot silently run with the feature on.
    throw new UnsupportedOperationException("Push-based shuffle is not yet supported.")
  }
  pushBasedShuffleEnabled
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import org.apache.spark._
import org.apache.spark.broadcast.BroadcastManager
import org.apache.spark.executor.ExecutorMetrics
import org.apache.spark.internal.config
import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.rdd.{DeterministicLevel, RDD}
import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, ResourceProfileBuilder, TaskResourceRequests}
import org.apache.spark.resource.ResourceUtils.{FPGA, GPU}
Expand Down Expand Up @@ -3426,6 +3427,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti
private def initPushBasedShuffleConfs(conf: SparkConf) = {
  // Configure `conf` for the push-based shuffle tests: shuffle service on, push-based
  // shuffle on, and a dedicated test master. IS_TESTING is set because
  // Utils.isPushBasedShuffleEnabled throws UnsupportedOperationException when the
  // feature resolves to enabled outside of testing.
  conf
    .set(config.SHUFFLE_SERVICE_ENABLED, true)
    .set(config.PUSH_BASED_SHUFFLE_ENABLED, true)
    .set(IS_TESTING, true)
    .set("spark.master", "pushbasedshuffleclustermanager")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import org.scalatest.matchers.should.Matchers._

import org.apache.spark._
import org.apache.spark.internal.config._
import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.network.TransportContext
import org.apache.spark.network.netty.{NettyBlockTransferService, SparkTransportConf}
import org.apache.spark.network.server.TransportServer
Expand Down Expand Up @@ -136,6 +137,7 @@ class HostLocalShuffleReadingSuite extends SparkFunSuite with Matchers with Loca

test("Enable host local shuffle reading when push based shuffle is enabled") {
val conf = new SparkConf()
.set(IS_TESTING, true)
.set(SHUFFLE_SERVICE_ENABLED, true)
.set("spark.yarn.maxAttempts", "1")
.set(PUSH_BASED_SHUFFLE_ENABLED, true)
Expand Down
2 changes: 1 addition & 1 deletion core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
conf.set(SHUFFLE_SERVICE_ENABLED, true)
conf.set(SparkLauncher.SPARK_MASTER, "yarn")
conf.set("spark.yarn.maxAttempts", "1")
assert(Utils.isPushBasedShuffleEnabled(conf) === true)
assertThrows[UnsupportedOperationException](Utils.isPushBasedShuffleEnabled(conf))
conf.set("spark.yarn.maxAttempts", "2")
assert(Utils.isPushBasedShuffleEnabled(conf) === false)
}
Expand Down