-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-17443][SPARK-11035] Stop Spark Application if launcher goes down and use reflection #15009
Changes from 45 commits
8986aa9
99b1d1b
70a67fb
bac5cd2
38a7e3a
67f8de7
57defa9
eaa1bca
a3250ac
58c6bac
0a57684
1fe498b
25c3258
14050f5
92e4445
64a21b3
1e6311a
c207b30
ad20ccc
6a7ba5b
3cb8e85
3091105
2fdcec9
64a0e45
4357107
2707d21
cc2c0be
82df055
99d8c29
41cf6da
b098ecd
b66243d
bc99435
517fed0
a3d18b4
c17f15f
026d026
fe5b5d6
677edf7
ee3f24a
30b460c
7323200
0cfd4a7
8609874
ab50dd8
14c6365
04e56fc
7ee465f
3f060b6
f1b49d8
2996fb1
a311721
d906072
81fd297
10513ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark | ||
|
||
/** | ||
* An interface that can be implemented by applications launched by SparkSubmit | ||
* which exposes the Spark job configuration explicitly. | ||
*/ | ||
private[spark] trait SparkApp { | ||
this: Singleton => | ||
|
||
/** | ||
* Method executed by SparkSubmit to run the application. | ||
* | ||
* @param args - all arguments for SparkApp. | ||
* @param conf - Spark Configuration. | ||
*/ | ||
def sparkMain(args: Array[String], conf: Map[String, String]): Unit | ||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction | |
import java.text.ParseException | ||
|
||
import scala.annotation.tailrec | ||
import scala.collection.JavaConverters._ | ||
import scala.collection.mutable.{ArrayBuffer, HashMap, Map} | ||
import scala.util.Properties | ||
|
||
|
@@ -691,10 +692,6 @@ object SparkSubmit extends CommandLineUtils { | |
addJarToClasspath(jar, loader) | ||
} | ||
|
||
for ((key, value) <- sysProps) { | ||
System.setProperty(key, value) | ||
} | ||
|
||
var mainClass: Class[_] = null | ||
|
||
try { | ||
|
@@ -725,9 +722,15 @@ object SparkSubmit extends CommandLineUtils { | |
printWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.") | ||
} | ||
|
||
val mainMethod = mainClass.getMethod("main", new Array[String](0).getClass) | ||
if (!Modifier.isStatic(mainMethod.getModifiers)) { | ||
throw new IllegalStateException("The main method in the given main class must be static") | ||
val sparkAppMainMethod = mainClass.getMethods().find(_.getName == "sparkMain") | ||
val childSparkConf = sysProps.filter{ p => p._1.startsWith("spark.") }.toMap | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: again, please address the feedback that is given. I've lost count of how many times I've pointed out that there's a missing space between |
||
|
||
// If running a SparkApp we can explicitly pass in the confs separately. | ||
// If we aren't running a SparkApp they get passed via the system properties. | ||
if (sparkAppMainMethod.isEmpty) { | ||
sysProps.foreach { case (key, value) => | ||
System.setProperty(key, value) | ||
} | ||
} | ||
|
||
@tailrec | ||
|
@@ -741,7 +744,17 @@ object SparkSubmit extends CommandLineUtils { | |
} | ||
|
||
try { | ||
mainMethod.invoke(null, childArgs.toArray) | ||
if (sparkAppMainMethod.isDefined) { | ||
sparkAppMainMethod.get.invoke(null, childArgs.toArray, childSparkConf) | ||
} else { | ||
val mainMethod = mainClass.getMethod("main", new Array[String](0).getClass) | ||
|
||
if (!Modifier.isStatic(mainMethod.getModifiers)) { | ||
throw new IllegalStateException("The main method in the given main class must be static") | ||
} | ||
|
||
mainMethod.invoke(null, childArgs.toArray) | ||
} | ||
} catch { | ||
case t: Throwable => | ||
findCause(t) match { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,35 +17,59 @@ | |
|
||
package org.apache.spark.launcher | ||
|
||
import java.io.IOException | ||
import java.net.{InetAddress, Socket} | ||
|
||
import org.apache.spark.SPARK_VERSION | ||
import org.apache.spark.internal.Logging | ||
import org.apache.spark.launcher.LauncherProtocol._ | ||
import org.apache.spark.util.{ThreadUtils, Utils} | ||
import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} | ||
|
||
/** | ||
* A class that can be used to talk to a launcher server. Users should extend this class to | ||
* provide implementation for the abstract methods. | ||
* | ||
* See `LauncherServer` for an explanation of how launcher communication works. | ||
*/ | ||
private[spark] abstract class LauncherBackend { | ||
private[spark] abstract class LauncherBackend extends Logging { | ||
|
||
private var clientThread: Thread = _ | ||
private var connection: BackendConnection = _ | ||
private var lastState: SparkAppHandle.State = _ | ||
private var stopOnShutdown: Boolean = false | ||
@volatile private var _isConnected = false | ||
|
||
def connect(): Unit = { | ||
val port = sys.env.get(LauncherProtocol.ENV_LAUNCHER_PORT).map(_.toInt) | ||
val secret = sys.env.get(LauncherProtocol.ENV_LAUNCHER_SECRET) | ||
val stopFlag = sys.env.get(LauncherProtocol.ENV_LAUNCHER_STOP_IF_SHUTDOWN).map(_.toBoolean) | ||
if (port != None && secret != None) { | ||
val s = new Socket(InetAddress.getLoopbackAddress(), port.get) | ||
connection = new BackendConnection(s) | ||
connection.send(new Hello(secret.get, SPARK_VERSION)) | ||
clientThread = LauncherBackend.threadFactory.newThread(connection) | ||
clientThread.start() | ||
_isConnected = true | ||
connect(port.get, secret.get, stopFlag.getOrElse(false)) | ||
} | ||
} | ||
|
||
def connect(port: Int, secret: String, stopFlag: Boolean): Unit = { | ||
this.stopOnShutdown = stopFlag | ||
val s = new Socket(InetAddress.getLoopbackAddress(), port) | ||
connection = new BackendConnection(s) | ||
connection.send(new Hello(secret, SPARK_VERSION)) | ||
clientThread = LauncherBackend.threadFactory.newThread(connection) | ||
clientThread.start() | ||
_isConnected = true | ||
if (stopOnShutdown) { | ||
logDebug("Adding shutdown hook") // force eager creation of logger | ||
ShutdownHookManager.addShutdownHook( | ||
ShutdownHookManager.SPARK_CONTEXT_SHUTDOWN_PRIORITY) { () => | ||
logInfo("Invoking onStopRequest() from shutdown hook") | ||
try { | ||
if (_isConnected) { | ||
onStopRequest() | ||
} | ||
} catch { | ||
case ioException: IOException => | ||
logError("Error while running LauncherBackend shutdownHook...", ioException) | ||
} | ||
} | ||
} | ||
} | ||
|
||
|
@@ -110,12 +134,14 @@ private[spark] abstract class LauncherBackend { | |
override def close(): Unit = { | ||
try { | ||
super.close() | ||
if (stopOnShutdown) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See previous comment for why I don't think this is correct. |
||
fireStopRequest() | ||
} | ||
} finally { | ||
onDisconnected() | ||
_isConnected = false | ||
} | ||
} | ||
|
||
} | ||
|
||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -183,6 +183,28 @@ public void testChildProcLauncher() throws Exception { | |
assertEquals(0, app.waitFor()); | ||
} | ||
|
||
@Test | ||
public void testThreadLauncher() throws Exception { | ||
// This test is failed on Windows due to the failure of initiating executors | ||
// by the path length limitation. See SPARK-18718. | ||
assumeTrue(!Utils.isWindows()); | ||
|
||
launcher | ||
.setMaster("local") | ||
.setAppResource(SparkLauncher.NO_RESOURCE) | ||
.setConf(SparkLauncher.DRIVER_EXTRA_JAVA_OPTIONS, | ||
"-Dfoo=bar -Dtest.appender=childproc") | ||
.setConf(SparkLauncher.DRIVER_EXTRA_CLASSPATH, System.getProperty("java.class.path")) | ||
.setMainClass(SparkLauncherTestApp.class.getName()) | ||
.launchAsThread(true) | ||
.addAppArgs("proc"); | ||
final Process app = launcher.launch(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is this testing?
|
||
|
||
new OutputRedirector(app.getInputStream(), TF); | ||
new OutputRedirector(app.getErrorStream(), TF); | ||
assertEquals(0, app.waitFor()); | ||
} | ||
|
||
public static class SparkLauncherTestApp { | ||
|
||
public static void main(String[] args) throws Exception { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.launcher; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.logging.Level; | ||
import java.util.logging.Logger; | ||
|
||
abstract class AbstractSparkAppHandle implements SparkAppHandle { | ||
private static final Logger LOG = Logger.getLogger(AbstractSparkAppHandle.class.getName()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nevermind, forgot this is java.util.logging not slf4j... |
||
|
||
protected final String secret; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: please add a blank line between static and non-static fields. |
||
protected final LauncherServer server; | ||
protected boolean disposed; | ||
protected List<Listener> listeners; | ||
protected State state; | ||
private LauncherConnection connection; | ||
private String appId; | ||
OutputRedirector redirector; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove extra blank newlines |
||
public AbstractSparkAppHandle(LauncherServer server, String secret) { | ||
this.server = server; | ||
this.secret = secret; | ||
this.state = State.UNKNOWN; | ||
} | ||
|
||
@Override | ||
public synchronized void addListener(Listener l) { | ||
if (listeners == null) { | ||
listeners = new ArrayList<>(); | ||
} | ||
listeners.add(l); | ||
} | ||
|
||
@Override | ||
public State getState() { | ||
return state; | ||
} | ||
|
||
@Override | ||
public String getAppId() { | ||
return appId; | ||
} | ||
|
||
@Override | ||
public void stop() { | ||
CommandBuilderUtils.checkState(connection != null, "Application is still not connected."); | ||
try { | ||
connection.send(new LauncherProtocol.Stop()); | ||
} catch (IOException ioe) { | ||
throw new RuntimeException(ioe); | ||
} | ||
} | ||
|
||
@Override | ||
public synchronized void disconnect() { | ||
if (!disposed) { | ||
disposed = true; | ||
if (connection != null) { | ||
try { | ||
connection.close(); | ||
} catch (IOException ioe) { | ||
// no-op. | ||
} | ||
} | ||
server.unregister(this); | ||
if (redirector != null) { | ||
redirector.stop(); | ||
} | ||
} | ||
} | ||
|
||
String getSecret() { | ||
return secret; | ||
} | ||
|
||
void setConnection(LauncherConnection connection) { | ||
this.connection = connection; | ||
} | ||
|
||
LauncherServer getServer() { | ||
return server; | ||
} | ||
|
||
LauncherConnection getConnection() { | ||
return connection; | ||
} | ||
|
||
void setState(State s) { | ||
if (!state.isFinal()) { | ||
state = s; | ||
fireEvent(false); | ||
} else { | ||
LOG.log(Level.WARNING, "Backend requested transition from final state {0} to {1}.", | ||
new Object[]{state, s}); | ||
} | ||
} | ||
|
||
void setAppId(String appId) { | ||
this.appId = appId; | ||
fireEvent(true); | ||
} | ||
|
||
private synchronized void fireEvent(boolean isInfoChanged) { | ||
if (listeners != null) { | ||
for (Listener l : listeners) { | ||
if (isInfoChanged) { | ||
l.infoChanged(this); | ||
} else { | ||
l.stateChanged(this); | ||
} | ||
} | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this being used anywhere?