-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-21146] [CORE] Master/Worker should handle and shutdown when any thread gets UncaughtException #18357
[SPARK-21146] [CORE] Master/Worker should handle and shutdown when any thread gets UncaughtException #18357
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,27 +26,34 @@ import org.apache.spark.internal.Logging | |
*/ | ||
private[spark] object SparkUncaughtExceptionHandler | ||
extends Thread.UncaughtExceptionHandler with Logging { | ||
private[this] var exitOnException = true | ||
|
||
override def uncaughtException(thread: Thread, exception: Throwable) { | ||
try { | ||
// Make it explicit that uncaught exceptions are thrown when container is shutting down. | ||
// It will help users when they analyze the executor logs | ||
val inShutdownMsg = if (ShutdownHookManager.inShutdown()) "[Container in shutdown] " else "" | ||
val errMsg = "Uncaught exception in thread " | ||
logError(inShutdownMsg + errMsg + thread, exception) | ||
def apply(exitOnException: Boolean): Thread.UncaughtExceptionHandler = { | ||
this.exitOnException = exitOnException | ||
this | ||
} | ||
|
||
// We may have been called from a shutdown hook. If so, we must not call System.exit(). | ||
// (If we do, we will deadlock.) | ||
if (!ShutdownHookManager.inShutdown()) { | ||
override def uncaughtException(thread: Thread, exception: Throwable) { | ||
// Make it explicit that uncaught exceptions are thrown when process is shutting down. | ||
// It will help users when they analyze the executor logs | ||
val errMsg = "Uncaught exception in thread " + thread | ||
if (ShutdownHookManager.inShutdown()) { | ||
logError("[Process in shutdown] " + errMsg, exception) | ||
} else if (exception.isInstanceOf[Error] || | ||
(!exception.isInstanceOf[Error] && exitOnException)) { | ||
try { | ||
logError(errMsg + ". Shutting down now..", exception) | ||
if (exception.isInstanceOf[OutOfMemoryError]) { | ||
System.exit(SparkExitCode.OOM) | ||
} else { | ||
System.exit(SparkExitCode.UNCAUGHT_EXCEPTION) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The current changes are too much. Could you rename
|
||
} | ||
} catch { | ||
case oom: OutOfMemoryError => Runtime.getRuntime.halt(SparkExitCode.OOM) | ||
case t: Throwable => Runtime.getRuntime.halt(SparkExitCode.UNCAUGHT_EXCEPTION_TWICE) | ||
} | ||
} catch { | ||
case oom: OutOfMemoryError => Runtime.getRuntime.halt(SparkExitCode.OOM) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you keep this code? It is unlikely to happen, but since the code is already there, it's better not to change it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @zsxwing, this code is still there, but I moved the try/catch to the block where we invoke System.exit. Do you mean moving all of the code in uncaughtException() into the try block and keeping the catch block?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Yes. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @zsxwing Thanks for the clarification. |
||
case t: Throwable => Runtime.getRuntime.halt(SparkExitCode.UNCAUGHT_EXCEPTION_TWICE) | ||
} else { | ||
logError(errMsg, exception) | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of using a static variable, I prefer to change
SparkUncaughtExceptionHandler
to a class.