-
Notifications
You must be signed in to change notification settings - Fork 25.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Clean up Node#close. #39317
Clean up Node#close. #39317
Changes from all commits
36511f4
184a62c
3b4b67a
d67f703
d6f7fd9
ad5e1da
08068d6
1041225
5507cef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,6 +59,7 @@ | |
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.concurrent.CountDownLatch; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
/** | ||
* Internal startup code. | ||
|
@@ -183,8 +184,15 @@ public void run() { | |
IOUtils.close(node, spawner); | ||
LoggerContext context = (LoggerContext) LogManager.getContext(false); | ||
Configurator.shutdown(context); | ||
if (node != null && node.awaitClose(10, TimeUnit.SECONDS) == false) { | ||
throw new IllegalStateException("Node didn't stop within 10 seconds. " + | ||
"Any outstanding requests or tasks might get killed."); | ||
} | ||
} catch (IOException ex) { | ||
throw new ElasticsearchException("failed to stop node", ex); | ||
} catch (InterruptedException e) { | ||
LogManager.getLogger(Bootstrap.class).warn("Thread got interrupted while waiting for the node to shutdown."); | ||
Thread.currentThread().interrupt(); | ||
} | ||
} | ||
}); | ||
|
@@ -267,6 +275,12 @@ private void start() throws NodeValidationException { | |
static void stop() throws IOException { | ||
try { | ||
IOUtils.close(INSTANCE.node, INSTANCE.spawner); | ||
if (INSTANCE.node != null && INSTANCE.node.awaitClose(10, TimeUnit.SECONDS) == false) { | ||
throw new IllegalStateException("Node didn't stop within 10 seconds. Any outstanding requests or tasks might get killed."); | ||
} | ||
} catch (InterruptedException e) { | ||
LogManager.getLogger(Bootstrap.class).warn("Thread got interrupted while waiting for the node to shutdown."); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also here I suggest reversing the order. |
||
Thread.currentThread().interrupt(); | ||
} finally { | ||
INSTANCE.keepAliveLatch.countDown(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,15 +19,12 @@ | |
|
||
package org.elasticsearch.common.component; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
|
||
import java.io.IOException; | ||
import java.io.UncheckedIOException; | ||
import java.util.List; | ||
import java.util.concurrent.CopyOnWriteArrayList; | ||
|
||
public abstract class AbstractLifecycleComponent implements LifecycleComponent { | ||
private static final Logger logger = LogManager.getLogger(AbstractLifecycleComponent.class); | ||
|
||
protected final Lifecycle lifecycle = new Lifecycle(); | ||
|
||
|
@@ -52,59 +49,64 @@ public void removeLifecycleListener(LifecycleListener listener) { | |
|
||
@Override | ||
public void start() { | ||
if (!lifecycle.canMoveToStarted()) { | ||
return; | ||
} | ||
for (LifecycleListener listener : listeners) { | ||
listener.beforeStart(); | ||
} | ||
doStart(); | ||
lifecycle.moveToStarted(); | ||
for (LifecycleListener listener : listeners) { | ||
listener.afterStart(); | ||
synchronized (lifecycle) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it can become tricky to synchronize on an object that can also be directly accessed by subclasses. The state of … |
||
if (!lifecycle.canMoveToStarted()) { | ||
return; | ||
} | ||
for (LifecycleListener listener : listeners) { | ||
listener.beforeStart(); | ||
} | ||
doStart(); | ||
lifecycle.moveToStarted(); | ||
for (LifecycleListener listener : listeners) { | ||
listener.afterStart(); | ||
} | ||
} | ||
} | ||
|
||
protected abstract void doStart(); | ||
|
||
@Override | ||
public void stop() { | ||
if (!lifecycle.canMoveToStopped()) { | ||
return; | ||
} | ||
for (LifecycleListener listener : listeners) { | ||
listener.beforeStop(); | ||
} | ||
lifecycle.moveToStopped(); | ||
doStop(); | ||
for (LifecycleListener listener : listeners) { | ||
listener.afterStop(); | ||
synchronized (lifecycle) { | ||
if (!lifecycle.canMoveToStopped()) { | ||
return; | ||
} | ||
for (LifecycleListener listener : listeners) { | ||
listener.beforeStop(); | ||
} | ||
lifecycle.moveToStopped(); | ||
doStop(); | ||
for (LifecycleListener listener : listeners) { | ||
listener.afterStop(); | ||
} | ||
} | ||
} | ||
|
||
protected abstract void doStop(); | ||
|
||
@Override | ||
public void close() { | ||
if (lifecycle.started()) { | ||
stop(); | ||
} | ||
if (!lifecycle.canMoveToClosed()) { | ||
return; | ||
} | ||
for (LifecycleListener listener : listeners) { | ||
listener.beforeClose(); | ||
} | ||
lifecycle.moveToClosed(); | ||
try { | ||
doClose(); | ||
} catch (IOException e) { | ||
// TODO: we need to separate out closing (ie shutting down) services, vs releasing runtime transient | ||
// structures. Shutting down services should use IOUtils.close | ||
logger.warn("failed to close " + getClass().getName(), e); | ||
} | ||
for (LifecycleListener listener : listeners) { | ||
listener.afterClose(); | ||
synchronized (lifecycle) { | ||
if (lifecycle.started()) { | ||
stop(); | ||
} | ||
if (!lifecycle.canMoveToClosed()) { | ||
return; | ||
} | ||
for (LifecycleListener listener : listeners) { | ||
listener.beforeClose(); | ||
} | ||
lifecycle.moveToClosed(); | ||
try { | ||
doClose(); | ||
} catch (IOException e) { | ||
throw new UncheckedIOException(e); | ||
} finally { | ||
for (LifecycleListener listener : listeners) { | ||
listener.afterClose(); | ||
} | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -783,11 +783,13 @@ private Node stop() { | |
// In this case the process will be terminated even if the first call to close() has not finished yet. | ||
@Override | ||
public synchronized void close() throws IOException { | ||
if (lifecycle.started()) { | ||
stop(); | ||
} | ||
if (!lifecycle.moveToClosed()) { | ||
return; | ||
synchronized (lifecycle) { | ||
if (lifecycle.started()) { | ||
stop(); | ||
} | ||
if (!lifecycle.moveToClosed()) { | ||
return; | ||
} | ||
} | ||
|
||
logger.info("closing ..."); | ||
|
@@ -835,21 +837,12 @@ public synchronized void close() throws IOException { | |
toClose.add(injector.getInstance(ScriptService.class)); | ||
|
||
toClose.add(() -> stopWatch.stop().start("thread_pool")); | ||
// TODO this should really use ThreadPool.terminate() | ||
toClose.add(() -> injector.getInstance(ThreadPool.class).shutdown()); | ||
toClose.add(() -> { | ||
try { | ||
injector.getInstance(ThreadPool.class).awaitTermination(10, TimeUnit.SECONDS); | ||
} catch (InterruptedException e) { | ||
// ignore | ||
} | ||
}); | ||
|
||
toClose.add(() -> stopWatch.stop().start("thread_pool_force_shutdown")); | ||
toClose.add(() -> injector.getInstance(ThreadPool.class).shutdownNow()); | ||
// Don't call shutdownNow here, it might break ongoing operations on Lucene indices. | ||
// See https://issues.apache.org/jira/browse/LUCENE-7248. We call shutdownNow in | ||
// awaitClose if the node doesn't finish closing within the specified time. | ||
toClose.add(() -> stopWatch.stop()); | ||
|
||
|
||
toClose.add(injector.getInstance(NodeEnvironment.class)); | ||
toClose.add(injector.getInstance(PageCacheRecycler.class)); | ||
|
||
|
@@ -860,6 +853,30 @@ public synchronized void close() throws IOException { | |
logger.info("closed"); | ||
} | ||
|
||
/** | ||
* Wait for this node to be effectively closed. | ||
*/ | ||
// synchronized to prevent running concurrently with close() | ||
public synchronized boolean awaitClose(long timeout, TimeUnit timeUnit) throws InterruptedException { | ||
if (lifecycle.closed() == false) { | ||
// We don't want to shutdown the threadpool or interrupt threads on a node that is not | ||
// closed yet. | ||
throw new IllegalStateException("Call close() first"); | ||
} | ||
|
||
|
||
ThreadPool threadPool = injector.getInstance(ThreadPool.class); | ||
final boolean terminated = ThreadPool.terminate(threadPool, timeout, timeUnit); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I find it a bit odd to call a mutating method in an … |
||
if (terminated) { | ||
// All threads terminated successfully. Because search, recovery and all other operations | ||
// that run on shards run in the threadpool, indices should be effectively closed by now. | ||
if (nodeService.awaitClose(0, TimeUnit.MILLISECONDS) == false) { | ||
throw new IllegalStateException("Some shards are still open after the threadpool terminated. " + | ||
"Something is leaking index readers or store references."); | ||
} | ||
} | ||
return terminated; | ||
} | ||
|
||
/** | ||
* Returns {@code true} if the node is closed. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about we reverse the order here (i.e. first log the message, then restore the interrupt status)?