-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Shutdown ClusterTopologyRefreshTask properly #2985
base: main
Are you sure you want to change the base?
Changes from 7 commits
34d1ae6
d8507a5
5952b0b
ed56fa4
83a470e
2f38c3c
370a172
be55f44
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,8 @@ | |
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.atomic.AtomicBoolean; | ||
import java.util.concurrent.atomic.AtomicReference; | ||
import java.util.concurrent.locks.Condition; | ||
import java.util.concurrent.locks.ReentrantLock; | ||
import java.util.function.Supplier; | ||
|
||
import io.lettuce.core.ClientOptions; | ||
|
@@ -64,6 +66,10 @@ class ClusterTopologyRefreshScheduler implements Runnable, ClusterEventListener | |
|
||
private final EventExecutorGroup genericWorkerPool; | ||
|
||
private static final ReentrantLock refreshLock = new ReentrantLock(); | ||
|
||
private static final Condition refreshComplete = refreshLock.newCondition(); | ||
|
||
ClusterTopologyRefreshScheduler(Supplier<ClusterClientOptions> clientOptions, Supplier<Partitions> partitions, | ||
Supplier<CompletionStage<?>> refreshTopology, ClientResources clientResources) { | ||
|
||
|
@@ -112,6 +118,14 @@ public boolean isTopologyRefreshInProgress() { | |
return clusterTopologyRefreshTask.get(); | ||
} | ||
|
||
public ReentrantLock getRefreshLock() { | ||
return refreshLock; | ||
} | ||
|
||
public Condition getRefreshComplete() { | ||
return refreshComplete; | ||
} | ||
|
||
@Override | ||
public void run() { | ||
|
||
|
@@ -323,13 +337,18 @@ private static class ClusterTopologyRefreshTask extends AtomicBoolean implements | |
|
||
public void run() { | ||
|
||
if (compareAndSet(false, true)) { | ||
doRun(); | ||
return; | ||
} | ||
|
||
if (logger.isDebugEnabled()) { | ||
logger.debug("ClusterTopologyRefreshTask already in progress"); | ||
refreshLock.lock(); | ||
try { | ||
if (compareAndSet(false, true)) { | ||
doRun(); | ||
return; | ||
} | ||
|
||
if (logger.isDebugEnabled()) { | ||
logger.debug("ClusterTopologyRefreshTask already in progress"); | ||
} | ||
} finally { | ||
refreshLock.unlock(); | ||
} | ||
} | ||
|
||
|
@@ -345,7 +364,13 @@ void doRun() { | |
logger.warn("Cannot refresh Redis Cluster topology", throwable); | ||
} | ||
|
||
set(false); | ||
refreshLock.lock(); | ||
try { | ||
set(false); | ||
refreshComplete.signalAll(); | ||
} finally { | ||
refreshLock.unlock(); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we need to lock before initiating the topology refresh with |
||
}); | ||
} catch (Exception e) { | ||
logger.warn("Cannot refresh Redis Cluster topology", e); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,8 @@ | |
import java.util.concurrent.CompletionStage; | ||
import java.util.concurrent.ExecutionException; | ||
import java.util.concurrent.TimeUnit; | ||
import java.util.concurrent.locks.Condition; | ||
import java.util.concurrent.locks.ReentrantLock; | ||
import java.util.function.Consumer; | ||
import java.util.function.Function; | ||
import java.util.function.Predicate; | ||
|
@@ -1152,6 +1154,21 @@ public void setPartitions(Partitions partitions) { | |
public CompletableFuture<Void> shutdownAsync(long quietPeriod, long timeout, TimeUnit timeUnit) { | ||
|
||
suspendTopologyRefresh(); | ||
ReentrantLock refreshLock = topologyRefreshScheduler.getRefreshLock(); | ||
Condition refreshComplete = topologyRefreshScheduler.getRefreshComplete(); | ||
|
||
refreshLock.lock(); | ||
try { | ||
while (topologyRefreshScheduler.isTopologyRefreshInProgress()) { | ||
try { | ||
refreshComplete.await(); | ||
} catch (InterruptedException e) { | ||
Thread.currentThread().interrupt(); | ||
} | ||
} | ||
} finally { | ||
refreshLock.unlock(); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since we know there is only one thread that started a refresh we do not need to wait for more than one lock to be released. We need one lock to acquire (I'd not use a spinlock here, because if a refresh is in progress it might take some time) and then we call the super method, e.g. refreshLock.lock();
try {
return super.shutdownAsync(quietPeriod, timeout, timeUnit);
} finally {
refreshLock.unlock();
} |
||
|
||
return super.shutdownAsync(quietPeriod, timeout, timeUnit); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we do not need to make any changes here:
compareAndSet
makes sure there is only one thread that starts a refresh. suspendTopologyRefresh
should stop any new topology refresh attempts