Skip to content

Commit

Permalink
Only connect to new nodes on new cluster state
Browse files Browse the repository at this point in the history
Today, when applying new cluster state we attempt to connect to all of its
nodes as a blocking part of the application process. This is the right thing to
do with new nodes, and is a no-op on any already-connected nodes, but is
questionable on known nodes from which we are currently disconnected: there is
a risk that we are partitioned from these nodes so that any attempt to connect
to them will hang until it times out. This can dramatically slow down the
application of new cluster states which hinders the recovery of the cluster
during certain kinds of partition.

If nodes are disconnected from the master then it is likely that they are to be
removed as part of a subsequent cluster state update, so there is no need to try
to reconnect to them like this. Moreover, there is no need to attempt to
reconnect to disconnected nodes as part of the cluster state application
process, because we periodically try to reconnect to any disconnected nodes,
and handle their disconnectedness reasonably gracefully in the meantime.

This commit alters this behaviour to avoid reconnecting to known nodes during
cluster state application.

Resolves elastic#29025.
Supersedes elastic#31547.
  • Loading branch information
DaveCTurner committed Mar 4, 2019
1 parent 9f130a5 commit 712cb3c
Show file tree
Hide file tree
Showing 8 changed files with 625 additions and 248 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
Expand Down Expand Up @@ -446,7 +447,7 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl
}

logger.trace("connecting to nodes of cluster state with version {}", newClusterState.version());
nodeConnectionsService.connectToNodes(newClusterState.nodes());
connectToNodesAndWait(newClusterState);

// nothing to do until we actually recover from the gateway or any other block indicates we need to disable persistency
if (clusterChangedEvent.state().blocks().disableStatePersistence() == false && clusterChangedEvent.metaDataChanged()) {
Expand All @@ -466,6 +467,18 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl
callClusterStateListeners(clusterChangedEvent);
}

protected void connectToNodesAndWait(ClusterState newClusterState) {
    // The applier thread must block until every connection attempt has completed, but it
    // cannot wait on an ActionFuture here, so a plain CountDownLatch is used instead.
    final CountDownLatch connectionsEstablished = new CountDownLatch(1);
    nodeConnectionsService.connectToNodes(newClusterState.nodes(), connectionsEstablished::countDown);
    try {
        connectionsEstablished.await();
    } catch (InterruptedException e) {
        logger.debug("interrupted while connecting to nodes, continuing", e);
        // preserve the interrupt status so callers up the stack can observe it
        Thread.currentThread().interrupt();
    }
}

private void callClusterStateAppliers(ClusterChangedEvent clusterChangedEvent) {
clusterStateAppliers.forEach(applier -> {
logger.trace("calling [{}] with change to version [{}]", applier, clusterChangedEvent.state().version());
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNode.Role;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.service.ClusterApplierService;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Nullable;
Expand Down Expand Up @@ -1712,12 +1711,7 @@ protected Optional<DisruptableMockTransport> getDisruptableMockTransport(Transpo
clusterService = new ClusterService(settings, clusterSettings, masterService, clusterApplierService);
clusterService.setNodeConnectionsService(
new NodeConnectionsService(clusterService.getSettings(), deterministicTaskQueue.getThreadPool(this::onNode),
transportService) {
@Override
public void connectToNodes(DiscoveryNodes discoveryNodes) {
// override this method as it does blocking calls
}
});
transportService));
final Collection<BiConsumer<DiscoveryNode, ClusterState>> onJoinValidators =
Collections.singletonList((dn, cs) -> extraJoinValidators.forEach(validator -> validator.accept(dn, cs)));
coordinator = new Coordinator("test_node", settings, clusterSettings, transportService, writableRegistry(),
Expand Down Expand Up @@ -2106,6 +2100,10 @@ public void onNewClusterState(String source, Supplier<ClusterState> clusterState
}
}

@Override
protected void connectToNodesAndWait(ClusterState newClusterState) {
    // Test override: the real implementation blocks the applier thread on a latch until
    // connections are established, which would stall this deterministic test harness.
    // don't do anything, and don't block
}
}

private static DiscoveryNode createDiscoveryNode(int nodeIndex, boolean masterEligible) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,13 @@
package org.elasticsearch.cluster.service;

import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateObserver;
import org.elasticsearch.cluster.LocalNodeMasterListener;
import org.elasticsearch.cluster.NodeConnectionsService;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.metadata.MetaData;
Expand Down Expand Up @@ -54,6 +53,7 @@

import static java.util.Collections.emptyMap;
import static java.util.Collections.emptySet;
import static org.elasticsearch.test.ClusterServiceUtils.createNoOpNodeConnectionsService;
import static org.elasticsearch.test.ClusterServiceUtils.setState;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.is;
Expand Down Expand Up @@ -88,23 +88,13 @@ public void tearDown() throws Exception {
super.tearDown();
}

TimedClusterApplierService createTimedClusterService(boolean makeMaster) throws InterruptedException {
private TimedClusterApplierService createTimedClusterService(boolean makeMaster) {
DiscoveryNode localNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(),
emptySet(), Version.CURRENT);
TimedClusterApplierService timedClusterApplierService = new TimedClusterApplierService(Settings.builder().put("cluster.name",
"ClusterApplierServiceTests").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS),
threadPool);
timedClusterApplierService.setNodeConnectionsService(new NodeConnectionsService(Settings.EMPTY, null, null) {
@Override
public void connectToNodes(DiscoveryNodes discoveryNodes) {
// skip
}

@Override
public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
// skip
}
});
timedClusterApplierService.setNodeConnectionsService(createNoOpNodeConnectionsService());
timedClusterApplierService.setInitialState(ClusterState.builder(new ClusterName("ClusterApplierServiceTests"))
.nodes(DiscoveryNodes.builder()
.add(localNode)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,11 @@ private final class TestClusterNode {
protected PrioritizedEsThreadPoolExecutor createThreadPoolExecutor() {
return new MockSinglePrioritizingExecutor(node.getName(), deterministicTaskQueue);
}

@Override
protected void connectToNodesAndWait(ClusterState newClusterState) {
    // Test override: the real implementation blocks the applier thread on a latch until
    // connections are established, which would stall this deterministic test harness.
    // don't do anything, and don't block
}
});
mockTransport = new DisruptableMockTransport(node, logger) {
@Override
Expand Down Expand Up @@ -992,23 +997,7 @@ public void start(ClusterState initialState) {
coordinator.start();
masterService.start();
clusterService.getClusterApplierService().setNodeConnectionsService(
new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService) {
@Override
public void connectToNodes(DiscoveryNodes discoveryNodes) {
// override this method as it does blocking calls
boolean callSuper = true;
for (final DiscoveryNode node : discoveryNodes) {
try {
transportService.connectToNode(node);
} catch (Exception e) {
callSuper = false;
}
}
if (callSuper) {
super.connectToNodes(discoveryNodes);
}
}
});
new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService));
clusterService.getClusterApplierService().start();
indicesService.start();
indicesClusterStateService.start();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,7 @@ public static ClusterService createClusterService(ThreadPool threadPool, Discove
.put("cluster.name", "ClusterServiceTests")
.build();
ClusterService clusterService = new ClusterService(settings, clusterSettings, threadPool);
clusterService.setNodeConnectionsService(new NodeConnectionsService(Settings.EMPTY, null, null) {
@Override
public void connectToNodes(DiscoveryNodes discoveryNodes) {
// skip
}

@Override
public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
// skip
}
});
clusterService.setNodeConnectionsService(createNoOpNodeConnectionsService());
ClusterState initialClusterState = ClusterState.builder(new ClusterName(ClusterServiceUtils.class.getSimpleName()))
.nodes(DiscoveryNodes.builder()
.add(localNode)
Expand All @@ -162,6 +152,21 @@ public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
return clusterService;
}

public static NodeConnectionsService createNoOpNodeConnectionsService() {
    // A stub connections service for tests: it never opens or closes any transport
    // connections, but it still invokes the completion callback so that callers which
    // block on it (e.g. connectToNodesAndWait) are not left hanging.
    return new NodeConnectionsService(Settings.EMPTY, null, null) {
        @Override
        public void disconnectFromNodesExcept(DiscoveryNodes nodesToKeep) {
            // nothing was connected, so there is nothing to disconnect
        }

        @Override
        public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) {
            // make no connections; just signal completion immediately
            onCompletion.run();
        }
    };
}

public static ClusterStatePublisher createClusterStatePublisher(ClusterApplier clusterApplier) {
return (event, publishListener, ackListener) ->
clusterApplier.onNewClusterState("mock_publish_to_self[" + event.source() + "]", () -> event.state(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.function.BiConsumer;

import static org.junit.Assert.assertFalse;
Expand All @@ -49,7 +50,7 @@
*/
public class NetworkDisruption implements ServiceDisruptionScheme {

private final Logger logger = LogManager.getLogger(NetworkDisruption.class);
private static final Logger logger = LogManager.getLogger(NetworkDisruption.class);

private final DisruptedLinks disruptedLinks;
private final NetworkLinkDisruptionType networkLinkDisruptionType;
Expand Down Expand Up @@ -103,9 +104,17 @@ public void ensureHealthy(InternalTestCluster cluster) {
* handy to be able to ensure this happens faster
*/
public static void ensureFullyConnectedCluster(InternalTestCluster cluster) {
for (String node: cluster.getNodeNames()) {
final String[] nodeNames = cluster.getNodeNames();
final CountDownLatch countDownLatch = new CountDownLatch(nodeNames.length);
for (String node : nodeNames) {
ClusterState stateOnNode = cluster.getInstance(ClusterService.class, node).state();
cluster.getInstance(NodeConnectionsService.class, node).connectToNodes(stateOnNode.nodes());
cluster.getInstance(NodeConnectionsService.class, node).reconnectToNodes(stateOnNode.nodes(), countDownLatch::countDown);
}

try {
countDownLatch.await();
} catch (InterruptedException e) {
throw new AssertionError(e);
}
}

Expand Down

0 comments on commit 712cb3c

Please sign in to comment.