-
Notifications
You must be signed in to change notification settings - Fork 1.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add changes for graceful node decommission #4586
Changes from all commits
bdb6c6f
afec689
3314ec3
bf622c4
d933377
bf4c217
c57530a
7c43ad1
ba8f500
8920480
eeed711
ed1ccbc
7922291
bed942a
67622a2
b75b108
f79b51d
46a1b12
afda24c
509015e
3f832f9
b70370c
5724ba8
6d1ba1a
e401908
289077f
0890c39
ec0f0e1
2f10664
daa0065
e9e25f1
4590da3
abbec12
bd6f485
9c1b2aa
9aa4f37
fd9fabe
79f06fc
0faa9b9
adbc120
81d320b
de54abd
d2b0444
4ddaae4
e5984f7
c600c6d
b97c83b
926c969
abb9825
26ba03e
8564208
3e1c3b1
01b208c
d8f0166
f367f4f
e5ae1f3
d9d4904
f9ced18
9881ef0
1ed1b57
dde8f48
8453ca2
3c417f5
602ced2
0574db5
3115772
95fc853
165ce7d
8e0f681
460e96c
f3a7240
c147d15
13c0c61
56c48eb
ce3cc6f
09aec9f
252a976
60d4e87
abdda72
1b8234c
a4a2ff6
e410e45
7d2f2df
e25e411
09b7cdc
878dad4
9d28440
d2303c5
b78b452
d1bb936
a8a6032
b724464
10a6e5f
d2d171d
68e4e94
d6fe3c4
ca0418f
04e4676
a6fbdb2
811fe8e
97c93c1
122b16c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,10 @@ | |
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsAction; | ||
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsRequest; | ||
import org.opensearch.action.admin.cluster.configuration.ClearVotingConfigExclusionsResponse; | ||
import org.opensearch.action.admin.cluster.node.stats.NodeStats; | ||
import org.opensearch.action.admin.cluster.node.stats.NodesStatsAction; | ||
import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; | ||
import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; | ||
import org.opensearch.cluster.ClusterState; | ||
import org.opensearch.cluster.ClusterStateObserver; | ||
import org.opensearch.cluster.ClusterStateTaskConfig; | ||
|
@@ -32,14 +36,17 @@ | |
import org.opensearch.common.Strings; | ||
import org.opensearch.common.io.stream.StreamInput; | ||
import org.opensearch.common.unit.TimeValue; | ||
import org.opensearch.http.HttpStats; | ||
import org.opensearch.threadpool.ThreadPool; | ||
import org.opensearch.transport.TransportException; | ||
import org.opensearch.transport.TransportResponseHandler; | ||
import org.opensearch.transport.TransportService; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.function.Predicate; | ||
|
@@ -271,4 +278,61 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS | |
} | ||
}); | ||
} | ||
|
||
private void logActiveConnections(NodesStatsResponse nodesStatsResponse) { | ||
if (nodesStatsResponse == null || nodesStatsResponse.getNodes() == null) { | ||
logger.info("Node stats response received is null/empty."); | ||
return; | ||
} | ||
|
||
Map<String, Long> nodeActiveConnectionMap = new HashMap<>(); | ||
List<NodeStats> responseNodes = nodesStatsResponse.getNodes(); | ||
for (int i = 0; i < responseNodes.size(); i++) { | ||
HttpStats httpStats = responseNodes.get(i).getHttp(); | ||
DiscoveryNode node = responseNodes.get(i).getNode(); | ||
nodeActiveConnectionMap.put(node.getId(), httpStats.getServerOpen()); | ||
} | ||
logger.info("Decommissioning node with connections : [{}]", nodeActiveConnectionMap); | ||
Comment on lines
+288
to
+295
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's avoid a map just for logging in a single line. I would suggest introducing a settings that could control minimum acceptable connections during draining There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Bukhtawar .. We are just logging the active connections for now. I was thinking once we have datapoint related to active connections count for the nodes we can set the minimum connection setting. I will open a task to track that change. Let me know your thoughts. Created an Issue to track the same: #4804 |
||
} | ||
|
||
void getActiveRequestCountOnDecommissionedNodes(Set<DiscoveryNode> decommissionedNodes) { | ||
if (decommissionedNodes == null || decommissionedNodes.isEmpty()) { | ||
return; | ||
} | ||
String[] nodes = decommissionedNodes.stream().map(DiscoveryNode::getId).toArray(String[]::new); | ||
if (nodes.length == 0) { | ||
return; | ||
} | ||
|
||
final NodesStatsRequest nodesStatsRequest = new NodesStatsRequest(nodes); | ||
nodesStatsRequest.clear(); | ||
nodesStatsRequest.addMetric(NodesStatsRequest.Metric.HTTP.metricName()); | ||
|
||
transportService.sendRequest( | ||
transportService.getLocalNode(), | ||
NodesStatsAction.NAME, | ||
nodesStatsRequest, | ||
new TransportResponseHandler<NodesStatsResponse>() { | ||
@Override | ||
public void handleResponse(NodesStatsResponse response) { | ||
logActiveConnections(response); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can response be null? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handled for null response. Added log for null response. |
||
} | ||
|
||
@Override | ||
public void handleException(TransportException exp) { | ||
logger.error("Failure occurred while dumping connection for decommission nodes - ", exp.unwrapCause()); | ||
} | ||
|
||
@Override | ||
public String executor() { | ||
return ThreadPool.Names.SAME; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Generic thread? @Bukhtawar what do you think? |
||
} | ||
|
||
@Override | ||
public NodesStatsResponse read(StreamInput in) throws IOException { | ||
return new NodesStatsResponse(in); | ||
} | ||
} | ||
); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We missed adding
noDelay
here. With this no delay would never be read from the request