diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 8c4f4d59ea1fc..af83f8b2edac2 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -722,6 +722,12 @@ updates:
   - directory: /plugins/
     open-pull-requests-limit: 1
    package-ecosystem: gradle
+    ignore:
+      # For all packages, ignore all major versions to minimize breaking issues
+      - dependency-name: "com.google.cloud:google-cloud-storage"
+        update-types: [ "version-update:semver-major" ]
+      - dependency-name: "com.google.api-client:google-api-client"
+        update-types: [ "version-update:semver-major" ]
     schedule:
       interval: weekly
     labels:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 935901d410178..07637eaae3306 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,8 +23,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Bump `com.microsoft.azure:msal4j` from 1.17.0 to 1.17.1 ([#15945](https://github.com/opensearch-project/OpenSearch/pull/15945))
 - Bump `ch.qos.logback:logback-core` from 1.5.6 to 1.5.8 ([#15946](https://github.com/opensearch-project/OpenSearch/pull/15946))
 - Update protobuf from 3.25.4 to 3.25.5 ([#16011](https://github.com/opensearch-project/OpenSearch/pull/16011))
+- Bump `org.roaringbitmap:RoaringBitmap` from 1.2.1 to 1.3.0 ([#16040](https://github.com/opensearch-project/OpenSearch/pull/16040))
+- Bump `com.nimbusds:nimbus-jose-jwt` from 9.40 to 9.41.1 ([#16038](https://github.com/opensearch-project/OpenSearch/pull/16038))
 - Bump `actions/github-script` from 5 to 7 ([#16039](https://github.com/opensearch-project/OpenSearch/pull/16039))
 - Bump `dnsjava:dnsjava` from 3.6.1 to 3.6.2 ([#16041](https://github.com/opensearch-project/OpenSearch/pull/16041))
+- Bump `com.maxmind.geoip2:geoip2` from 4.2.0 to 4.2.1 ([#16042](https://github.com/opensearch-project/OpenSearch/pull/16042))
 
 ### Changed
 - Add support for docker compose v2 in TestFixturesPlugin ([#16049](https://github.com/opensearch-project/OpenSearch/pull/16049))
@@ -41,6 +44,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix search_as_you_type not supporting multi-fields ([#15988](https://github.com/opensearch-project/OpenSearch/pull/15988))
 - Avoid infinite loop when `flat_object` field contains invalid token ([#15985](https://github.com/opensearch-project/OpenSearch/pull/15985))
 - Fix infinite loop in nested agg ([#15931](https://github.com/opensearch-project/OpenSearch/pull/15931))
+- Fix race condition in node-join and node-left ([#15521](https://github.com/opensearch-project/OpenSearch/pull/15521))
 
 ### Security
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
index 5c6205ebf24d4..e312a2da77d94 100644
--- a/gradle/wrapper/gradle-wrapper.properties
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -11,7 +11,7 @@
 distributionBase=GRADLE_USER_HOME
 distributionPath=wrapper/dists
-distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip
 zipStoreBase=GRADLE_USER_HOME
 zipStorePath=wrapper/dists
-distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15
+distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6
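The wrapper bump above changes both the distribution URL and its pinned SHA-256: Gradle hashes the downloaded zip and refuses to unpack it on a mismatch, so the two properties must move in lockstep. A standalone sketch of that kind of check, for illustration only (the local file name is an assumption; HexFormat needs Java 17+):

    import java.io.InputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.security.MessageDigest;
    import java.util.HexFormat;

    // Roughly what the wrapper does with distributionSha256Sum before unpacking.
    public class VerifyDistribution {
        public static void main(String[] args) throws Exception {
            Path zip = Path.of("gradle-8.10.2-all.zip"); // assumed local download
            String expected = "2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6";
            MessageDigest digest = MessageDigest.getInstance("SHA-256");
            try (InputStream in = Files.newInputStream(zip)) {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = in.read(buffer)) != -1) {
                    digest.update(buffer, 0, read); // hash the zip incrementally
                }
            }
            String actual = HexFormat.of().formatHex(digest.digest());
            if (!expected.equals(actual)) {
                throw new IllegalStateException("Checksum mismatch: " + actual);
            }
            System.out.println("Checksum OK");
        }
    }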
diff --git a/modules/ingest-geoip/build.gradle b/modules/ingest-geoip/build.gradle
index c0ff155ce1038..162ac6f9ad620 100644
--- a/modules/ingest-geoip/build.gradle
+++ b/modules/ingest-geoip/build.gradle
@@ -39,7 +39,7 @@ opensearchplugin {
 }
 
 dependencies {
-  api('com.maxmind.geoip2:geoip2:4.2.0')
+  api('com.maxmind.geoip2:geoip2:4.2.1')
   // geoip2 dependencies:
   api('com.maxmind.db:maxmind-db:3.1.0')
   api("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
diff --git a/modules/ingest-geoip/licenses/geoip2-4.2.0.jar.sha1 b/modules/ingest-geoip/licenses/geoip2-4.2.0.jar.sha1
deleted file mode 100644
index b6bfeeb9da60b..0000000000000
--- a/modules/ingest-geoip/licenses/geoip2-4.2.0.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-78ff932dc13ac41dd1f0fd9e7405a7f4ad815ce0
\ No newline at end of file
diff --git a/modules/ingest-geoip/licenses/geoip2-4.2.1.jar.sha1 b/modules/ingest-geoip/licenses/geoip2-4.2.1.jar.sha1
new file mode 100644
index 0000000000000..a9dc5483ac727
--- /dev/null
+++ b/modules/ingest-geoip/licenses/geoip2-4.2.1.jar.sha1
@@ -0,0 +1 @@
+9dbf8a8bea88a33e88c46eb3f503721b4bd08b90
\ No newline at end of file
diff --git a/modules/ingest-geoip/src/test/java/org/opensearch/ingest/geoip/IngestGeoIpModulePluginTests.java b/modules/ingest-geoip/src/test/java/org/opensearch/ingest/geoip/IngestGeoIpModulePluginTests.java
index 9446ec1228532..ac6b87aefb3ec 100644
--- a/modules/ingest-geoip/src/test/java/org/opensearch/ingest/geoip/IngestGeoIpModulePluginTests.java
+++ b/modules/ingest-geoip/src/test/java/org/opensearch/ingest/geoip/IngestGeoIpModulePluginTests.java
@@ -35,6 +35,7 @@
 import com.maxmind.geoip2.model.AbstractResponse;
 
 import org.opensearch.common.network.InetAddresses;
+import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.TestEnvironment;
 import org.opensearch.ingest.Processor;
@@ -126,6 +127,12 @@ public void testAllowListNotSpecified() throws IOException {
         }
     }
 
+    public void testSettingsRegistration() {
+        final IngestGeoIpModulePlugin plugin = new IngestGeoIpModulePlugin();
+        final List<Setting<?>> settings = plugin.getSettings();
+        assertTrue(settings.contains(IngestGeoIpModulePlugin.PROCESSORS_ALLOWLIST_SETTING));
+    }
+
     private void runAllowListTest(List<String> allowList) throws IOException {
         Settings.Builder settingsBuilder = Settings.builder();
         createDb(settingsBuilder);
diff --git a/modules/ingest-user-agent/src/main/java/org/opensearch/ingest/useragent/IngestUserAgentModulePlugin.java b/modules/ingest-user-agent/src/main/java/org/opensearch/ingest/useragent/IngestUserAgentModulePlugin.java
index bac90d20b44e1..f5e2e34278880 100644
--- a/modules/ingest-user-agent/src/main/java/org/opensearch/ingest/useragent/IngestUserAgentModulePlugin.java
+++ b/modules/ingest-user-agent/src/main/java/org/opensearch/ingest/useragent/IngestUserAgentModulePlugin.java
@@ -44,6 +44,7 @@
 import java.nio.file.Path;
 import java.nio.file.PathMatcher;
 import java.nio.file.StandardOpenOption;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -152,6 +153,6 @@ static Map<String, UserAgentParser> createUserAgentParsers(Path userAgentConfigD
 
     @Override
     public List<Setting<?>> getSettings() {
-        return Collections.singletonList(CACHE_SIZE_SETTING);
+        return Arrays.asList(CACHE_SIZE_SETTING, PROCESSORS_ALLOWLIST_SETTING);
     }
 }
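Both the geoip and user-agent changes guard the same contract: a custom setting is only accepted on a node if its owning plugin returns it from getSettings(); otherwise configuring the key typically fails node startup with an "unknown setting" error, which is the gap the new Arrays.asList(...) return closes. A minimal sketch of the pattern (the plugin class and setting key below are illustrative names, not code from this change):

    import java.util.List;
    import java.util.function.Function;

    import org.opensearch.common.settings.Setting;
    import org.opensearch.plugins.Plugin;

    // Illustrative plugin: EXAMPLE_ALLOWLIST only becomes a usable node setting
    // because it is returned from getSettings().
    public class ExamplePlugin extends Plugin {
        static final Setting<List<String>> EXAMPLE_ALLOWLIST = Setting.listSetting(
            "example.processors.allowed",   // hypothetical key, for illustration
            List.of(),
            Function.identity(),
            Setting.Property.NodeScope
        );

        @Override
        public List<Setting<?>> getSettings() {
            // Omitting a setting here is exactly the gap fixed above for the
            // user-agent module's PROCESSORS_ALLOWLIST_SETTING.
            return List.of(EXAMPLE_ALLOWLIST);
        }
    }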
diff --git a/modules/ingest-user-agent/src/test/java/org/opensearch/ingest/useragent/IngestUserAgentModulePluginTests.java b/modules/ingest-user-agent/src/test/java/org/opensearch/ingest/useragent/IngestUserAgentModulePluginTests.java
index 31fdafff1188a..563158026c37f 100644
--- a/modules/ingest-user-agent/src/test/java/org/opensearch/ingest/useragent/IngestUserAgentModulePluginTests.java
+++ b/modules/ingest-user-agent/src/test/java/org/opensearch/ingest/useragent/IngestUserAgentModulePluginTests.java
@@ -8,6 +8,7 @@
 
 package org.opensearch.ingest.useragent;
 
+import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.env.TestEnvironment;
 import org.opensearch.ingest.Processor;
@@ -89,6 +90,12 @@ public void testAllowListNotSpecified() throws IOException {
         }
     }
 
+    public void testSettingsRegistration() {
+        final IngestUserAgentModulePlugin plugin = new IngestUserAgentModulePlugin();
+        final List<Setting<?>> settings = plugin.getSettings();
+        assertTrue(settings.contains(IngestUserAgentModulePlugin.PROCESSORS_ALLOWLIST_SETTING));
+    }
+
     private void runAllowListTest(List<String> allowList) throws IOException {
         final Settings settings = settingsBuilder.putList(IngestUserAgentModulePlugin.PROCESSORS_ALLOWLIST_SETTING.getKey(), allowList)
             .build();
diff --git a/plugins/repository-azure/build.gradle b/plugins/repository-azure/build.gradle
index 616da0be62c30..2892bdba51ba6 100644
--- a/plugins/repository-azure/build.gradle
+++ b/plugins/repository-azure/build.gradle
@@ -63,7 +63,7 @@ dependencies {
   api "net.java.dev.jna:jna-platform:${versions.jna}"
   api 'com.microsoft.azure:msal4j:1.17.1'
   api 'com.nimbusds:oauth2-oidc-sdk:11.19.1'
-  api 'com.nimbusds:nimbus-jose-jwt:9.40'
+  api 'com.nimbusds:nimbus-jose-jwt:9.41.1'
   api 'com.nimbusds:content-type:2.3'
   api 'com.nimbusds:lang-tag:1.7'
   // Both msal4j:1.14.3 and oauth2-oidc-sdk:11.9.1 has compile dependency on different versions of json-smart,
diff --git a/plugins/repository-azure/licenses/nimbus-jose-jwt-9.40.jar.sha1 b/plugins/repository-azure/licenses/nimbus-jose-jwt-9.40.jar.sha1
deleted file mode 100644
index 83228caf233cc..0000000000000
--- a/plugins/repository-azure/licenses/nimbus-jose-jwt-9.40.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-42b1dfa0360e4062951b070bac52dd8d96fd7b38
\ No newline at end of file
diff --git a/plugins/repository-azure/licenses/nimbus-jose-jwt-9.41.1.jar.sha1 b/plugins/repository-azure/licenses/nimbus-jose-jwt-9.41.1.jar.sha1
new file mode 100644
index 0000000000000..71fa950cb9530
--- /dev/null
+++ b/plugins/repository-azure/licenses/nimbus-jose-jwt-9.41.1.jar.sha1
@@ -0,0 +1 @@
+35532a88e1b49a623ec97fd276cc208ea525b6bc
\ No newline at end of file
diff --git a/server/build.gradle b/server/build.gradle
index 0cc42ad690eab..83a04ef12d13b 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -127,7 +127,7 @@ dependencies {
   api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}"
 
   // https://mvnrepository.com/artifact/org.roaringbitmap/RoaringBitmap
-  implementation 'org.roaringbitmap:RoaringBitmap:1.2.1'
+  implementation 'org.roaringbitmap:RoaringBitmap:1.3.0'
 
   testImplementation(project(":test:framework")) {
     // tests use the locally compiled version of server
diff --git a/server/licenses/RoaringBitmap-1.2.1.jar.sha1 b/server/licenses/RoaringBitmap-1.2.1.jar.sha1
deleted file mode 100644
index ef8cd48c7a388..0000000000000
--- a/server/licenses/RoaringBitmap-1.2.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-828eb489b5e8c8762f2471010e9c7f20c7de596d
\ No newline at end of file
diff --git a/server/licenses/RoaringBitmap-1.3.0.jar.sha1 b/server/licenses/RoaringBitmap-1.3.0.jar.sha1
new file mode 100644
index 0000000000000..2e48289c4f7af
--- /dev/null
+++ b/server/licenses/RoaringBitmap-1.3.0.jar.sha1
@@ -0,0 +1 @@
+a46ce7a2dc494da69700ab421f081b1583857f6d
\ No newline at end of file
diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java
new file mode 100644
index 0000000000000..014e2bf642a4d
--- /dev/null
+++ b/server/src/internalClusterTest/java/org/opensearch/cluster/coordination/NodeJoinLeftIT.java
@@ -0,0 +1,355 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Modifications Copyright OpenSearch Contributors. See
+ * GitHub history for details.
+ */
+
+package org.opensearch.cluster.coordination;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.core.LoggerContext;
+import org.apache.logging.log4j.core.config.Configuration;
+import org.apache.logging.log4j.core.config.LoggerConfig;
+import org.opensearch.action.admin.cluster.health.ClusterHealthResponse;
+import org.opensearch.cluster.NodeConnectionsService;
+import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.index.MockEngineFactoryPlugin;
+import org.opensearch.indices.recovery.RecoverySettings;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.tasks.Task;
+import org.opensearch.test.InternalSettingsPlugin;
+import org.opensearch.test.OpenSearchIntegTestCase;
+import org.opensearch.test.OpenSearchIntegTestCase.ClusterScope;
+import org.opensearch.test.OpenSearchIntegTestCase.Scope;
+import org.opensearch.test.TestLogsAppender;
+import org.opensearch.test.store.MockFSIndexStore;
+import org.opensearch.test.transport.MockTransportService;
+import org.opensearch.test.transport.StubbableTransport;
+import org.opensearch.transport.ClusterConnectionManager;
+import org.opensearch.transport.TransportChannel;
+import org.opensearch.transport.TransportRequest;
+import org.opensearch.transport.TransportRequestHandler;
+import org.opensearch.transport.TransportService;
+import org.junit.After;
+import org.junit.Before;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import static org.opensearch.cluster.coordination.FollowersChecker.FOLLOWER_CHECK_ACTION_NAME;
+import static org.hamcrest.Matchers.is;
+
+/**
+ Check https://github.com/opensearch-project/OpenSearch/issues/4874 and
+ https://github.com/opensearch-project/OpenSearch/pull/15521 for context
+ */
+@ClusterScope(scope = Scope.TEST, numDataNodes = 0)
+public class NodeJoinLeftIT extends OpenSearchIntegTestCase {
+
+    private TestLogsAppender testLogsAppender;
+    private String clusterManager;
+    private String redNodeName;
+    private LoggerContext loggerContext;
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Arrays.asList(
+            MockTransportService.TestPlugin.class,
+            MockFSIndexStore.TestPlugin.class,
+            InternalSettingsPlugin.class,
+            MockEngineFactoryPlugin.class
+        );
+    }
+
+    @Override
+    protected void beforeIndexDeletion() throws Exception {
+        super.beforeIndexDeletion();
+        internalCluster().assertConsistentHistoryBetweenTranslogAndLuceneIndex();
+        internalCluster().assertSeqNos();
+        internalCluster().assertSameDocIdsOnShards();
+    }
+
+    @Override
+    @Before
+    public void setUp() throws Exception {
+        super.setUp();
+        // Add any other specific messages you want to capture
+        List<String> messagesToCapture = Arrays.asList("failed to join", "IllegalStateException");
+        testLogsAppender = new TestLogsAppender(messagesToCapture);
+        loggerContext = (LoggerContext) LogManager.getContext(false);
+        Configuration config = loggerContext.getConfiguration();
+        LoggerConfig loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName());
+        loggerConfig.addAppender(testLogsAppender, null, null);
+        loggerContext.updateLoggers();
+
+        String indexName = "test";
+        final Settings nodeSettings = Settings.builder()
+            .put(RecoverySettings.INDICES_RECOVERY_RETRY_DELAY_NETWORK_SETTING.getKey(), "100ms")
+            .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "10s")
+            .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "200ms")
+            .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "100ms")
+            .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1)
+            .put(NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), "100ms")
+            .build();
+        // start a 3 node cluster with 1 cluster-manager
+        this.clusterManager = internalCluster().startNode(nodeSettings);
+        internalCluster().startNode(Settings.builder().put("node.attr.color", "blue").put(nodeSettings).build());
+        this.redNodeName = internalCluster().startNode(Settings.builder().put("node.attr.color", "red").put(nodeSettings).build());
+
+        // validate the 3 node cluster is up
+        ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes(">=3").get();
+        assertThat(response.isTimedOut(), is(false));
+
+        // create an index
+        client().admin()
+            .indices()
+            .prepareCreate(indexName)
+            .setSettings(
+                Settings.builder()
+                    .put(IndexMetadata.INDEX_ROUTING_INCLUDE_GROUP_SETTING.getKey() + "color", "blue")
+                    .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+                    .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
+            )
+            .get();
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        testLogsAppender.clearCapturedLogs();
+        loggerContext = (LoggerContext) LogManager.getContext(false);
+        Configuration config = loggerContext.getConfiguration();
+        LoggerConfig loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName());
+        loggerConfig.removeAppender(testLogsAppender.getName());
+        loggerContext.updateLoggers();
+        super.tearDown();
+    }
+
+    public void testClusterStabilityWhenJoinRequestHappensDuringNodeLeftTask() throws Exception {
+
+        ClusterService clusterManagerClsService = internalCluster().getInstance(ClusterService.class, clusterManager);
+        // Simulate a slow applier on the cm to delay node-left state application
+        clusterManagerClsService.addStateApplier(event -> {
+            if (event.nodesRemoved()) {
+                try {
+                    Thread.sleep(3000);
+                } catch (InterruptedException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        });
+        // Toggle to succeed/fail the followerchecker to simulate the initial node leaving.
+        AtomicBoolean succeedFollowerChecker = new AtomicBoolean();
+
+        // Simulate followerchecker failure on 1 node when succeedFollowerChecker is false
+        FollowerCheckerBehaviour simulatedFailureBehaviour = new FollowerCheckerBehaviour(() -> {
+            if (succeedFollowerChecker.get()) {
+                return;
+            }
+            try {
+                Thread.sleep(10);
+            } catch (InterruptedException e) {
+                throw new RuntimeException(e);
+            }
+            throw new NodeHealthCheckFailureException("fake followerchecker failure simulated by test to repro race condition");
+        });
+        MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance(
+            TransportService.class,
+            redNodeName
+        );
+        redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, simulatedFailureBehaviour);
+
+        // Loop runs 5 times to ensure race condition gets reproduced
+        testLogsAppender.clearCapturedLogs();
+        for (int i = 0; i < 5; i++) {
+            logger.info("--> simulating followerchecker failure to trigger node-left");
+            succeedFollowerChecker.set(false);
+            ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get();
+            assertThat(response1.isTimedOut(), is(false));
+
+            // once we know a node has left, we can re-enable followerchecker to work normally and validate node rejoins
+            logger.info("--> re-enabling normal followerchecker and validating cluster is stable");
+            succeedFollowerChecker.set(true);
+            ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+            assertThat(response2.isTimedOut(), is(false));
+
+            Thread.sleep(1000);
+            // checking again to validate stability and ensure node did not leave
+            ClusterHealthResponse response3 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+            assertThat(response3.isTimedOut(), is(false));
+        }
+
+        succeedFollowerChecker.set(true);
+        ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+        assertThat(response.isTimedOut(), is(false));
+
+        // assert that join requests fail with the right exception
+        boolean logFound = testLogsAppender.waitForLog("failed to join", 30, TimeUnit.SECONDS)
+            && testLogsAppender.waitForLog(
+                "IllegalStateException[cannot make a new connection as disconnect to node",
+                30,
+                TimeUnit.SECONDS
+            );
+        assertTrue("Expected log was not found within the timeout period", logFound);
+    }
+
+    public void testClusterStabilityWhenDisconnectDuringSlowNodeLeftTask() throws Exception {
+        ClusterService clusterManagerClsService = internalCluster().getInstance(ClusterService.class, clusterManager);
+        // Simulate a slow applier on the cm to delay node-left state application
+        clusterManagerClsService.addStateApplier(event -> {
+            if (event.nodesRemoved()) {
+                try {
+                    Thread.sleep(3000);
+                } catch (InterruptedException e) {
+                    throw new RuntimeException(e);
+                }
+            }
+        });
+        // Toggle to succeed/fail the followerchecker to simulate the initial node leaving.
+        AtomicBoolean succeedFollowerChecker = new AtomicBoolean();
+
+        // Simulate followerchecker failure on 1 node when succeedFollowerChecker is false
+        FollowerCheckerBehaviour simulatedFailureBehaviour = new FollowerCheckerBehaviour(() -> {
+            if (succeedFollowerChecker.get()) {
+                return;
+            }
+            try {
+                Thread.sleep(10);
+            } catch (InterruptedException e) {
+                throw new RuntimeException(e);
+            }
+            throw new NodeHealthCheckFailureException("fake followerchecker failure simulated by test to repro race condition");
+        });
+        MockTransportService cmTransportService = (MockTransportService) internalCluster().getInstance(
+            TransportService.class,
+            clusterManager
+        );
+        MockTransportService redTransportService = (MockTransportService) internalCluster().getInstance(
+            TransportService.class,
+            redNodeName
+        );
+        redTransportService.addRequestHandlingBehavior(FOLLOWER_CHECK_ACTION_NAME, simulatedFailureBehaviour);
+
+        // Loop runs 5 times to ensure race condition gets reproduced
+        testLogsAppender.clearCapturedLogs();
+        for (int i = 0; i < 5; i++) {
+            // Fail followerchecker by force to trigger node disconnect and node left
+            logger.info("--> simulating followerchecker failure to trigger node-left");
+            succeedFollowerChecker.set(false);
+            Thread.sleep(1000);
+
+            // Trigger a node disconnect while node-left task is still processing
+            logger.info(
+                "--> triggering a simulated disconnect on red node, after the follower checker failed to see how node-left task deals with this"
+            );
+            cmTransportService.disconnectFromNode(redTransportService.getLocalDiscoNode());
+
+            ClusterHealthResponse response1 = client().admin().cluster().prepareHealth().setWaitForNodes("2").get();
+            assertThat(response1.isTimedOut(), is(false));
+
+            // once we know a node has left, we can re-enable followerchecker to work normally and validate node rejoins
+            logger.info("--> re-enabling normal followerchecker and validating cluster is stable");
+            succeedFollowerChecker.set(true);
+            ClusterHealthResponse response2 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+            assertThat(response2.isTimedOut(), is(false));
+
+            Thread.sleep(1000);
+            // checking again to validate stability and ensure node did not leave
+            ClusterHealthResponse response3 = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+            assertThat(response3.isTimedOut(), is(false));
+        }
+
+        succeedFollowerChecker.set(true);
+        ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+        assertThat(response.isTimedOut(), is(false));
+
+        // assert that join requests fail with the right exception
+        boolean logFound = testLogsAppender.waitForLog("failed to join", 30, TimeUnit.SECONDS);
+        assertTrue("Expected log was not found within the timeout period", logFound);
+        logFound = testLogsAppender.waitForLog(
+            "IllegalStateException[cannot make a new connection as disconnect to node",
+            30,
+            TimeUnit.SECONDS
+        );
+        assertTrue("Expected log was not found within the timeout period", logFound);
+    }
+
+    public void testRestartDataNode() throws Exception {
+
+        Settings redNodeDataPathSettings = internalCluster().dataPathSettings(redNodeName);
+        logger.info("-> stopping data node");
+        internalCluster().stopRandomNode(settings -> settings.get("node.name").equals(redNodeName));
+        ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get();
+        assertThat(response.isTimedOut(), is(false));
+
+        logger.info("-> restarting stopped node");
+        internalCluster().startNode(Settings.builder().put("node.name", redNodeName).put(redNodeDataPathSettings).build());
+        response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+        assertThat(response.isTimedOut(), is(false));
+    }
+
+    public void testRestartCmNode() throws Exception {
+
+        Settings cmNodeSettings = internalCluster().dataPathSettings(clusterManager);
+
+        logger.info("-> stopping cluster-manager node");
+        internalCluster().stopRandomNode(settings -> settings.get("node.name").equals(clusterManager));
+        ClusterHealthResponse response = client().admin().cluster().prepareHealth().setWaitForNodes("2").get();
+        assertThat(response.isTimedOut(), is(false));
+
+        logger.info("-> restarting stopped node");
+        internalCluster().startNode(Settings.builder().put("node.name", clusterManager).put(cmNodeSettings).build());
+        response = client().admin().cluster().prepareHealth().setWaitForNodes("3").get();
+        assertThat(response.isTimedOut(), is(false));
+    }
+
+    private class FollowerCheckerBehaviour implements StubbableTransport.RequestHandlingBehavior<TransportRequest> {
+        private final Runnable connectionBreaker;
+
+        private FollowerCheckerBehaviour(Runnable connectionBreaker) {
+            this.connectionBreaker = connectionBreaker;
+        }
+
+        @Override
+        public void messageReceived(
+            TransportRequestHandler<TransportRequest> handler,
+            TransportRequest request,
+            TransportChannel channel,
+            Task task
+        ) throws Exception {
+
+            connectionBreaker.run();
+            handler.messageReceived(request, channel, task);
+        }
+    }
+}
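The TestLogsAppender used throughout NodeJoinLeftIT lives in the test framework and is not part of this diff. A rough sketch of what such a capturing appender looks like, matching the calls the test makes (constructor taking messages to capture, waitForLog with a timeout, clearCapturedLogs); the real class may differ in details:

    import org.apache.logging.log4j.core.LogEvent;
    import org.apache.logging.log4j.core.appender.AbstractAppender;

    import java.util.List;
    import java.util.concurrent.CopyOnWriteArrayList;
    import java.util.concurrent.TimeUnit;

    // Sketch only: captures log messages matching any configured fragment.
    class TestLogsAppenderSketch extends AbstractAppender {
        private final List<String> captured = new CopyOnWriteArrayList<>();
        private final List<String> messagesToCapture;

        TestLogsAppenderSketch(List<String> messagesToCapture) {
            super("TestLogsAppender", null, null, false, null);
            this.messagesToCapture = messagesToCapture;
            start(); // appenders must be started before they accept events
        }

        @Override
        public void append(LogEvent event) {
            String message = event.getMessage().getFormattedMessage();
            if (event.getThrown() != null) {
                message += " " + event.getThrown(); // include exception text for matching
            }
            for (String fragment : messagesToCapture) {
                if (message.contains(fragment)) {
                    captured.add(message);
                    break;
                }
            }
        }

        // Polls until a captured message containing `fragment` appears, or times out.
        boolean waitForLog(String fragment, long timeout, TimeUnit unit) throws InterruptedException {
            long deadline = System.nanoTime() + unit.toNanos(timeout);
            while (System.nanoTime() < deadline) {
                if (captured.stream().anyMatch(m -> m.contains(fragment))) {
                    return true;
                }
                Thread.sleep(100);
            }
            return false;
        }

        void clearCapturedLogs() {
            captured.clear();
        }
    }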
diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemotePublicationConfigurationIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemotePublicationConfigurationIT.java
new file mode 100644
index 0000000000000..1d19a4bfd1af5
--- /dev/null
+++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemotePublicationConfigurationIT.java
@@ -0,0 +1,273 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.gateway.remote;
+
+import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.remotemigration.MigrationBaseTestCase;
+import org.opensearch.remotestore.multipart.mocks.MockFsRepositoryPlugin;
+import org.opensearch.repositories.blobstore.BlobStoreRepository;
+import org.opensearch.repositories.fs.ReloadableFsRepository;
+import org.opensearch.test.InternalSettingsPlugin;
+import org.opensearch.test.OpenSearchIntegTestCase;
+import org.opensearch.test.transport.MockTransportService;
+import org.junit.Before;
+
+import java.util.Collection;
+import java.util.Locale;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT;
+import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY;
+import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING;
+import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING;
+import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
+
+/**
+ * Tests the compatibility between types of nodes based on the configured repositories
+ *   Non Remote node [No Repositories configured]
+ *   Remote Publish Configured Node [Cluster State + Routing Table]
+ *   Remote Node [Cluster State + Segment + Translog]
+ *   Remote Node With Routing Table [Cluster State + Segment + Translog + Routing Table]
+ */
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0)
+public class RemotePublicationConfigurationIT extends MigrationBaseTestCase {
+    private final String REMOTE_PRI_DOCREP_REP = "remote-primary-docrep-replica";
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        /* Adding the following mock plugins:
+           - InternalSettingsPlugin : To override default intervals of retention lease and global ckp sync
+           - MockFsRepositoryPlugin and MockTransportService.TestPlugin: To ensure remote interactions are not no-op and retention leases are properly propagated
+        */
+        return Stream.concat(
+            super.nodePlugins().stream(),
+            Stream.of(InternalSettingsPlugin.class, MockFsRepositoryPlugin.class, MockTransportService.TestPlugin.class)
+        ).collect(Collectors.toList());
+    }
+
+    @Before
+    public void setUp() throws Exception {
+        if (segmentRepoPath == null || translogRepoPath == null) {
+            segmentRepoPath = randomRepoPath().toAbsolutePath();
+            translogRepoPath = randomRepoPath().toAbsolutePath();
+        }
+        super.setUp();
+    }
+
+    public Settings.Builder remotePublishConfiguredNodeSetting() {
+        String stateRepoSettingsAttributeKeyPrefix = String.format(
+            Locale.getDefault(),
+            "node.attr."
+ REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + REPOSITORY_NAME + ); + String prefixModeVerificationSuffix = BlobStoreRepository.PREFIX_MODE_VERIFICATION_SETTING.getKey(); + String stateRepoTypeAttributeKey = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + REPOSITORY_NAME + ); + String routingTableRepoTypeAttributeKey = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT, + ROUTING_TABLE_REPO_NAME + ); + String routingTableRepoSettingsAttributeKeyPrefix = String.format( + Locale.getDefault(), + "node.attr." + REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX, + ROUTING_TABLE_REPO_NAME + ); + + Settings.Builder builder = Settings.builder() + .put("node.attr." + REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, REPOSITORY_NAME) + .put(stateRepoTypeAttributeKey, ReloadableFsRepository.TYPE) + .put(stateRepoSettingsAttributeKeyPrefix + "location", segmentRepoPath) + .put(stateRepoSettingsAttributeKeyPrefix + prefixModeVerificationSuffix, true) + .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .put("node.attr." + REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY, ROUTING_TABLE_REPO_NAME) + .put(routingTableRepoTypeAttributeKey, ReloadableFsRepository.TYPE) + .put(routingTableRepoSettingsAttributeKeyPrefix + "location", segmentRepoPath); + return builder; + } + + public Settings.Builder remoteWithRoutingTableNodeSetting() { + // Remote Cluster with Routing table + return Settings.builder() + .put( + buildRemoteStoreNodeAttributes( + REPOSITORY_NAME, + segmentRepoPath, + REPOSITORY_2_NAME, + translogRepoPath, + REPOSITORY_NAME, + segmentRepoPath, + false + ) + ) + .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true); + } + + public void testRemotePublishConfigNodeJoinNonRemoteCluster() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + Settings.Builder build = remotePublishConfiguredNodeSetting(); + internalCluster().startClusterManagerOnlyNode(build.build()); + internalCluster().startDataOnlyNodes(2, build.build()); + + ensureStableCluster(6); + ensureGreen(); + } + + public void testRemotePublishConfigNodeJoinRemoteCluster() throws Exception { + // Remote Cluster without Routing table + setAddRemote(true); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + setAddRemote(false); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store") + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + ); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + Settings.Builder build = remotePublishConfiguredNodeSetting(); + internalCluster().startClusterManagerOnlyNode(build.build()); + ensureStableCluster(4); + ensureGreen(); + } + + public void testRemoteNodeWithRoutingTableJoinRemoteCluster() throws Exception { + setAddRemote(true); + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + setAddRemote(false); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store") + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + ); 
+ assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + // Remote Repo with Routing table + Settings settings = remoteWithRoutingTableNodeSetting().build(); + internalCluster().startClusterManagerOnlyNode(settings); + ensureStableCluster(4); + ensureGreen(); + } + + public void testNonRemoteNodeJoinRemoteWithRoutingCluster() throws Exception { + Settings settings = remoteWithRoutingTableNodeSetting().build(); + internalCluster().startClusterManagerOnlyNode(settings); + internalCluster().startDataOnlyNodes(2, settings); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store") + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + ); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + internalCluster().startClusterManagerOnlyNode(); + ensureStableCluster(4); + ensureGreen(); + } + + public void testRemotePublishConfigNodeJoinRemoteWithRoutingCluster() throws Exception { + Settings settings = remoteWithRoutingTableNodeSetting().build(); + internalCluster().startClusterManagerOnlyNode(settings); + internalCluster().startDataOnlyNodes(2, settings); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store") + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + ); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + internalCluster().startClusterManagerOnlyNode(remotePublishConfiguredNodeSetting().build()); + + ensureStableCluster(4); + ensureGreen(); + } + + public void testNonRemoteNodeJoiningPublishConfigCluster() throws Exception { + Settings.Builder build = remotePublishConfiguredNodeSetting(); + internalCluster().startClusterManagerOnlyNode(build.build()); + internalCluster().startDataOnlyNodes(2, build.build()); + + internalCluster().startClusterManagerOnlyNode(); + + ensureStableCluster(4); + ensureGreen(); + } + + public void testRemoteNodeJoiningPublishConfigCluster() throws Exception { + Settings.Builder build = remotePublishConfiguredNodeSetting(); + internalCluster().startClusterManagerOnlyNode(build.build()); + internalCluster().startDataOnlyNodes(2, build.build()); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store") + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), "mixed") + ); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + + setAddRemote(true); + internalCluster().startClusterManagerOnlyNode(); + ensureStableCluster(4); + ensureGreen(); + } + + public void testRemoteNodeWithRoutingTableJoiningPublishConfigCluster() throws Exception { + Settings.Builder build = remotePublishConfiguredNodeSetting(); + internalCluster().startClusterManagerOnlyNode(build.build()); + internalCluster().startDataOnlyNodes(2, build.build()); + + ClusterUpdateSettingsRequest updateSettingsRequest = new ClusterUpdateSettingsRequest(); + updateSettingsRequest.persistentSettings( + Settings.builder() + .put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store") + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), 
"mixed") + ); + + Settings settings = Settings.builder() + .put( + buildRemoteStoreNodeAttributes( + REPOSITORY_NAME, + segmentRepoPath, + REPOSITORY_2_NAME, + translogRepoPath, + ROUTING_TABLE_REPO_NAME, + segmentRepoPath, + false + ) + ) + .put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) + .build(); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + internalCluster().startClusterManagerOnlyNode(settings); + + ensureStableCluster(4); + ensureGreen(); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java index 17a9c3ddbe317..a82e6d45ce0f6 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java @@ -57,6 +57,8 @@ public class MigrationBaseTestCase extends OpenSearchIntegTestCase { protected static final String REPOSITORY_NAME = "test-remote-store-repo"; + protected static final String ROUTING_TABLE_REPO_NAME = "remote-routing-repo"; + protected static final String REPOSITORY_2_NAME = "test-remote-store-repo-2"; protected Path segmentRepoPath; @@ -72,7 +74,7 @@ public class MigrationBaseTestCase extends OpenSearchIntegTestCase { randomAlphaOfLength(5) ); - void setAddRemote(boolean addRemote) { + public void setAddRemote(boolean addRemote) { this.addRemote = addRemote; } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 0acb578e2e7bf..a0183e89bfce2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -11,6 +11,8 @@ import org.opensearch.action.DocWriteResponse; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsRequest; +import org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; import org.opensearch.action.admin.indices.recovery.RecoveryResponse; @@ -25,6 +27,7 @@ import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.io.PathUtils; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.index.Index; @@ -43,14 +46,11 @@ import org.opensearch.repositories.RepositoryData; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.fs.FsRepository; -import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.snapshots.SnapshotRestoreException; import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; -import org.junit.After; -import org.junit.Before; import java.io.IOException; import 
java.nio.file.Files; @@ -63,6 +63,7 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -79,48 +80,7 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class RemoteRestoreSnapshotIT extends AbstractSnapshotIntegTestCase { - private static final String BASE_REMOTE_REPO = "test-rs-repo" + TEST_REMOTE_STORE_REPO_SUFFIX; - private Path remoteRepoPath; - - @Before - public void setup() { - remoteRepoPath = randomRepoPath().toAbsolutePath(); - } - - @After - public void teardown() { - clusterAdmin().prepareCleanupRepository(BASE_REMOTE_REPO).get(); - } - - @Override - protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder() - .put(super.nodeSettings(nodeOrdinal)) - .put(remoteStoreClusterSettings(BASE_REMOTE_REPO, remoteRepoPath)) - .build(); - } - - private Settings.Builder getIndexSettings(int numOfShards, int numOfReplicas) { - Settings.Builder settingsBuilder = Settings.builder() - .put(super.indexSettings()) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numOfReplicas) - .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s"); - return settingsBuilder; - } - - private void indexDocuments(Client client, String indexName, int numOfDocs) { - indexDocuments(client, indexName, 0, numOfDocs); - } - - private void indexDocuments(Client client, String indexName, int fromId, int toId) { - for (int i = fromId; i < toId; i++) { - String id = Integer.toString(i); - client.prepareIndex(indexName).setId(id).setSource("text", "sometext").get(); - } - client.admin().indices().prepareFlush(indexName).get(); - } +public class RemoteRestoreSnapshotIT extends RemoteSnapshotIT { private void assertDocsPresentInIndex(Client client, String indexName, int numOfDocs) { for (int i = 0; i < numOfDocs; i++) { @@ -997,6 +957,75 @@ public void testConcurrentSnapshotV2CreateOperation() throws InterruptedExceptio assertThat(repositoryData.getSnapshotIds().size(), greaterThanOrEqualTo(1)); } + public void testConcurrentSnapshotV2CreateOperation_MasterChange() throws Exception { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, 
indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + Thread thread = new Thread(() -> { + try { + String snapshotName = "snapshot-earlier-master"; + internalCluster().nonClusterManagerClient() + .admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName) + .setWaitForCompletion(true) + .setMasterNodeTimeout(TimeValue.timeValueSeconds(60)) + .get(); + + } catch (Exception ignored) {} + }); + thread.start(); + + // stop existing master + final String clusterManagerNode = internalCluster().getClusterManagerName(); + stopNode(clusterManagerNode); + + // Validate that we have greater one snapshot has been created + String snapshotName = "new-snapshot"; + try { + client().admin().cluster().prepareCreateSnapshot(snapshotRepoName, snapshotName).setWaitForCompletion(true).get(); + } catch (Exception e) { + logger.info("Exception while creating new-snapshot", e); + } + + // Validate that snapshot is present in repository data + assertBusy(() -> { + GetSnapshotsRequest request = new GetSnapshotsRequest(snapshotRepoName); + GetSnapshotsResponse response2 = client().admin().cluster().getSnapshots(request).actionGet(); + assertThat(response2.getSnapshots().size(), greaterThanOrEqualTo(1)); + }, 30, TimeUnit.SECONDS); + thread.join(); + } + public void testCreateSnapshotV2WithRedIndex() throws Exception { internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); internalCluster().startDataOnlyNode(pinnedTimestampSettings()); @@ -1315,11 +1344,4 @@ public void testConcurrentV1SnapshotAndV2RepoSettingUpdate() throws Exception { createV1SnapshotThread.join(); } - private Settings pinnedTimestampSettings() { - Settings settings = Settings.builder() - .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) - .build(); - return settings; - } - } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteSnapshotIT.java new file mode 100644 index 0000000000000..836871b8251d1 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteSnapshotIT.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.remotestore; + +import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesRequest; +import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.RepositoryMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.IndexSettings; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.repositories.fs.ReloadableFsRepository; +import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; +import org.junit.After; +import org.junit.Before; + +import java.nio.file.Path; +import java.util.concurrent.ExecutionException; + +import static org.opensearch.repositories.fs.ReloadableFsRepository.REPOSITORIES_FAILRATE_SETTING; + +public abstract class RemoteSnapshotIT extends AbstractSnapshotIntegTestCase { + protected static final String BASE_REMOTE_REPO = "test-rs-repo" + TEST_REMOTE_STORE_REPO_SUFFIX; + protected Path remoteRepoPath; + + @Before + public void setup() { + remoteRepoPath = randomRepoPath().toAbsolutePath(); + } + + @After + public void teardown() { + clusterAdmin().prepareCleanupRepository(BASE_REMOTE_REPO).get(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(remoteStoreClusterSettings(BASE_REMOTE_REPO, remoteRepoPath)) + .build(); + } + + protected Settings pinnedTimestampSettings() { + Settings settings = Settings.builder() + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + return settings; + } + + protected Settings.Builder getIndexSettings(int numOfShards, int numOfReplicas) { + Settings.Builder settingsBuilder = Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numOfReplicas) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s"); + return settingsBuilder; + } + + protected void indexDocuments(Client client, String indexName, int numOfDocs) { + indexDocuments(client, indexName, 0, numOfDocs); + } + + void indexDocuments(Client client, String indexName, int fromId, int toId) { + for (int i = fromId; i < toId; i++) { + String id = Integer.toString(i); + client.prepareIndex(indexName).setId(id).setSource("text", "sometext").get(); + } + client.admin().indices().prepareFlush(indexName).get(); + } + + protected void setFailRate(String repoName, int value) throws ExecutionException, InterruptedException { + GetRepositoriesRequest gr = new GetRepositoriesRequest(new String[] { repoName }); + GetRepositoriesResponse res = client().admin().cluster().getRepositories(gr).get(); + RepositoryMetadata rmd = res.repositories().get(0); + Settings.Builder settings = Settings.builder() + .put("location", rmd.settings().get("location")) + .put(REPOSITORIES_FAILRATE_SETTING.getKey(), value); + createRepository(repoName, ReloadableFsRepository.TYPE, settings); + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java index c6744ae62db60..69e85b13548e0 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/CloneSnapshotV2IT.java @@ -34,11 +34,15 
@@ import org.opensearch.action.ActionRunnable; import org.opensearch.action.DocWriteResponse; import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsRequest; +import org.opensearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; import org.opensearch.action.delete.DeleteResponse; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.action.support.master.AcknowledgedResponse; import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.rest.RestStatus; @@ -50,8 +54,10 @@ import org.opensearch.test.OpenSearchIntegTestCase; import java.nio.file.Path; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; -import static org.opensearch.remotestore.RemoteStoreBaseIntegTestCase.remoteStoreClusterSettings; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -134,27 +140,32 @@ public void testCloneShallowCopyV2() throws Exception { assertTrue(response.isAcknowledged()); awaitClusterManagerFinishRepoOperations(); + AtomicReference cloneSnapshotId = new AtomicReference<>(); // Validate that snapshot is present in repository data - PlainActionFuture repositoryDataPlainActionFutureClone = new PlainActionFuture<>(); - repository.getRepositoryData(repositoryDataPlainActionFutureClone); - - repositoryData = repositoryDataPlainActionFutureClone.get(); - assertEquals(repositoryData.getSnapshotIds().size(), 2); - boolean foundCloneInRepoData = false; - SnapshotId cloneSnapshotId = null; - for (SnapshotId snapshotId : repositoryData.getSnapshotIds()) { - if (snapshotId.getName().equals("test_clone_snapshot1")) { - foundCloneInRepoData = true; - cloneSnapshotId = snapshotId; + waitUntil(() -> { + PlainActionFuture repositoryDataPlainActionFutureClone = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFutureClone); + + RepositoryData repositoryData1; + try { + repositoryData1 = repositoryDataPlainActionFutureClone.get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); } - } - final SnapshotId cloneSnapshotIdFinal = cloneSnapshotId; + for (SnapshotId snapshotId : repositoryData1.getSnapshotIds()) { + if (snapshotId.getName().equals("test_clone_snapshot1")) { + cloneSnapshotId.set(snapshotId); + return true; + } + } + return false; + }, 90, TimeUnit.SECONDS); + + final SnapshotId cloneSnapshotIdFinal = cloneSnapshotId.get(); SnapshotInfo cloneSnapshotInfo = PlainActionFuture.get( f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, () -> repository.getSnapshotInfo(cloneSnapshotIdFinal))) ); - assertTrue(foundCloneInRepoData); - assertThat(cloneSnapshotInfo.getPinnedTimestamp(), equalTo(sourceSnapshotInfo.getPinnedTimestamp())); for (String index : sourceSnapshotInfo.indices()) { assertTrue(cloneSnapshotInfo.indices().contains(index)); @@ -259,19 +270,21 @@ public void testCloneShallowCopyAfterDisablingV2() throws Exception { assertThat(sourceSnapshotInfoV1.state(), 
equalTo(SnapshotState.SUCCESS)); assertThat(sourceSnapshotInfoV1.successfulShards(), greaterThan(0)); assertThat(sourceSnapshotInfoV1.successfulShards(), equalTo(sourceSnapshotInfoV1.totalShards())); - assertThat(sourceSnapshotInfoV1.getPinnedTimestamp(), equalTo(0L)); + // assertThat(sourceSnapshotInfoV1.getPinnedTimestamp(), equalTo(0L)); + AtomicReference repositoryDataAtomicReference = new AtomicReference<>(); + awaitClusterManagerFinishRepoOperations(); // Validate that snapshot is present in repository data - PlainActionFuture repositoryDataV1PlainActionFuture = new PlainActionFuture<>(); - BlobStoreRepository repositoryV1 = (BlobStoreRepository) internalCluster().getCurrentClusterManagerNodeInstance( - RepositoriesService.class - ).repository(snapshotRepoName); - repositoryV1.getRepositoryData(repositoryDataV1PlainActionFuture); - - repositoryData = repositoryDataV1PlainActionFuture.get(); + assertBusy(() -> { + Metadata metadata = clusterAdmin().prepareState().get().getState().metadata(); + RepositoriesMetadata repositoriesMetadata = metadata.custom(RepositoriesMetadata.TYPE); + assertEquals(1, repositoriesMetadata.repository(snapshotRepoName).generation()); + assertEquals(1, repositoriesMetadata.repository(snapshotRepoName).pendingGeneration()); - assertTrue(repositoryData.getSnapshotIds().contains(sourceSnapshotInfoV1.snapshotId())); - assertEquals(repositoryData.getSnapshotIds().size(), 2); + GetSnapshotsRequest request = new GetSnapshotsRequest(snapshotRepoName); + GetSnapshotsResponse response = client().admin().cluster().getSnapshots(request).actionGet(); + assertEquals(2, response.getSnapshots().size()); + }, 30, TimeUnit.SECONDS); // clone should get created for v2 snapshot AcknowledgedResponse response = client().admin() @@ -289,31 +302,28 @@ public void testCloneShallowCopyAfterDisablingV2() throws Exception { ).repository(snapshotRepoName); repositoryCloneV2.getRepositoryData(repositoryDataCloneV2PlainActionFuture); - repositoryData = repositoryDataCloneV2PlainActionFuture.get(); - - assertEquals(repositoryData.getSnapshotIds().size(), 3); - boolean foundCloneInRepoData = false; - SnapshotId cloneSnapshotId = null; - for (SnapshotId snapshotId : repositoryData.getSnapshotIds()) { - if (snapshotId.getName().equals(cloneSnapshotV2)) { - foundCloneInRepoData = true; - cloneSnapshotId = snapshotId; - } - } - final SnapshotId cloneSnapshotIdFinal = cloneSnapshotId; - SnapshotInfo cloneSnapshotInfo = PlainActionFuture.get( - f -> repository.threadPool().generic().execute(ActionRunnable.supply(f, () -> repository.getSnapshotInfo(cloneSnapshotIdFinal))) - ); + // Validate that snapshot is present in repository data + assertBusy(() -> { + Metadata metadata = clusterAdmin().prepareState().get().getState().metadata(); + RepositoriesMetadata repositoriesMetadata = metadata.custom(RepositoriesMetadata.TYPE); + assertEquals(2, repositoriesMetadata.repository(snapshotRepoName).generation()); + assertEquals(2, repositoriesMetadata.repository(snapshotRepoName).pendingGeneration()); + GetSnapshotsRequest request = new GetSnapshotsRequest(snapshotRepoName); + GetSnapshotsResponse response2 = client().admin().cluster().getSnapshots(request).actionGet(); + assertEquals(3, response2.getSnapshots().size()); + }, 30, TimeUnit.SECONDS); - assertTrue(foundCloneInRepoData); // pinned timestamp value in clone snapshot v2 matches source snapshot v2 - assertThat(cloneSnapshotInfo.getPinnedTimestamp(), equalTo(sourceSnapshotInfo.getPinnedTimestamp())); - for (String index : 
sourceSnapshotInfo.indices()) { - assertTrue(cloneSnapshotInfo.indices().contains(index)); - + GetSnapshotsRequest request = new GetSnapshotsRequest(snapshotRepoName, new String[] { sourceSnapshotV2, cloneSnapshotV2 }); + GetSnapshotsResponse response2 = client().admin().cluster().getSnapshots(request).actionGet(); + + SnapshotInfo sourceInfo = response2.getSnapshots().get(0); + SnapshotInfo cloneInfo = response2.getSnapshots().get(1); + assertEquals(sourceInfo.getPinnedTimestamp(), cloneInfo.getPinnedTimestamp()); + assertEquals(sourceInfo.totalShards(), cloneInfo.totalShards()); + for (String index : sourceInfo.indices()) { + assertTrue(cloneInfo.indices().contains(index)); } - assertThat(cloneSnapshotInfo.totalShards(), equalTo(sourceSnapshotInfo.totalShards())); - } public void testRestoreFromClone() throws Exception { diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsV2IT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsV2IT.java new file mode 100644 index 0000000000000..f20fddb6af26c --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsV2IT.java @@ -0,0 +1,486 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.snapshots; + +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.remotestore.RemoteSnapshotIT; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryData; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class ConcurrentSnapshotsV2IT extends RemoteSnapshotIT { + + public void testLongRunningSnapshotDontAllowConcurrentSnapshot() throws Exception { + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + 
.put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + blockClusterManagerOnWriteIndexFile(repoName); + + final ActionFuture snapshotFuture = startFullSnapshot(repoName, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + + try { + String snapshotName = "snapshot-concurrent"; + client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(true).get(); + fail("expected the concurrent snapshot-v2 creation to be rejected"); + } catch (Exception e) { + // expected: only one snapshot-v2 operation may run against the repository at a time + } + + unblockNode(repoName, clusterManagerName); + CreateSnapshotResponse csr = snapshotFuture.actionGet(); + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + assertThat(snapInfo, contains(csr.getSnapshotInfo())); + } + + public void testCreateSnapshotFailInFinalize() throws Exception { + final String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startFullSnapshot(repoName, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + unblockNode(repoName, clusterManagerNode); + expectThrows(SnapshotException.class, snapshotFuture::actionGet); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the repo is cleaned up after the failed finalization + CreateSnapshotResponse csr = snapshotFuture2.actionGet(); + + List snapInfo =
client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + assertThat(snapInfo, contains(csr.getSnapshotInfo())); + } + + public void testCreateSnapshotV2MasterSwitch() throws Exception { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + String clusterManagerNode = internalCluster().getClusterManagerName(); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startFullSnapshot(repoName, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + + // Fail the cluster manager + stopNode(clusterManagerNode); + + ensureGreen(); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the in-progress v2 snapshot is cleaned up on cluster manager switch + CreateSnapshotResponse csr = snapshotFuture2.actionGet(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + assertThat(snapInfo, contains(csr.getSnapshotInfo())); + + } + + public void testPinnedTimestampFailSnapshot() throws Exception { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock",
settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + // fail the segment repo - this makes the timestamp pinning fail + setFailRate(BASE_REMOTE_REPO, 100); + + try { + String snapshotName = "snapshot-fail"; + client().admin() + .cluster() + .prepareCreateSnapshot(repoName, snapshotName) + .setWaitForCompletion(true) + .get(); + fail("expected snapshot-v2 creation to fail when timestamp pinning fails"); + } catch (Exception e) { + // expected: a timestamp pinning failure fails the whole snapshot + } + + setFailRate(BASE_REMOTE_REPO, 0); + String snapshotName = "snapshot-success"; + client().admin() + .cluster() + .prepareCreateSnapshot(repoName, snapshotName) + .setWaitForCompletion(true) + .get(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + } + + public void testConcurrentSnapshotV2CreateOperation() throws InterruptedException, ExecutionException { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + int concurrentSnapshots = 5; + + // Prepare threads for concurrent snapshot creation + List threads = new ArrayList<>(); + + for (int i = 0; i < concurrentSnapshots; i++) { + int snapshotIndex = i; + Thread thread = new Thread(() -> { + try { + String snapshotName = "snapshot-concurrent-" + snapshotIndex; + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName) + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(),
equalTo(snapshotName)); + assertThat(snapshotInfo.getPinnedTimestamp(), greaterThan(0L)); + } catch (Exception e) { + // rejected concurrent snapshot-v2 creations land here; only successful ones run the assertions above + } + }); + threads.add(thread); + } + // start all threads + for (Thread thread : threads) { + thread.start(); + } + + // Wait for all threads to complete + for (Thread thread : threads) { + thread.join(); + } + + // Validate that at least one snapshot was created successfully + Repository repository = internalCluster().getInstance(RepositoriesService.class).repository(snapshotRepoName); + PlainActionFuture repositoryDataPlainActionFuture = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFuture); + + RepositoryData repositoryData = repositoryDataPlainActionFuture.get(); + assertThat(repositoryData.getSnapshotIds().size(), greaterThanOrEqualTo(1)); + } + + public void testLongRunningSnapshotDontAllowConcurrentClone() throws Exception { + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + String sourceSnap = "snapshot-source"; + + final CreateSnapshotResponse csr = startFullSnapshot(repoName, sourceSnap).actionGet(); + blockClusterManagerOnWriteIndexFile(repoName); + + final ActionFuture snapshotFuture = startCloneSnapshot(repoName, sourceSnap, "snapshot-clone"); + awaitNumberOfSnapshotsInProgress(1); + + final ActionFuture snapshotFuture2 = startCloneSnapshot(repoName, sourceSnap, "snapshot-clone-2"); + assertThrows(ConcurrentSnapshotExecutionException.class, snapshotFuture2::actionGet); + + unblockNode(repoName, clusterManagerName); + assertThrows(SnapshotException.class, snapshotFuture2::actionGet); + + snapshotFuture.get(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(2, snapInfo.size()); + } + + public void testCloneSnapshotFailInFinalize() throws Exception { + final String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 =
randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + String sourceSnap = "snapshot-source"; + CreateSnapshotResponse sourceResp = startFullSnapshot(repoName, sourceSnap).actionGet(); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startCloneSnapshot(repoName, sourceSnap, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + unblockNode(repoName, clusterManagerNode); + assertThrows(SnapshotException.class, snapshotFuture::actionGet); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the repo is cleaned up + CreateSnapshotResponse csr = snapshotFuture2.actionGet(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(2, snapInfo.size()); + assertThat(snapInfo, containsInAnyOrder(csr.getSnapshotInfo(), sourceResp.getSnapshotInfo())); + } + + public void testCloneSnapshotV2MasterSwitch() throws Exception { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + 
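// Editorial note: every test in this class repeats the same repository bootstrap. A minimal
// consolidation sketch follows; the helper name "setUpShallowV2Repo" and its shape are
// hypothetical (not part of this change), but it reuses only methods already used above.
private Path setUpShallowV2Repo(String repoName) {
    Path repoPath = randomRepoPath().toAbsolutePath();
    Settings.Builder settings = Settings.builder()
        .put(FsRepository.LOCATION_SETTING.getKey(), repoPath)
        .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean())
        .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES)
        .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true)
        // enables the snapshot-v2 behaviour exercised by these tests
        .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true);
    createRepository(repoName, "mock", settings);
    return repoPath;
}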
ensureGreen(indexName1, indexName2); + + String sourceSnap = "snapshot-source"; + CreateSnapshotResponse csr = startFullSnapshot(repoName, sourceSnap).actionGet(); + + String clusterManagerNode = internalCluster().getClusterManagerName(); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startCloneSnapshot(repoName, sourceSnap, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + + // Fail the cluster manager + stopNode(clusterManagerNode); + + ensureGreen(); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the in-progress v2 clone is cleaned up on cluster manager switch + CreateSnapshotResponse csr2 = snapshotFuture2.actionGet(); + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(2, snapInfo.size()); + assertThat(snapInfo, containsInAnyOrder(csr.getSnapshotInfo(), csr2.getSnapshotInfo())); + } + + protected ActionFuture startCloneSnapshot(String repoName, String sourceSnapshotName, String snapshotName) { + logger.info("--> cloning snapshot [{}] as [{}] in repo [{}]", sourceSnapshotName, snapshotName, repoName); + return clusterAdmin().prepareCloneSnapshot(repoName, sourceSnapshotName, snapshotName).setIndices("*").execute(); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java index 1c12c260b3929..8ce11c8183cf6 100644 --- a/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java +++ b/server/src/main/java/org/opensearch/cluster/NodeConnectionsService.java @@ -103,10 +103,10 @@ public class NodeConnectionsService extends AbstractLifecycleComponent { // contains an entry for every node in the latest cluster state, as well as for nodes from which we are in the process of // disconnecting - private final Map targetsByNode = new HashMap<>(); + protected final Map targetsByNode = new HashMap<>(); private final TimeValue reconnectInterval; - private volatile ConnectionChecker connectionChecker; + protected volatile ConnectionChecker connectionChecker; @Inject public NodeConnectionsService(Settings settings, ThreadPool threadPool, TransportService transportService) { @@ -115,6 +115,11 @@ public NodeConnectionsService(Settings settings, ThreadPool threadPool, Transpor this.reconnectInterval = NodeConnectionsService.CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.get(settings); } + // exposed for testing + protected ConnectionTarget createConnectionTarget(DiscoveryNode discoveryNode) { + return new ConnectionTarget(discoveryNode); + } + /** * Connect to all the given nodes, but do not disconnect from any extra nodes. Calls the completion handler on completion of all * connection attempts to _new_ nodes, but not on attempts to re-establish connections to nodes that are already known. @@ -159,6 +164,14 @@ public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) runnables.forEach(Runnable::run); } + public void setPendingDisconnections(Set nodes) { + nodes.forEach(transportService::setPendingDisconnection); + } + + public void clearPendingDisconnections() { + transportService.clearPendingDisconnections(); + } + /** * Disconnect from any nodes to which we are currently connected which do not appear in the given nodes. Does not wait for the * disconnections to complete, because they might have to wait for ongoing connection attempts first.
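// A hedged sketch of how the two hooks added above are driven from the coordinator (see the
// Coordinator diff later in this change): removed nodes are marked pending-disconnect just before
// a publication, and the set is cleared when the local node is no longer the elected cluster
// manager. The wrapper methods and the 'removedNodes' parameter are illustrative assumptions.
void beforePublish(NodeConnectionsService nodeConnectionsService, List<DiscoveryNode> removedNodes) {
    // a node that is pending disconnect must not be reconnected by a racing join request
    nodeConnectionsService.setPendingDisconnections(new HashSet<>(removedNodes));
}

void onClusterManagerCleanup(NodeConnectionsService nodeConnectionsService) {
    // stale entries would block future reconnections, so clear them when stepping down
    nodeConnectionsService.clearPendingDisconnections();
}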
@@ -211,7 +224,7 @@ private void awaitPendingActivity(Runnable onCompletion) { * nodes which are in the process of disconnecting. The onCompletion handler is called after all ongoing connection/disconnection * attempts have completed. */ - private void connectDisconnectedTargets(Runnable onCompletion) { + protected void connectDisconnectedTargets(Runnable onCompletion) { final List runnables = new ArrayList<>(); synchronized (mutex) { final Collection connectionTargets = targetsByNode.values(); @@ -321,7 +334,7 @@ private enum ActivityType { * * @opensearch.internal */ - private class ConnectionTarget { + protected class ConnectionTarget { private final DiscoveryNode discoveryNode; private PlainListenableActionFuture future = PlainListenableActionFuture.newListenableFuture(); diff --git a/server/src/main/java/org/opensearch/cluster/SnapshotsInProgress.java b/server/src/main/java/org/opensearch/cluster/SnapshotsInProgress.java index d658f38430dd9..595f6d54fd9be 100644 --- a/server/src/main/java/org/opensearch/cluster/SnapshotsInProgress.java +++ b/server/src/main/java/org/opensearch/cluster/SnapshotsInProgress.java @@ -134,6 +134,38 @@ public static Entry startedEntry( ); } + public static Entry startedEntry( + Snapshot snapshot, + boolean includeGlobalState, + boolean partial, + List indices, + List dataStreams, + long startTime, + long repositoryStateId, + final Map shards, + Map userMetadata, + Version version, + boolean remoteStoreIndexShallowCopy, + boolean remoteStoreIndexShallowCopyV2 + ) { + return new SnapshotsInProgress.Entry( + snapshot, + includeGlobalState, + partial, + completed(shards.values()) ? State.SUCCESS : State.STARTED, + indices, + dataStreams, + startTime, + repositoryStateId, + shards, + null, + userMetadata, + version, + remoteStoreIndexShallowCopy, + remoteStoreIndexShallowCopyV2 + ); + } + /** * Creates the initial snapshot clone entry * @@ -168,8 +200,39 @@ public static Entry startClone( version, source, Map.of(), - false // initialising to false, will be updated in startCloning method of SnapshotsService while updating entry with - // clone jobs + false, + false // initialising to false, will be updated in startCloning method of SnapshotsService while updating entry with + // clone jobs + ); + } + + public static Entry startClone( + Snapshot snapshot, + SnapshotId source, + List indices, + long startTime, + long repositoryStateId, + Version version, + boolean remoteStoreIndexShallowCopyV2 + ) { + return new SnapshotsInProgress.Entry( + snapshot, + true, + false, + State.STARTED, + indices, + Collections.emptyList(), + startTime, + repositoryStateId, + Map.of(), + null, + Collections.emptyMap(), + version, + source, + Map.of(), + remoteStoreIndexShallowCopyV2, + remoteStoreIndexShallowCopyV2 // initialised from the caller; the entry is updated with clone jobs in the + // startCloning method of SnapshotsService ); } @@ -183,6 +246,8 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation private final Snapshot snapshot; private final boolean includeGlobalState; private final boolean remoteStoreIndexShallowCopy; + + private final boolean remoteStoreIndexShallowCopyV2; private final boolean partial; /** * Map of {@link ShardId} to {@link ShardSnapshotStatus} tracking the state of each shard snapshot operation.
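// A hedged usage sketch of the startedEntry overload introduced above, mirroring the call site in
// SnapshotsService#createSnapshotV2 later in this diff. The argument values are illustrative
// assumptions, and the local names (snapshot, indices, dataStreams, threadPool, repositoryData,
// userMetadata, version) are taken to be in scope at the caller.
SnapshotsInProgress.Entry entry = SnapshotsInProgress.startedEntry(
    snapshot,                          // the Snapshot being created
    true,                              // includeGlobalState
    false,                             // partial
    indices,                           // List<IndexId> resolved against the repository
    dataStreams,                       // data stream names included in the snapshot
    threadPool.absoluteTimeInMillis(), // startTime
    repositoryData.getGenId(),         // repositoryStateId
    Map.of(),                          // shards: empty, v2 snapshots skip per-shard state tracking
    userMetadata,
    version,
    true,                              // remoteStoreIndexShallowCopy
    true                               // remoteStoreIndexShallowCopyV2: marks the entry for v2 handling
);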
@@ -212,6 +277,42 @@ public static class Entry implements Writeable, ToXContent, RepositoryOperation @Nullable private final String failure; + public Entry( + Snapshot snapshot, + boolean includeGlobalState, + boolean partial, + State state, + List indices, + List dataStreams, + long startTime, + long repositoryStateId, + final Map shards, + String failure, + Map userMetadata, + Version version, + boolean remoteStoreIndexShallowCopy, + boolean remoteStoreIndexShallowCopyV2 + ) { + this( + snapshot, + includeGlobalState, + partial, + state, + indices, + dataStreams, + startTime, + repositoryStateId, + shards, + failure, + userMetadata, + version, + null, + Map.of(), + remoteStoreIndexShallowCopy, + remoteStoreIndexShallowCopyV2 + ); + } + // visible for testing, use #startedEntry and copy constructors in production code public Entry( Snapshot snapshot, @@ -243,7 +344,8 @@ public Entry( version, null, Map.of(), - remoteStoreIndexShallowCopy + remoteStoreIndexShallowCopy, + false ); } @@ -262,7 +364,8 @@ private Entry( Version version, @Nullable SnapshotId source, @Nullable final Map clones, - boolean remoteStoreIndexShallowCopy + boolean remoteStoreIndexShallowCopy, + boolean remoteStoreIndexShallowCopyV2 ) { this.state = state; this.snapshot = snapshot; @@ -284,7 +387,9 @@ private Entry( this.clones = Collections.unmodifiableMap(clones); } this.remoteStoreIndexShallowCopy = remoteStoreIndexShallowCopy; - assert assertShardsConsistent(this.source, this.state, this.indices, this.shards, this.clones); + this.remoteStoreIndexShallowCopyV2 = remoteStoreIndexShallowCopyV2; + assert this.remoteStoreIndexShallowCopyV2 + || assertShardsConsistent(this.source, this.state, this.indices, this.shards, this.clones); } private Entry(StreamInput in) throws IOException { @@ -307,6 +412,11 @@ private Entry(StreamInput in) throws IOException { } else { remoteStoreIndexShallowCopy = false; } + if (in.getVersion().onOrAfter(Version.CURRENT)) { + remoteStoreIndexShallowCopyV2 = in.readBoolean(); + } else { + remoteStoreIndexShallowCopyV2 = false; + } } private static boolean assertShardsConsistent( @@ -428,7 +538,8 @@ public Entry withRepoGen(long newRepoGen) { version, source, clones, - remoteStoreIndexShallowCopy + remoteStoreIndexShallowCopy, + remoteStoreIndexShallowCopyV2 ); } @@ -451,7 +562,8 @@ public Entry withClones(final Map update version, source, updatedClones, - remoteStoreIndexShallowCopy + remoteStoreIndexShallowCopy, + remoteStoreIndexShallowCopyV2 ); } @@ -471,7 +583,8 @@ public Entry withRemoteStoreIndexShallowCopy(final boolean remoteStoreIndexShall version, source, clones, - remoteStoreIndexShallowCopy + remoteStoreIndexShallowCopy, + remoteStoreIndexShallowCopyV2 ); } @@ -527,7 +640,8 @@ public Entry fail(final Map shards, State state, S version, source, clones, - remoteStoreIndexShallowCopy + remoteStoreIndexShallowCopy, + remoteStoreIndexShallowCopyV2 ); } @@ -614,6 +728,10 @@ public boolean remoteStoreIndexShallowCopy() { return remoteStoreIndexShallowCopy; } + public boolean remoteStoreIndexShallowCopyV2() { + return remoteStoreIndexShallowCopyV2; + } + public Map userMetadata() { return userMetadata; } @@ -678,6 +796,7 @@ public boolean equals(Object o) { if (Objects.equals(source, ((Entry) o).source) == false) return false; if (clones.equals(((Entry) o).clones) == false) return false; if (remoteStoreIndexShallowCopy != entry.remoteStoreIndexShallowCopy) return false; + if (remoteStoreIndexShallowCopyV2 != entry.remoteStoreIndexShallowCopyV2) return false; return true; } @@ -695,6 
+814,7 @@ public int hashCode() { result = 31 * result + (source == null ? 0 : source.hashCode()); result = 31 * result + clones.hashCode(); result = 31 * result + (remoteStoreIndexShallowCopy ? 1 : 0); + result = 31 * result + (remoteStoreIndexShallowCopyV2 ? 1 : 0); return result; } @@ -766,6 +886,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_9_0)) { out.writeBoolean(remoteStoreIndexShallowCopy); } + if (out.getVersion().onOrAfter(Version.CURRENT)) { + out.writeBoolean(remoteStoreIndexShallowCopyV2); + } } @Override diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 13a57d93f03f0..9859abe503eaa 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.ClusterStateTaskConfig; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.LocalClusterUpdateTask; +import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.coordination.ClusterFormationFailureHelper.ClusterFormationState; import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfigExclusion; @@ -187,6 +188,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery private final NodeHealthService nodeHealthService; private final PersistedStateRegistry persistedStateRegistry; private final RemoteStoreNodeService remoteStoreNodeService; + private NodeConnectionsService nodeConnectionsService; /** * @param nodeName The name of the node, used to name the {@link java.util.concurrent.ExecutorService} of the {@link SeedHostsResolver}. @@ -418,7 +420,11 @@ PublishWithJoinResponse handlePublishRequest(PublishRequest publishRequest) { synchronized (mutex) { final DiscoveryNode sourceNode = publishRequest.getAcceptedState().nodes().getClusterManagerNode(); - logger.trace("handlePublishRequest: handling [{}] from [{}]", publishRequest, sourceNode); + logger.debug( + "handlePublishRequest: handling version [{}] from [{}]", + publishRequest.getAcceptedState().getVersion(), + sourceNode + ); if (sourceNode.equals(getLocalNode()) && mode != Mode.LEADER) { // Rare case in which we stood down as leader between starting this publication and receiving it ourselves. 
The publication @@ -630,7 +636,6 @@ private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback transportService.connectToNode(joinRequest.getSourceNode(), ActionListener.wrap(ignore -> { final ClusterState stateForJoinValidation = getStateForClusterManagerService(); - if (stateForJoinValidation.nodes().isLocalNodeElectedClusterManager()) { onJoinValidators.forEach(a -> a.accept(joinRequest.getSourceNode(), stateForJoinValidation)); if (stateForJoinValidation.getBlocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK) == false) { @@ -814,6 +819,10 @@ public void onFailure(String source, Exception e) { public ClusterTasksResult execute(ClusterState currentState) { if (currentState.nodes().isLocalNodeElectedClusterManager() == false) { allocationService.cleanCaches(); + // This set only needs to be maintained on the active cluster-manager + // It is cleaned up here to avoid stale entries which would block future reconnections + logger.trace("Removing all pending disconnections as part of cluster-manager cleanup"); + nodeConnectionsService.clearPendingDisconnections(); } return unchanged(); } @@ -914,11 +923,18 @@ public DiscoveryStats stats() { @Override public void startInitialJoin() { synchronized (mutex) { + logger.trace("Starting initial join, becoming candidate"); becomeCandidate("startInitialJoin"); } clusterBootstrapService.scheduleUnconfiguredBootstrap(); } + @Override + public void setNodeConnectionsService(NodeConnectionsService nodeConnectionsService) { + assert this.nodeConnectionsService == null : "nodeConnectionsService is already set"; + this.nodeConnectionsService = nodeConnectionsService; + } + @Override protected void doStop() { configuredHostsResolver.stop(); @@ -1356,6 +1372,9 @@ assert getLocalNode().equals(clusterState.getNodes().get(getLocalNode().getId()) currentPublication = Optional.of(publication); final DiscoveryNodes publishNodes = publishRequest.getAcceptedState().nodes(); + // marking pending disconnects before publish + // if a node tries to send a join request while it is pending disconnect, it should fail + nodeConnectionsService.setPendingDisconnections(new HashSet<>(clusterChangedEvent.nodesDelta().removedNodes())); leaderChecker.setCurrentNodes(publishNodes); followersChecker.setCurrentNodes(publishNodes); lagDetector.setTrackedNodes(publishNodes); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java index 838b5723b217b..13033b670d44b 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/JoinTaskExecutor.java @@ -420,9 +420,14 @@ public static void ensureIndexCompatibility(final Version nodeVersion, Metadata * ensures that the joining node has a version that's compatible with all current nodes */ public static void ensureNodesCompatibility(final DiscoveryNode joiningNode, DiscoveryNodes currentNodes, Metadata metadata) { - final Version minNodeVersion = currentNodes.getMinNodeVersion(); - final Version maxNodeVersion = currentNodes.getMaxNodeVersion(); - ensureNodesCompatibility(joiningNode, currentNodes, metadata, minNodeVersion, maxNodeVersion); + try { + final Version minNodeVersion = currentNodes.getMinNodeVersion(); + final Version maxNodeVersion = currentNodes.getMaxNodeVersion(); + ensureNodesCompatibility(joiningNode, currentNodes, metadata, minNodeVersion, maxNodeVersion); + } catch (Exception e) { +
logger.error("Exception in NodesCompatibility validation", e); + throw e; + } } /** @@ -539,9 +544,11 @@ private static void ensureRemoteStoreNodesCompatibility(DiscoveryNode joiningNod CompatibilityMode remoteStoreCompatibilityMode = REMOTE_STORE_COMPATIBILITY_MODE_SETTING.get(metadata.settings()); List reposToSkip = new ArrayList<>(1); + // find a remote node which has routing table configured Optional remoteRoutingTableNode = existingNodes.stream() .filter( - node -> node.getAttributes().get(RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY) != null + node -> node.isRemoteStoreNode() + && node.getAttributes().get(RemoteStoreNodeAttribute.REMOTE_STORE_ROUTING_TABLE_REPOSITORY_NAME_ATTRIBUTE_KEY) != null ) .findFirst(); // If none of the existing nodes have routing table repo, then we skip this repo check if present in joining node. diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java index 43801a05dbc24..3f7218939be92 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Publication.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Publication.java @@ -85,7 +85,7 @@ public Publication(PublishRequest publishRequest, AckListener ackListener, LongS } public void start(Set faultyNodes) { - logger.trace("publishing {} to {}", publishRequest, publicationTargets); + logger.debug("publishing version {} to {}", publishRequest.getAcceptedState().getVersion(), publicationTargets); for (final DiscoveryNode faultyNode : faultyNodes) { onFaultyNode(faultyNode); diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java index cdf331b7bb577..caed2b6eceb49 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/PublicationTransportHandler.java @@ -542,7 +542,7 @@ public String executor() { } public void sendClusterState(DiscoveryNode destination, ActionListener listener) { - logger.debug("sending cluster state over transport to node: {}", destination.getName()); + logger.trace("sending cluster state over transport to node: {}", destination.getName()); if (sendFullVersion || previousState.nodes().nodeExists(destination) == false) { logger.trace("sending full cluster state version [{}] to [{}]", newState.version(), destination); sendFullClusterState(destination, listener); diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index 47080cfbde692..d0b6f812e9ee2 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -502,6 +502,7 @@ private void runTask(UpdateTask task) { try { applyChanges(task, previousClusterState, newClusterState, stopWatch); TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS)); + // At this point, cluster state appliers and listeners are completed logger.debug( "processing [{}]: took [{}] done applying updated cluster state (version: {}, uuid: {})", task.source, @@ -510,6 +511,7 @@ private void runTask(UpdateTask task) { newClusterState.stateUUID() ); 
warnAboutSlowTaskIfNeeded(executionTime, task.source, stopWatch); + // Then we call the ClusterApplyListener of the task task.listener.onSuccess(task.source); } catch (Exception e) { TimeValue executionTime = TimeValue.timeValueMillis(Math.max(0, currentTimeInMillis() - startTimeMS)); @@ -578,6 +580,7 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl logger.debug("apply cluster state with version {}", newClusterState.version()); callClusterStateAppliers(clusterChangedEvent, stopWatch); + logger.debug("completed calling appliers of cluster state for version {}", newClusterState.version()); nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes()); @@ -594,6 +597,7 @@ private void applyChanges(UpdateTask task, ClusterState previousClusterState, Cl state.set(newClusterState); callClusterStateListeners(clusterChangedEvent, stopWatch); + logger.debug("completed calling listeners of cluster state for version {}", newClusterState.version()); } protected void connectToNodesAndWait(ClusterState newClusterState) { diff --git a/server/src/main/java/org/opensearch/discovery/Discovery.java b/server/src/main/java/org/opensearch/discovery/Discovery.java index 9d6807b6522c9..6d9fb1f4985df 100644 --- a/server/src/main/java/org/opensearch/discovery/Discovery.java +++ b/server/src/main/java/org/opensearch/discovery/Discovery.java @@ -32,6 +32,7 @@ package org.opensearch.discovery; +import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.cluster.coordination.ClusterStatePublisher; import org.opensearch.common.lifecycle.LifecycleComponent; @@ -54,4 +55,8 @@ public interface Discovery extends LifecycleComponent, ClusterStatePublisher { */ void startInitialJoin(); + /** + * Sets the NodeConnectionsService which is an abstraction used for connection management + */ + void setNodeConnectionsService(NodeConnectionsService nodeConnectionsService); } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index a8d4ebcf23dab..4962d72d8728a 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -1602,6 +1602,7 @@ public Node start() throws NodeValidationException { injector.getInstance(GatewayService.class).start(); Discovery discovery = injector.getInstance(Discovery.class); + discovery.setNodeConnectionsService(nodeConnectionsService); clusterService.getClusterManagerService().setClusterStatePublisher(discovery::publish); // Start the transport service now so the publish address will be added to the local disco node in ClusterService diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java index dd4e4d073cb1b..9c438401b9fbe 100644 --- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java @@ -299,7 +299,7 @@ public SearchSourceBuilder(StreamInput in) throws IOException { derivedFields = in.readList(DerivedField::new); } } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_18_0)) { searchPipeline = in.readOptionalString(); } } @@ -382,7 +382,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeList(derivedFields); } } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_18_0)) { 
out.writeOptionalString(searchPipeline); } } diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index 23f6deff3715d..22b2a72b36026 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -468,26 +468,33 @@ public void createSnapshotV2(final CreateSnapshotRequest request, final ActionLi long pinnedTimestamp = System.currentTimeMillis(); final String repositoryName = request.repository(); final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.snapshot()); - validate(repositoryName, snapshotName); - final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); // new UUID for the snapshot Repository repository = repositoriesService.repository(repositoryName); + validate(repositoryName, snapshotName); + repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask(Priority.URGENT) { + private SnapshotsInProgress.Entry newEntry; - if (repository.isReadOnly()) { - listener.onFailure( - new RepositoryException(repository.getMetadata().name(), "cannot create snapshot-v2 in a readonly repository") - ); - return; - } + private SnapshotId snapshotId; - final Snapshot snapshot = new Snapshot(repositoryName, snapshotId); - ClusterState currentState = clusterService.state(); - final Map userMeta = repository.adaptUserMetadata(request.userMetadata()); - try { - final StepListener repositoryDataListener = new StepListener<>(); - repositoriesService.getRepositoryData(repositoryName, repositoryDataListener); + private Snapshot snapshot; + + boolean enteredLoop; + + @Override + public ClusterState execute(ClusterState currentState) { + // move to in progress + snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); // new UUID for the snapshot + Repository repository = repositoriesService.repository(repositoryName); + + if (repository.isReadOnly()) { + listener.onFailure( + new RepositoryException(repository.getMetadata().name(), "cannot create snapshot-v2 in a readonly repository") + ); + } + + snapshot = new Snapshot(repositoryName, snapshotId); + final Map userMeta = repository.adaptUserMetadata(request.userMetadata()); - repositoryDataListener.whenComplete(repositoryData -> { createSnapshotPreValidations(currentState, repositoryData, repositoryName, snapshotName); List indices = new ArrayList<>(currentState.metadata().indices().keySet()); @@ -498,7 +505,7 @@ public void createSnapshotV2(final CreateSnapshotRequest request, final ActionLi request.indices() ); - logger.trace("[{}][{}] creating snapshot-v2 for indices [{}]", repositoryName, snapshotName, indices); + logger.info("[{}][{}] creating snapshot-v2 for indices [{}]", repositoryName, snapshotName, indices); final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); final List runningSnapshots = snapshots.entries(); @@ -509,74 +516,139 @@ public void createSnapshotV2(final CreateSnapshotRequest request, final ActionLi IndexId.DEFAULT_SHARD_PATH_TYPE ); final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null); - final ShardGenerations shardGenerations = buildShardsGenerationFromRepositoryData( - currentState.metadata(), - currentState.routingTable(), - indexIds, - repositoryData - ); if (repositoryData.getGenId() == RepositoryData.UNKNOWN_REPO_GEN) { 
logger.debug("[{}] was aborted before starting", snapshot); throw new SnapshotException(snapshot, "Aborted on initialization"); } + + Map shards = new HashMap<>(); + + newEntry = SnapshotsInProgress.startedEntry( + new Snapshot(repositoryName, snapshotId), + request.includeGlobalState(), + request.partial(), + indexIds, + dataStreams, + threadPool.absoluteTimeInMillis(), + repositoryData.getGenId(), + shards, + userMeta, + version, + true, + true + ); + final List newEntries = new ArrayList<>(runningSnapshots); + newEntries.add(newEntry); + + // Entering finalize loop here to prevent concurrent snapshots v2 snapshots + enteredLoop = tryEnterRepoLoop(repositoryName); + if (enteredLoop == false) { + throw new ConcurrentSnapshotExecutionException( + repositoryName, + snapshotName, + "cannot start snapshot-v2 while a repository is in finalization state" + ); + } + return ClusterState.builder(currentState) + .putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(new ArrayList<>(newEntries))) + .build(); + } + + @Override + public void onFailure(String source, Exception e) { + logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to create snapshot-v2", repositoryName, snapshotName), e); + listener.onFailure(e); + if (enteredLoop) { + leaveRepoLoop(repositoryName); + } + } + + @Override + public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) { + final ShardGenerations shardGenerations = buildShardsGenerationFromRepositoryData( + newState.metadata(), + newState.routingTable(), + newEntry.indices(), + repositoryData + ); + final List dataStreams = indexNameExpressionResolver.dataStreamNames( + newState, + request.indicesOptions(), + request.indices() + ); final SnapshotInfo snapshotInfo = new SnapshotInfo( - snapshot.getSnapshotId(), + snapshotId, shardGenerations.indices().stream().map(IndexId::getName).collect(Collectors.toList()), - dataStreams, + newEntry.dataStreams(), pinnedTimestamp, null, System.currentTimeMillis(), shardGenerations.totalShards(), Collections.emptyList(), request.includeGlobalState(), - userMeta, + newEntry.userMetadata(), true, pinnedTimestamp ); - if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { - throw new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2, no longer cluster manager"); - } + final Version version = minCompatibleVersion(newState.nodes().getMinNodeVersion(), repositoryData, null); final StepListener pinnedTimestampListener = new StepListener<>(); - pinnedTimestampListener.whenComplete(repoData -> { listener.onResponse(snapshotInfo); }, listener::onFailure); - repository.finalizeSnapshot( - shardGenerations, - repositoryData.getGenId(), - metadataForSnapshot(currentState.metadata(), request.includeGlobalState(), false, dataStreams, indexIds), - snapshotInfo, - version, - state -> state, - Priority.IMMEDIATE, - new ActionListener() { - @Override - public void onResponse(RepositoryData repositoryData) { - if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { - failSnapshotCompletionListeners( - snapshot, - new SnapshotException(snapshot, "Aborting snapshot-v2, no longer cluster manager") - ); - listener.onFailure( - new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2, no longer cluster manager") - ); - return; + pinnedTimestampListener.whenComplete(repoData -> { + repository.finalizeSnapshot( + shardGenerations, + repositoryData.getGenId(), + metadataForSnapshot(newState.metadata(), request.includeGlobalState(), 
false, dataStreams, newEntry.indices()), + snapshotInfo, + version, + state -> stateWithoutSnapshot(state, snapshot), + Priority.IMMEDIATE, + new ActionListener() { + @Override + public void onResponse(RepositoryData repositoryData) { + leaveRepoLoop(repositoryName); + if (clusterService.state().nodes().isLocalNodeElectedClusterManager() == false) { + failSnapshotCompletionListeners( + snapshot, + new SnapshotException(snapshot, "Aborting snapshot-v2, no longer cluster manager") + ); + listener.onFailure( + new SnapshotException( + repositoryName, + snapshotName, + "Aborting snapshot-v2, no longer cluster manager" + ) + ); + return; + } + listener.onResponse(snapshotInfo); } - updateSnapshotPinnedTimestamp(repositoryData, snapshot, pinnedTimestamp, pinnedTimestampListener); - } - @Override - public void onFailure(Exception e) { - logger.error("Failed to upload files to snapshot repo {} for snapshot-v2 {} ", repositoryName, snapshotName); - listener.onFailure(e); + @Override + public void onFailure(Exception e) { + logger.error("Failed to finalize snapshot repo {} for snapshot-v2 {} ", repositoryName, snapshotName); + leaveRepoLoop(repositoryName); + // cleaning up in progress snapshot here + stateWithoutSnapshotV2(newState); + listener.onFailure(e); + } } - } - ); + ); + }, e -> { + logger.error("Failed to update pinned timestamp for snapshot-v2 {} {} {} ", repositoryName, snapshotName, e); + leaveRepoLoop(repositoryName); + // cleaning up in progress snapshot here + stateWithoutSnapshotV2(newState); + listener.onFailure(e); + }); + updateSnapshotPinnedTimestamp(repositoryData, snapshot, pinnedTimestamp, pinnedTimestampListener); + } - }, listener::onFailure); - } catch (Exception e) { - assert false : new AssertionError(e); - logger.error("Snapshot-v2 {} creation failed with exception {}", snapshot.getSnapshotId().getName(), e); - listener.onFailure(e); - } + @Override + public TimeValue timeout() { + return request.clusterManagerNodeTimeout(); + } + + }, "create_snapshot [" + snapshotName + ']', listener::onFailure); } private void createSnapshotPreValidations( @@ -770,12 +842,24 @@ public void cloneSnapshotV2( private SnapshotId sourceSnapshotId; private List indicesForSnapshot; + boolean enteredRepoLoop; + @Override public ClusterState execute(ClusterState currentState) { createSnapshotPreValidations(currentState, repositoryData, repositoryName, snapshotName); final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); final List runningSnapshots = snapshots.entries(); + // Entering finalize loop here to prevent concurrent snapshots v2 snapshots + enteredRepoLoop = tryEnterRepoLoop(repositoryName); + if (enteredRepoLoop == false) { + throw new ConcurrentSnapshotExecutionException( + repositoryName, + snapshotName, + "cannot start snapshot-v2 while a repository is in finalization state" + ); + } + sourceSnapshotId = repositoryData.getSnapshotIds() .stream() .filter(src -> src.getName().equals(request.source())) @@ -799,14 +883,14 @@ public ClusterState execute(ClusterState currentState) { indicesForSnapshot.add(indexId.getName()); } } - newEntry = SnapshotsInProgress.startClone( snapshot, sourceSnapshotId, repositoryData.resolveIndices(indicesForSnapshot), threadPool.absoluteTimeInMillis(), repositoryData.getGenId(), - minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null) + minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null), + true ); final List newEntries = new 
ArrayList<>(runningSnapshots); newEntries.add(newEntry); @@ -817,6 +901,9 @@ public ClusterState execute(ClusterState currentState) { public void onFailure(String source, Exception e) { logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to clone snapshot-v2", repositoryName, snapshotName), e); listener.onFailure(e); + if (enteredRepoLoop) { + leaveRepoLoop(repositoryName); + } } @Override @@ -843,67 +930,80 @@ public void clusterStateProcessed(String source, ClusterState oldState, final Cl true, snapshotInfo.getPinnedTimestamp() ); - if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { + if (clusterService.state().nodes().isLocalNodeElectedClusterManager() == false) { throw new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2 clone, no longer cluster manager"); } final StepListener pinnedTimestampListener = new StepListener<>(); pinnedTimestampListener.whenComplete(repoData -> { - logger.info("snapshot-v2 clone [{}] completed successfully", snapshot); - listener.onResponse(null); - }, listener::onFailure); - repository.finalizeSnapshot( - shardGenerations, - repositoryData.getGenId(), - metadataForSnapshot( - currentState.metadata(), - newEntry.includeGlobalState(), - false, - newEntry.dataStreams(), - newEntry.indices() - ), - cloneSnapshotInfo, - repositoryData.getVersion(sourceSnapshotId), - state -> stateWithoutSnapshot(state, snapshot), - Priority.IMMEDIATE, - new ActionListener() { - @Override - public void onResponse(RepositoryData repositoryData) { - if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { - failSnapshotCompletionListeners( - snapshot, - new SnapshotException(snapshot, "Aborting Snapshot-v2 clone, no longer cluster manager") - ); - listener.onFailure( - new SnapshotException( - repositoryName, - snapshotName, - "Aborting Snapshot-v2 clone, no longer cluster manager" - ) - ); - return; + repository.finalizeSnapshot( + shardGenerations, + repositoryData.getGenId(), + metadataForSnapshot( + currentState.metadata(), + newEntry.includeGlobalState(), + false, + newEntry.dataStreams(), + newEntry.indices() + ), + cloneSnapshotInfo, + repositoryData.getVersion(sourceSnapshotId), + state -> stateWithoutSnapshot(state, snapshot), + Priority.IMMEDIATE, + new ActionListener() { + @Override + public void onResponse(RepositoryData repositoryData) { + leaveRepoLoop(repositoryName); + if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { + failSnapshotCompletionListeners( + snapshot, + new SnapshotException(snapshot, "Aborting Snapshot-v2 clone, no longer cluster manager") + ); + listener.onFailure( + new SnapshotException( + repositoryName, + snapshotName, + "Aborting Snapshot-v2 clone, no longer cluster manager" + ) + ); + return; + } + logger.info("snapshot-v2 clone [{}] completed successfully", snapshot); + listener.onResponse(null); } - cloneSnapshotPinnedTimestamp( - repositoryData, - sourceSnapshotId, - snapshot, - snapshotInfo.getPinnedTimestamp(), - pinnedTimestampListener - ); - } - @Override - public void onFailure(Exception e) { - logger.error( - "Failed to upload files to snapshot repo {} for clone snapshot-v2 {} ", - repositoryName, - snapshotName - ); - listener.onFailure(e); + @Override + public void onFailure(Exception e) { + logger.error( + "Failed to upload files to snapshot repo {} for clone snapshot-v2 {} ", + repositoryName, + snapshotName + ); + stateWithoutSnapshotV2(newState); + leaveRepoLoop(repositoryName); + listener.onFailure(e); + } } - } + ); + }, e -> { + 
logger.error("Failed to update pinned timestamp for snapshot-v2 {} {} ", repositoryName, snapshotName); + stateWithoutSnapshotV2(newState); + leaveRepoLoop(repositoryName); + listener.onFailure(e); + }); + + cloneSnapshotPinnedTimestamp( + repositoryData, + sourceSnapshotId, + snapshot, + snapshotInfo.getPinnedTimestamp(), + pinnedTimestampListener ); - - }, listener::onFailure); + }, e -> { + logger.error("Failed to retrieve snapshot info for snapshot-v2 {} {} ", repositoryName, snapshotName); + stateWithoutSnapshotV2(newState); + leaveRepoLoop(repositoryName); + listener.onFailure(e); + }); } @Override @@ -1486,6 +1586,13 @@ public void applyClusterState(ClusterChangedEvent event) { // cluster-manager SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); final boolean newClusterManager = event.previousState().nodes().isLocalNodeElectedClusterManager() == false; + + if (newClusterManager && snapshotsInProgress.entries().isEmpty() == false) { + // clean up snapshot v2 in progress or clone v2 present. + // Snapshot v2 create and clone are sync operation . In case of cluster manager failures in midst , we won't + // send ack to caller and won't continue on new cluster manager . Caller will need to retry it. + stateWithoutSnapshotV2(event.state()); + } processExternalChanges( newClusterManager || removedNodesCleanupNeeded(snapshotsInProgress, event.nodesDelta().removedNodes()), event.routingTableChanged() && waitingShardsStartedOrUnassigned(snapshotsInProgress, event) @@ -1597,7 +1704,14 @@ private void processExternalChanges(boolean changedNodes, boolean startShards) { @Override public ClusterState execute(ClusterState currentState) { RoutingTable routingTable = currentState.routingTable(); - final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + // Removing shallow snapshots v2 as we we take care of these in stateWithoutSnapshotV2() + snapshots = SnapshotsInProgress.of( + snapshots.entries() + .stream() + .filter(snapshot -> snapshot.remoteStoreIndexShallowCopyV2() == false) + .collect(Collectors.toList()) + ); DiscoveryNodes nodes = currentState.nodes(); boolean changed = false; final EnumSet statesToUpdate; @@ -1654,7 +1768,7 @@ public ClusterState execute(ClusterState currentState) { changed = true; logger.debug("[{}] was found in dangling INIT or ABORTED state", snapshot); } else { - if (snapshot.state().completed() || completed(snapshot.shards().values())) { + if ((snapshot.state().completed() || completed(snapshot.shards().values()))) { finishedSnapshots.add(snapshot); } updatedSnapshotEntries.add(snapshot); @@ -2180,6 +2294,59 @@ private static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot sn return readyDeletions(result).v1(); } + private void stateWithoutSnapshotV2(ClusterState state) { + SnapshotsInProgress snapshots = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + boolean changed = false; + ArrayList entries = new ArrayList<>(); + for (SnapshotsInProgress.Entry entry : snapshots.entries()) { + if (entry.remoteStoreIndexShallowCopyV2()) { + changed = true; + } else { + entries.add(entry); + } + } + if (changed) { + logger.info("Cleaning up in progress v2 snapshots now"); + clusterService.submitStateUpdateTask( + "remove in progress snapshot v2 after cluster manager switch", + new ClusterStateUpdateTask() { + 
@Override + public ClusterState execute(ClusterState currentState) { + SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY); + boolean changed = false; + ArrayList<SnapshotsInProgress.Entry> entries = new ArrayList<>(); + for (SnapshotsInProgress.Entry entry : snapshots.entries()) { + if (entry.remoteStoreIndexShallowCopyV2()) { + changed = true; + } else { + entries.add(entry); + } + } + if (changed) { + return ClusterState.builder(currentState) + .putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(unmodifiableList(entries))) + .build(); + } else { + return currentState; + } + } + + @Override + public void onFailure(String source, Exception e) { + // execute() never fails, so we should never hit this. + logger.warn( + () -> new ParameterizedMessage( + "failed to remove in-progress snapshot-v2 state after cluster manager switch" + ), + e + ); + } + } + ); + } + } + /** * Removes record of running snapshot from cluster state and notifies the listener when this action is complete. This method is only * used when the snapshot fails for some reason. During normal operation the snapshot repository will remove the @@ -3330,6 +3497,9 @@ public boolean assertAllListenersResolved() { + " on [" + localNode + "]"; + if (repositoryOperations.isEmpty() == false) { + logger.info("Found repository operations still pending finalization: {}", repositoryOperations); + } assert repositoryOperations.isEmpty() : "Found leaked snapshots to finalize " + repositoryOperations + " on [" + localNode + "]"; return true; }
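The cleanup in stateWithoutSnapshotV2 above reduces to a filter-and-compare over the in-progress entries: rebuild the list without shallow-v2 entries and skip the cluster state update entirely when nothing was dropped. A minimal sketch of that pattern, using simplified stand-in types rather than the actual OpenSearch classes:

import java.util.ArrayList;
import java.util.List;

class SnapshotV2CleanupSketch {
    // Stand-in for SnapshotsInProgress.Entry; only the flag we filter on.
    record Entry(String snapshotName, boolean remoteStoreIndexShallowCopyV2) {}

    // Returns the surviving entries, or null when nothing changed,
    // so the caller can skip submitting a no-op cluster state update.
    static List<Entry> withoutShallowV2(List<Entry> entries) {
        List<Entry> kept = new ArrayList<>();
        boolean changed = false;
        for (Entry entry : entries) {
            if (entry.remoteStoreIndexShallowCopyV2()) {
                changed = true; // dropped: the caller must retry the v2 operation
            } else {
                kept.add(entry);
            }
        }
        return changed ? kept : null;
    }
}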
diff --git a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java index e634323d58269..3a3e8c964b6c5 100644 --- a/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ClusterConnectionManager.java @@ -64,6 +64,15 @@ public class ClusterConnectionManager implements ConnectionManager { private final ConcurrentMap<DiscoveryNode, Transport.Connection> connectedNodes = ConcurrentCollections.newConcurrentMap(); private final ConcurrentMap<DiscoveryNode, ListenableFuture<Void>> pendingConnections = ConcurrentCollections.newConcurrentMap(); + /** + This set is used only by cluster-manager nodes. + Nodes are marked as pending disconnect right before the cluster state publish phase. + They are cleared as part of the cluster state apply commit phase. + This is to avoid connections being made to nodes that are in the process of leaving the cluster. + Note: If a disconnect is initiated while a connect is in progress, this Set will not handle this case. + Callers need to ensure that connects and disconnects are sequenced. + */ + private final Set<DiscoveryNode> pendingDisconnections = ConcurrentCollections.newConcurrentSet(); private final AbstractRefCounted connectingRefCounter = new AbstractRefCounted("connection manager") { @Override protected void closeInternal() { @@ -122,12 +131,19 @@ public void connectToNode( ConnectionValidator connectionValidator, ActionListener<Void> listener ) throws ConnectTransportException { + logger.trace("connecting to node [{}]", node); ConnectionProfile resolvedProfile = ConnectionProfile.resolveConnectionProfile(connectionProfile, defaultProfile); if (node == null) { listener.onFailure(new ConnectTransportException(null, "can't connect to a null node")); return; } + // if node-left is still in progress, we fail the connect request early + if (pendingDisconnections.contains(node)) { + listener.onFailure(new IllegalStateException("cannot make a new connection as disconnect to node [" + node + "] is pending")); + return; + } + if (connectingRefCounter.tryIncRef() == false) { listener.onFailure(new IllegalStateException("connection manager is closed")); return; @@ -170,6 +186,7 @@ public void connectToNode( conn.addCloseListener(ActionListener.wrap(() -> { logger.trace("unregistering {} after connection close and marking as disconnected", node); connectedNodes.remove(node, finalConnection); + pendingDisconnections.remove(node); connectionListener.onNodeDisconnected(node, conn); })); } @@ -226,6 +243,19 @@ public void disconnectFromNode(DiscoveryNode node) { // if we found it and removed it we close nodeChannels.close(); } + pendingDisconnections.remove(node); + logger.trace("Removed node [{}] from pending disconnections list", node); + } + + @Override + public void setPendingDisconnection(DiscoveryNode node) { + logger.trace("marking disconnection as pending for node: [{}]", node); + pendingDisconnections.add(node); + } + + @Override + public void clearPendingDisconnections() { + pendingDisconnections.clear(); } /** diff --git a/server/src/main/java/org/opensearch/transport/ConnectionManager.java b/server/src/main/java/org/opensearch/transport/ConnectionManager.java index 10cfc2907098f..ebd5ccf29c8cc 100644 --- a/server/src/main/java/org/opensearch/transport/ConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/ConnectionManager.java @@ -65,6 +65,10 @@ void connectToNode( void disconnectFromNode(DiscoveryNode node); + void setPendingDisconnection(DiscoveryNode node); + + void clearPendingDisconnections(); + Set<DiscoveryNode> getAllConnectedNodes(); int size(); diff --git a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java index bd646f10df517..52f29bea8050d 100644 --- a/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java +++ b/server/src/main/java/org/opensearch/transport/RemoteConnectionManager.java @@ -114,6 +114,16 @@ public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); } + @Override + public void setPendingDisconnection(DiscoveryNode node) { + delegate.setPendingDisconnection(node); + } + + @Override + public void clearPendingDisconnections() { + delegate.clearPendingDisconnections(); + } + @Override public ConnectionProfile getConnectionProfile() { return delegate.getConnectionProfile();
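The pending-disconnect handling above is a check-then-act gate on a concurrent set: connect attempts fail fast while a node-left is being processed, and the entry is removed once the disconnect actually completes. A simplified standalone sketch under those assumptions, with hypothetical names rather than the real ClusterConnectionManager API:

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

class PendingDisconnectGateSketch<N> {
    private final Set<N> pendingDisconnections = ConcurrentHashMap.newKeySet();

    // called right before the node-left cluster state is published
    void setPendingDisconnection(N node) {
        pendingDisconnections.add(node);
    }

    void connect(N node) {
        if (pendingDisconnections.contains(node)) {
            // fail fast: a disconnect for this node is still in flight
            throw new IllegalStateException("cannot connect, disconnect to node [" + node + "] is pending");
        }
        // ... open the connection here ...
    }

    // called when the disconnect completes (or the connection closes)
    void onDisconnected(N node) {
        pendingDisconnections.remove(node);
    }

    // called on cluster-manager failover to drop stale entries
    void clearPendingDisconnections() {
        pendingDisconnections.clear();
    }
}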
a/server/src/main/java/org/opensearch/transport/TransportService.java +++ b/server/src/main/java/org/opensearch/transport/TransportService.java @@ -773,6 +773,18 @@ public void disconnectFromNode(DiscoveryNode node) { connectionManager.disconnectFromNode(node); } + /** + * Marks a disconnection as pending for the given node; connect attempts to it fail fast until the disconnect completes. + */ + public void setPendingDisconnection(DiscoveryNode node) { + connectionManager.setPendingDisconnection(node); + } + + /** + * Wipes out all pending disconnections. + * This is called on cluster-manager failover to remove stale entries. + */ + public void clearPendingDisconnections() { + connectionManager.clearPendingDisconnections(); + } + public void addMessageListener(TransportMessageListener listener) { messageListener.listeners.add(listener); } diff --git a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java index 4cf82f1dabab3..4500860c937ea 100644 --- a/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/NodeConnectionsServiceTests.java @@ -35,6 +35,9 @@ import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.config.Configuration; +import org.apache.logging.log4j.core.config.LoggerConfig; import org.opensearch.OpenSearchTimeoutException; import org.opensearch.Version; import org.opensearch.action.support.PlainActionFuture; @@ -53,9 +56,11 @@ import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.MockLogAppender; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.TestLogsAppender; import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.ClusterConnectionManager; import org.opensearch.transport.ConnectTransportException; import org.opensearch.transport.ConnectionProfile; import org.opensearch.transport.Transport; @@ -69,6 +74,7 @@ import org.junit.Before; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -77,6 +83,7 @@ import java.util.concurrent.CyclicBarrier; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Predicate; import static java.util.Collections.emptySet; @@ -86,12 +93,15 @@ import static org.opensearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap; import static org.opensearch.node.Node.NODE_NAME_SETTING; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; public class NodeConnectionsServiceTests extends OpenSearchTestCase { private ThreadPool threadPool; private TransportService transportService; private Map<DiscoveryNode, CheckedRunnable<Exception>> nodeConnectionBlocks; + private TestLogsAppender testLogsAppender; + private LoggerContext loggerContext; private List<DiscoveryNode> generateNodes() { List<DiscoveryNode> nodes = new ArrayList<>(); @@ -490,6 +500,108 @@ public void testDebugLogging() throws IllegalAccessException { } } + public void testConnectionCheckerRetriesIfPendingDisconnection() throws InterruptedException { + final Settings.Builder settings = Settings.builder(); + final long reconnectIntervalMillis = 50; + settings.put(CLUSTER_NODE_RECONNECT_INTERVAL_SETTING.getKey(), reconnectIntervalMillis + "ms");
+ + final DeterministicTaskQueue deterministicTaskQueue = new DeterministicTaskQueue( + builder().put(NODE_NAME_SETTING.getKey(), "node").build(), + random() + ); + + MockTransport transport = new MockTransport(deterministicTaskQueue.getThreadPool()); + TestTransportService transportService = new TestTransportService(transport, deterministicTaskQueue.getThreadPool()); + transportService.start(); + transportService.acceptIncomingRequests(); + + final TestNodeConnectionsService service = new TestNodeConnectionsService( + settings.build(), + deterministicTaskQueue.getThreadPool(), + transportService + ); + service.start(); + + // set up the connections + final DiscoveryNode node = new DiscoveryNode("node0", buildNewFakeTransportAddress(), Version.CURRENT); + + final DiscoveryNodes nodes = DiscoveryNodes.builder().add(node).build(); + + final AtomicBoolean connectionCompleted = new AtomicBoolean(); + service.connectToNodes(nodes, () -> connectionCompleted.set(true)); + deterministicTaskQueue.runAllRunnableTasks(); + assertTrue(connectionCompleted.get()); + + // reset any captured logs, as we want to assert on exceptions that show up after this point; + // also reset the connect-to-node count so we can assert on it later + logger.info("--> resetting captured logs and counters"); + testLogsAppender.clearCapturedLogs(); + // this ensures we only track connection attempts that happen after the disconnection + transportService.resetConnectToNodeCallCount(); + + // block connection checker reconnection attempts until after we set pending disconnections + logger.info("--> disabling connection checker, and triggering disconnect"); + service.setShouldReconnect(false); + transportService.disconnectFromNode(node); + + // register pending disconnections so that future reconnection attempts fail + final long maxDisconnectionTime = 1000; + deterministicTaskQueue.scheduleNow(new Runnable() { + @Override + public void run() { + logger.info("--> setting pending disconnections to fail next connection attempts"); + service.setPendingDisconnections(new HashSet<>(Collections.singleton(node))); + } + + @Override + public String toString() { + return "scheduled disconnection of " + node; + } + }); + // our task queue will have the first task as the runnable to set pending disconnections; + // here we re-enable the connection checker to enqueue the next tasks for attempting reconnection + logger.info("--> re-enabling reconnection checker"); + service.setShouldReconnect(true); + + final long maxReconnectionTime = 2000; + final int expectedReconnectionAttempts = 10; + + // this will first run the task to set the pending disconnections, then execute the reconnection tasks; + // exit early once we have enough reconnection attempts + logger.info("--> running tasks in order until expected reconnection attempts"); + runTasksInOrderUntilExpectedReconnectionAttempts( + deterministicTaskQueue, + maxDisconnectionTime + maxReconnectionTime, + transportService, + expectedReconnectionAttempts + ); + logger.info("--> verifying that the ConnectionChecker tried to reconnect"); + + // assert that the connections failed + assertFalse("connected to " + node, transportService.nodeConnected(node)); + + // assert that we saw at least the required number of reconnection attempts, and that the exceptions that showed up are as expected + logger.info("--> number of reconnection attempts: {}", transportService.getConnectToNodeCallCount()); + assertThat( + "Did not see enough reconnection attempts from the connection checker", + transportService.getConnectToNodeCallCount(),
greaterThan(expectedReconnectionAttempts) + ); + boolean logFound = testLogsAppender.waitForLog("failed to connect", 1, TimeUnit.SECONDS) + && testLogsAppender.waitForLog( + "IllegalStateException: cannot make a new connection as disconnect to node", + 1, + TimeUnit.SECONDS + ); + assertTrue("Expected log for reconnection failure was not found in the required time period", logFound); + + // clear the pending disconnections and ensure the connection gets re-established automatically by the ConnectionChecker + logger.info("--> clearing pending disconnections to allow connections to re-establish"); + service.clearPendingDisconnections(); + runTasksUntil(deterministicTaskQueue, maxDisconnectionTime + maxReconnectionTime + 2 * reconnectIntervalMillis); + assertConnectedExactlyToNodes(transportService, nodes); + } + private void runTasksUntil(DeterministicTaskQueue deterministicTaskQueue, long endTimeMillis) { while (deterministicTaskQueue.getCurrentTimeMillis() < endTimeMillis) { if (deterministicTaskQueue.hasRunnableTasks() && randomBoolean()) { @@ -501,6 +613,24 @@ private void runTasksUntil(DeterministicTaskQueue deterministicTaskQueue, long e deterministicTaskQueue.runAllRunnableTasks(); } + private void runTasksInOrderUntilExpectedReconnectionAttempts( + DeterministicTaskQueue deterministicTaskQueue, + long endTimeMillis, + TestTransportService transportService, + int expectedReconnectionAttempts + ) { + // break the loop if we time out, or once we have enough reconnection attempts + while ((deterministicTaskQueue.getCurrentTimeMillis() < endTimeMillis) + && (transportService.getConnectToNodeCallCount() <= expectedReconnectionAttempts)) { + if (deterministicTaskQueue.hasRunnableTasks() && randomBoolean()) { + deterministicTaskQueue.runNextTask(); + } else if (deterministicTaskQueue.hasDeferredTasks()) { + deterministicTaskQueue.advanceTime(); + } + } + deterministicTaskQueue.runAllRunnableTasksInEnqueuedOrder(); + } + private void ensureConnections(NodeConnectionsService service) { final PlainActionFuture<Void> future = new PlainActionFuture<>(); service.ensureConnections(() -> future.onResponse(null)); @@ -526,6 +656,16 @@ private void assertConnected(TransportService transportService, Iterable + List<String> messagesToCapture = Arrays.asList("failed to connect", "IllegalStateException"); + testLogsAppender = new TestLogsAppender(messagesToCapture); + loggerContext = (LoggerContext) LogManager.getContext(false); + Configuration config = loggerContext.getConfiguration(); + LoggerConfig loggerConfig = config.getLoggerConfig(NodeConnectionsService.class.getName()); + loggerConfig.addAppender(testLogsAppender, null, null); + loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); + loggerConfig.addAppender(testLogsAppender, null, null); + loggerContext.updateLoggers(); ThreadPool threadPool = new TestThreadPool(getClass().getName()); this.threadPool = threadPool; nodeConnectionBlocks = newConcurrentMap(); @@ -537,6 +677,14 @@ public void setUp() throws Exception { @Override @After public void tearDown() throws Exception { + testLogsAppender.clearCapturedLogs(); + loggerContext = (LoggerContext) LogManager.getContext(false); + Configuration config = loggerContext.getConfiguration(); + LoggerConfig loggerConfig = config.getLoggerConfig(NodeConnectionsService.class.getName()); + loggerConfig.removeAppender(testLogsAppender.getName()); + loggerConfig = config.getLoggerConfig(ClusterConnectionManager.class.getName()); + loggerConfig.removeAppender(testLogsAppender.getName()); +
loggerContext.updateLoggers(); transportService.stop(); ThreadPool.terminate(threadPool, 30, TimeUnit.SECONDS); threadPool = null; @@ -545,6 +693,8 @@ public void tearDown() throws Exception { private final class TestTransportService extends TransportService { + private final AtomicInteger connectToNodeCallCount = new AtomicInteger(0); + private TestTransportService(Transport transport, ThreadPool threadPool) { super( Settings.EMPTY, @@ -588,6 +738,47 @@ public void connectToNode(DiscoveryNode node, ActionListener listener) thr } else { super.connectToNode(node, listener); } + logger.info("calling connectToNode for [{}]", node); + connectToNodeCallCount.incrementAndGet(); + } + + public int getConnectToNodeCallCount() { + return connectToNodeCallCount.get(); + } + + public void resetConnectToNodeCallCount() { + connectToNodeCallCount.set(0); + } + } + + private class TestNodeConnectionsService extends NodeConnectionsService { + private boolean shouldReconnect = true; + + public TestNodeConnectionsService(Settings settings, ThreadPool threadPool, TransportService transportService) { + super(settings, threadPool, transportService); + } + + public void setShouldReconnect(boolean shouldReconnect) { + this.shouldReconnect = shouldReconnect; + } + + @Override + protected void doStart() { + final StoppableConnectionChecker connectionChecker = new StoppableConnectionChecker(); + this.connectionChecker = connectionChecker; + connectionChecker.scheduleNextCheck(); + } + + class StoppableConnectionChecker extends NodeConnectionsService.ConnectionChecker { + @Override + protected void doRun() { + if (connectionChecker == this && shouldReconnect) { + connectDisconnectedTargets(this::scheduleNextCheck); + } else { + // Skip the reconnection attempt, but still schedule the next check + scheduleNextCheck(); + } + } } }
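The StoppableConnectionChecker above illustrates how to make a self-rescheduling background check controllable from a test: the reconnect attempt is gated behind a flag, but the next check is always scheduled so the deterministic task queue never runs dry. A compact sketch of that shape, with hypothetical interfaces rather than the OpenSearch test classes:

class StoppableCheckerSketch {
    private volatile boolean shouldReconnect = true;

    void setShouldReconnect(boolean shouldReconnect) {
        this.shouldReconnect = shouldReconnect;
    }

    // reconnectThenSchedule performs the reconnect attempts and schedules the
    // next check on completion; scheduleNext only re-arms the checker.
    void checkOnce(Runnable reconnectThenSchedule, Runnable scheduleNext) {
        if (shouldReconnect) {
            reconnectThenSchedule.run();
        } else {
            // skip the attempt, but keep the check loop alive
            scheduleNext.run();
        }
    }
}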
diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 350c6f9ae8f6b..440227436175d 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -1923,11 +1923,6 @@ private final class TestClusterNode { protected PrioritizedOpenSearchThreadPoolExecutor createThreadPoolExecutor() { return new MockSinglePrioritizingExecutor(node.getName(), deterministicTaskQueue, threadPool); } - - @Override - protected void connectToNodesAndWait(ClusterState newClusterState) { - // don't do anything, and don't block - } } ); recoverySettings = new RecoverySettings(settings, clusterSettings); @@ -2094,7 +2089,7 @@ public void onFailure(final Exception e) { rerouteService, threadPool ); - nodeConnectionsService = new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService); + nodeConnectionsService = createTestNodeConnectionsService(clusterService.getSettings(), threadPool, transportService); final MetadataMappingService metadataMappingService = new MetadataMappingService(clusterService, indicesService); indicesClusterStateService = new IndicesClusterStateService( settings, @@ -2492,6 +2487,24 @@ protected void assertSnapshotOrGenericThread() { } } + public NodeConnectionsService createTestNodeConnectionsService( + Settings settings, + ThreadPool threadPool, + TransportService transportService + ) { + return new NodeConnectionsService(settings, threadPool, transportService) { + @Override + public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) { + // just update targetsByNode to ensure disconnect runs for these nodes; + // we rely on disconnect running so that pendingDisconnects are tracked and node-joins can happen + for (final DiscoveryNode discoveryNode : discoveryNodes) { + this.targetsByNode.put(discoveryNode, createConnectionTarget(discoveryNode)); + } + onCompletion.run(); + } + }; + } + public ClusterInfoService getMockClusterInfoService() { return clusterInfoService; } @@ -2563,10 +2576,11 @@ public void start(ClusterState initialState) { new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), null ); + coordinator.setNodeConnectionsService(nodeConnectionsService); clusterManagerService.setClusterStatePublisher(coordinator); - coordinator.start(); clusterService.getClusterApplierService().setNodeConnectionsService(nodeConnectionsService); nodeConnectionsService.start(); + coordinator.start(); clusterService.start(); indicesService.start(); indicesClusterStateService.start(); diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotsInProgressSerializationTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotsInProgressSerializationTests.java index 8fd1f44286094..d79cb62b6b7ac 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotsInProgressSerializationTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotsInProgressSerializationTests.java @@ -214,7 +214,11 @@ public void testSerDeRemoteStoreIndexShallowCopy() throws IOException { assert (curr_entry.remoteStoreIndexShallowCopy() == false); } } - try (StreamInput in = out.bytes().streamInput()) { + + BytesStreamOutput out2 = new BytesStreamOutput(); + out2.setVersion(Version.V_2_9_0); + snapshotsInProgress.writeTo(out2); + try (StreamInput in = out2.bytes().streamInput()) { in.setVersion(Version.V_2_9_0); actualSnapshotsInProgress = new SnapshotsInProgress(in); assert in.available() == 0; diff --git a/server/src/test/java/org/opensearch/test/NoopDiscovery.java b/server/src/test/java/org/opensearch/test/NoopDiscovery.java index 42d3f1887ab4d..c35503a556db6 100644 --- a/server/src/test/java/org/opensearch/test/NoopDiscovery.java +++ b/server/src/test/java/org/opensearch/test/NoopDiscovery.java @@ -32,6 +32,7 @@ package org.opensearch.test; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.NodeConnectionsService; import org.opensearch.common.lifecycle.Lifecycle; import org.opensearch.common.lifecycle.LifecycleListener; import org.opensearch.core.action.ActionListener; @@ -55,6 +56,11 @@ public void startInitialJoin() { } + @Override + public void setNodeConnectionsService(NodeConnectionsService nodeConnectionsService) { + // no-op + } + @Override public Lifecycle.State lifecycleState() { return null; diff --git a/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java b/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java index 1d734a56ef189..fdf762aa096f0 100644 --- a/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java +++ b/server/src/test/java/org/opensearch/transport/ClusterConnectionManagerTests.java @@ -320,6 +320,50 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti assertEquals(0, nodeDisconnectedCount.get()); } + public void testConnectFailsWhenDisconnectIsPending() { + AtomicInteger nodeConnectedCount = new AtomicInteger(); + AtomicInteger nodeDisconnectedCount = new AtomicInteger(); + connectionManager.addListener(new TransportConnectionListener() { + @Override + public void
onNodeConnected(DiscoveryNode node, Transport.Connection connection) { + nodeConnectedCount.incrementAndGet(); + } + + @Override + public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connection) { + nodeDisconnectedCount.incrementAndGet(); + } + }); + + DiscoveryNode node = new DiscoveryNode("", new TransportAddress(InetAddress.getLoopbackAddress(), 0), Version.CURRENT); + ConnectionManager.ConnectionValidator validator = (c, p, l) -> l.onResponse(null); + Transport.Connection connection = new TestConnect(node); + doAnswer(invocationOnMock -> { + ActionListener<Transport.Connection> listener = (ActionListener<Transport.Connection>) invocationOnMock.getArguments()[2]; + listener.onResponse(connection); + return null; + }).when(transport).openConnection(eq(node), eq(connectionProfile), any(ActionListener.class)); + assertFalse(connectionManager.nodeConnected(node)); + + // Mark the connection as pending disconnect; any connection attempt should now fail + connectionManager.setPendingDisconnection(node); + PlainActionFuture<Void> fut = new PlainActionFuture<>(); + connectionManager.connectToNode(node, connectionProfile, validator, fut); + expectThrows(IllegalStateException.class, () -> fut.actionGet()); + + // clear the pending disconnect and assert that the connection succeeds + connectionManager.clearPendingDisconnections(); + assertFalse(connectionManager.nodeConnected(node)); + PlainActionFuture.get( + future -> connectionManager.connectToNode(node, connectionProfile, validator, ActionListener.map(future, x -> null)) + ); + assertFalse(connection.isClosed()); + assertTrue(connectionManager.nodeConnected(node)); + assertEquals(1, connectionManager.size()); + assertEquals(1, nodeConnectedCount.get()); + assertEquals(0, nodeDisconnectedCount.get()); + } + private static class TestConnect extends CloseableConnection { private final DiscoveryNode node;
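The test above exercises the gate end to end: mark the node pending, assert the connect future fails with IllegalStateException, clear the gate, and assert a fresh connect succeeds. The same flow can be shown with plain JDK types, reusing the PendingDisconnectGateSketch from the earlier aside (illustrative only, not the OpenSearch test utilities):

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;

class PendingDisconnectGateDemo {
    public static void main(String[] args) {
        PendingDisconnectGateSketch<String> gate = new PendingDisconnectGateSketch<>();
        gate.setPendingDisconnection("node0");

        // connect attempts fail while the disconnect is pending
        CompletableFuture<Void> fut = CompletableFuture.runAsync(() -> gate.connect("node0"));
        try {
            fut.join();
            throw new AssertionError("expected connect to fail while disconnect is pending");
        } catch (CompletionException expected) {
            // cause: IllegalStateException("cannot connect, disconnect to node [node0] is pending")
        }

        gate.clearPendingDisconnections();
        gate.connect("node0"); // succeeds once the gate is cleared
    }
}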
diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index 21fe28bfd835d..99b0386a48808 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -79,7 +79,7 @@ dependencies { api "org.jboss.xnio:xnio-nio:3.8.16.Final" api 'org.jline:jline:3.26.3' api 'org.apache.commons:commons-configuration2:2.11.0' - api 'com.nimbusds:nimbus-jose-jwt:9.40' + api 'com.nimbusds:nimbus-jose-jwt:9.41.1' api ('org.apache.kerby:kerb-admin:2.1.0') { exclude group: "org.jboss.xnio" exclude group: "org.jline" diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index b432e5411404e..3efcc538a1b25 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -55,6 +55,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterApplierService; import org.opensearch.cluster.service.ClusterService; @@ -1150,9 +1151,12 @@ protected Optional getDisruptableMockTransport(Transpo new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); clusterService = new ClusterService(settings, clusterSettings, clusterManagerService, clusterApplierService); - clusterService.setNodeConnectionsService( - new NodeConnectionsService(clusterService.getSettings(), threadPool, transportService + NodeConnectionsService nodeConnectionsService = createTestNodeConnectionsService( + clusterService.getSettings(), + threadPool, + transportService ); + clusterService.setNodeConnectionsService(nodeConnectionsService); repositoriesService = new RepositoriesService( settings, clusterService, @@ -1187,6 +1191,7 @@ protected Optional getDisruptableMockTransport(Transpo new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE), null ); + coordinator.setNodeConnectionsService(nodeConnectionsService); clusterManagerService.setClusterStatePublisher(coordinator); final GatewayService gatewayService = new GatewayService( settings, @@ -1588,6 +1593,24 @@ public void onNodeAck(DiscoveryNode node, Exception e) { } } + public static NodeConnectionsService createTestNodeConnectionsService( + Settings settings, + ThreadPool threadPool, + TransportService transportService + ) { + return new NodeConnectionsService(settings, threadPool, transportService) { + @Override + public void connectToNodes(DiscoveryNodes discoveryNodes, Runnable onCompletion) { + // just update targetsByNode to ensure disconnect runs for these nodes; + // we rely on disconnect running so that pendingDisconnects are tracked and node-joins can happen + for (final DiscoveryNode discoveryNode : discoveryNodes) { + this.targetsByNode.put(discoveryNode, createConnectionTarget(discoveryNode)); + } + onCompletion.run(); + } + }; + } + static class DisruptableClusterApplierService extends ClusterApplierService { private final String nodeName; private final DeterministicTaskQueue deterministicTaskQueue; @@ -1641,11 +1664,6 @@ public void onNewClusterState(String source, Supplier clusterState } } - @Override - protected void connectToNodesAndWait(ClusterState newClusterState) { - // don't do anything, and don't block - } - @Override protected boolean applicationMayFail() { return this.applicationMayFail; diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java index 1ad18bf89d5ba..4f692c7bc8f62 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/DeterministicTaskQueue.java @@ -92,6 +92,12 @@ public void runAllRunnableTasks() { } } + public void runAllRunnableTasksInEnqueuedOrder() { + while (hasRunnableTasks()) { + runTask(0); + } + } + public void runAllTasks() { while (hasDeferredTasks() || hasRunnableTasks()) { if (hasDeferredTasks() && random.nextBoolean()) { @@ -141,6 +147,11 @@ public void runRandomTask() { runTask(RandomNumbers.randomIntBetween(random, 0, runnableTasks.size() - 1)); } + public void runNextTask() { + assert hasRunnableTasks(); + runTask(0); + } + private void runTask(final int index) { final Runnable task = runnableTasks.remove(index); logger.trace("running task {} of {}: {}", index, runnableTasks.size() + 1, task);
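The runNextTask() and runAllRunnableTasksInEnqueuedOrder() methods added above both drain from index 0, i.e. strictly FIFO over the runnable list, in contrast to the existing randomized runRandomTask()/runAllRunnableTasks(). A minimal FIFO drain over a plain deque shows the ordering guarantee (a sketch, not the DeterministicTaskQueue internals):

import java.util.ArrayDeque;
import java.util.Deque;

class EnqueuedOrderSketch {
    private final Deque<Runnable> runnableTasks = new ArrayDeque<>();

    void scheduleNow(Runnable task) {
        runnableTasks.addLast(task);
    }

    void runNextTask() {
        assert !runnableTasks.isEmpty();
        // always the oldest task: deterministic, enqueue-ordered execution
        runnableTasks.removeFirst().run();
    }

    void runAllRunnableTasksInEnqueuedOrder() {
        while (!runnableTasks.isEmpty()) {
            runNextTask();
        }
    }
}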
diff --git a/test/framework/src/main/java/org/opensearch/test/TestLogsAppender.java b/test/framework/src/main/java/org/opensearch/test/TestLogsAppender.java new file mode 100644 index 0000000000000..030f399a5bcc0 --- /dev/null +++ b/test/framework/src/main/java/org/opensearch/test/TestLogsAppender.java @@ -0,0 +1,74 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.test; + +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.Property; +import org.apache.logging.log4j.core.layout.PatternLayout; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * Test logs appender that provides functionality to capture specific log/exception messages and wait for them to show up + * @opensearch.internal + */ +public class TestLogsAppender extends AbstractAppender { + private final List<String> capturedLogs = new ArrayList<>(); + private final List<String> messagesToCapture; + + public TestLogsAppender(List<String> messagesToCapture) { + super("TestAppender", null, PatternLayout.createDefaultLayout(), false, Property.EMPTY_ARRAY); + this.messagesToCapture = messagesToCapture; + start(); + } + + @Override + public void append(LogEvent event) { + if (shouldCaptureMessage(event.getMessage().getFormattedMessage())) capturedLogs.add(event.getMessage().getFormattedMessage()); + if (event.getThrown() != null) { + if (shouldCaptureMessage(event.getThrown().toString())) capturedLogs.add(event.getThrown().toString()); + for (StackTraceElement element : event.getThrown().getStackTrace()) + if (shouldCaptureMessage(element.toString())) capturedLogs.add(element.toString()); + } + } + + public boolean shouldCaptureMessage(String log) { + return messagesToCapture.stream().anyMatch(log::contains); + } + + public List<String> getCapturedLogs() { + return new ArrayList<>(capturedLogs); + } + + public boolean waitForLog(String expectedLog, long timeout, TimeUnit unit) { + long startTime = System.currentTimeMillis(); + long timeoutInMillis = unit.toMillis(timeout); + + while (System.currentTimeMillis() - startTime < timeoutInMillis) { + if (capturedLogs.stream().anyMatch(log -> log.contains(expectedLog))) { + return true; + } + try { + Thread.sleep(100); // Wait 100ms before checking again + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + return false; + } + + // Clear captured logs + public void clearCapturedLogs() { + capturedLogs.clear(); + } } diff --git a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java index 37df90fb103a3..d1e1a3e8af17c 100644 --- a/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java +++ b/test/framework/src/main/java/org/opensearch/test/transport/StubbableConnectionManager.java @@ -123,6 +123,16 @@ public void disconnectFromNode(DiscoveryNode node) { delegate.disconnectFromNode(node); } + @Override + public void setPendingDisconnection(DiscoveryNode node) { + delegate.setPendingDisconnection(node); + } + + @Override + public void clearPendingDisconnections() { + delegate.clearPendingDisconnections(); + } + @Override public int size() { return delegate.size();