From 50289384739d3f33a06080b612834033577c8f7f Mon Sep 17 00:00:00 2001 From: Gaurav Bafna Date: Thu, 26 Sep 2024 17:43:48 +0530 Subject: [PATCH] Add more ITs and handle some failure cases Signed-off-by: Gaurav Bafna --- .../remotestore/RemoteRestoreSnapshotIT.java | 53 +- .../remotestore/RemoteSnapshotIT.java | 89 ++++ .../snapshots/ConcurrentSnapshotsV2IT.java | 486 ++++++++++++++++++ .../blobstore/BlobStoreRepository.java | 4 +- .../snapshots/SnapshotsService.java | 17 +- 5 files changed, 588 insertions(+), 61 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteSnapshotIT.java create mode 100644 server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsV2IT.java diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 50de5a1b98d97..a0183e89bfce2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -46,14 +46,11 @@ import org.opensearch.repositories.RepositoryData; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.fs.FsRepository; -import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.snapshots.SnapshotRestoreException; import org.opensearch.snapshots.SnapshotState; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; -import org.junit.After; -import org.junit.Before; import java.io.IOException; import java.nio.file.Files; @@ -83,48 +80,7 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class RemoteRestoreSnapshotIT extends AbstractSnapshotIntegTestCase { - private static final String BASE_REMOTE_REPO = "test-rs-repo" + TEST_REMOTE_STORE_REPO_SUFFIX; - private Path remoteRepoPath; - - @Before - public void setup() { - remoteRepoPath = randomRepoPath().toAbsolutePath(); - } - - @After - public void teardown() { - clusterAdmin().prepareCleanupRepository(BASE_REMOTE_REPO).get(); - } - - @Override - protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder() - .put(super.nodeSettings(nodeOrdinal)) - .put(remoteStoreClusterSettings(BASE_REMOTE_REPO, remoteRepoPath)) - .build(); - } - - private Settings.Builder getIndexSettings(int numOfShards, int numOfReplicas) { - Settings.Builder settingsBuilder = Settings.builder() - .put(super.indexSettings()) - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numOfReplicas) - .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s"); - return settingsBuilder; - } - - private void indexDocuments(Client client, String indexName, int numOfDocs) { - indexDocuments(client, indexName, 0, numOfDocs); - } - - private void indexDocuments(Client client, String indexName, int fromId, int toId) { - for (int i = fromId; i < toId; i++) { - String id = Integer.toString(i); - client.prepareIndex(indexName).setId(id).setSource("text", "sometext").get(); - } - client.admin().indices().prepareFlush(indexName).get(); - } +public class RemoteRestoreSnapshotIT extends RemoteSnapshotIT { private void assertDocsPresentInIndex(Client client, String indexName, int numOfDocs) { for (int i = 0; i < numOfDocs; i++) { @@ -1388,11 +1344,4 @@ public void testConcurrentV1SnapshotAndV2RepoSettingUpdate() throws Exception { createV1SnapshotThread.join(); } - private Settings pinnedTimestampSettings() { - Settings settings = Settings.builder() - .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) - .build(); - return settings; - } - } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteSnapshotIT.java new file mode 100644 index 0000000000000..836871b8251d1 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteSnapshotIT.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore; + +import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesRequest; +import org.opensearch.action.admin.cluster.repositories.get.GetRepositoriesResponse; +import org.opensearch.client.Client; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.RepositoryMetadata; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.IndexSettings; +import org.opensearch.indices.RemoteStoreSettings; +import org.opensearch.repositories.fs.ReloadableFsRepository; +import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; +import org.junit.After; +import org.junit.Before; + +import java.nio.file.Path; +import java.util.concurrent.ExecutionException; + +import static org.opensearch.repositories.fs.ReloadableFsRepository.REPOSITORIES_FAILRATE_SETTING; + +public abstract class RemoteSnapshotIT extends AbstractSnapshotIntegTestCase { + protected static final String BASE_REMOTE_REPO = "test-rs-repo" + TEST_REMOTE_STORE_REPO_SUFFIX; + protected Path remoteRepoPath; + + @Before + public void setup() { + remoteRepoPath = randomRepoPath().toAbsolutePath(); + } + + @After + public void teardown() { + clusterAdmin().prepareCleanupRepository(BASE_REMOTE_REPO).get(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(remoteStoreClusterSettings(BASE_REMOTE_REPO, remoteRepoPath)) + .build(); + } + + protected Settings pinnedTimestampSettings() { + Settings settings = Settings.builder() + .put(RemoteStoreSettings.CLUSTER_REMOTE_STORE_PINNED_TIMESTAMP_ENABLED.getKey(), true) + .build(); + return settings; + } + + protected Settings.Builder getIndexSettings(int numOfShards, int numOfReplicas) { + Settings.Builder settingsBuilder = Settings.builder() + .put(super.indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, numOfReplicas) + .put(IndexSettings.INDEX_REFRESH_INTERVAL_SETTING.getKey(), "300s"); + return settingsBuilder; + } + + protected void indexDocuments(Client client, String indexName, int numOfDocs) { + indexDocuments(client, indexName, 0, numOfDocs); + } + + void indexDocuments(Client client, String indexName, int fromId, int toId) { + for (int i = fromId; i < toId; i++) { + String id = Integer.toString(i); + client.prepareIndex(indexName).setId(id).setSource("text", "sometext").get(); + } + client.admin().indices().prepareFlush(indexName).get(); + } + + protected void setFailRate(String repoName, int value) throws ExecutionException, InterruptedException { + GetRepositoriesRequest gr = new GetRepositoriesRequest(new String[] { repoName }); + GetRepositoriesResponse res = client().admin().cluster().getRepositories(gr).get(); + RepositoryMetadata rmd = res.repositories().get(0); + Settings.Builder settings = Settings.builder() + .put("location", rmd.settings().get("location")) + .put(REPOSITORIES_FAILRATE_SETTING.getKey(), value); + createRepository(repoName, ReloadableFsRepository.TYPE, settings); + } + +} diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsV2IT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsV2IT.java new file mode 100644 index 0000000000000..f20fddb6af26c --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/ConcurrentSnapshotsV2IT.java @@ -0,0 +1,486 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.snapshots; + +import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; +import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.support.master.AcknowledgedResponse; +import org.opensearch.client.Client; +import org.opensearch.common.action.ActionFuture; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.unit.ByteSizeUnit; +import org.opensearch.remotestore.RemoteSnapshotIT; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryData; +import org.opensearch.repositories.blobstore.BlobStoreRepository; +import org.opensearch.repositories.fs.FsRepository; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class ConcurrentSnapshotsV2IT extends RemoteSnapshotIT { + + public void testLongRunningSnapshotDontAllowConcurrentSnapshot() throws Exception { + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + blockClusterManagerOnWriteIndexFile(repoName); + + final ActionFuture snapshotFuture = startFullSnapshot(repoName, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + + try { + String snapshotName = "snapshot-concurrent"; + client().admin().cluster().prepareCreateSnapshot(repoName, snapshotName).setWaitForCompletion(true).get(); + fail(); + } catch (Exception e) {} + + unblockNode(repoName, clusterManagerName); + CreateSnapshotResponse csr = snapshotFuture.actionGet(); + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + assertThat(snapInfo, contains(csr.getSnapshotInfo())); + } + + public void testCreateSnapshotFailInFinalize() throws Exception { + final String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startFullSnapshot(repoName, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + unblockNode(repoName, clusterManagerNode); + expectThrows(SnapshotException.class, snapshotFuture::actionGet); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the repo + CreateSnapshotResponse csr = snapshotFuture2.actionGet(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + assertThat(snapInfo, contains(csr.getSnapshotInfo())); + } + + public void testCreateSnapshotV2MasterSwitch() throws Exception { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + String clusterManagerNode = internalCluster().getClusterManagerName(); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startFullSnapshot(repoName, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + + // Fail the cluster manager + stopNode(clusterManagerNode); + + ensureGreen(); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the repo + CreateSnapshotResponse csr = snapshotFuture2.actionGet(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + assertThat(snapInfo, contains(csr.getSnapshotInfo())); + + } + + public void testPinnedTimestampFailSnapshot() throws Exception { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + // fail segment repo - this is to fail the timestamp pinning + setFailRate(BASE_REMOTE_REPO, 100); + + try { + String snapshotName = "snapshot-fail"; + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(repoName, snapshotName) + .setWaitForCompletion(true) + .get(); + fail(); + } catch (Exception e) {} + + setFailRate(BASE_REMOTE_REPO, 0); + String snapshotName = "snapshot-success"; + CreateSnapshotResponse createSnapshotResponse = client().admin() + .cluster() + .prepareCreateSnapshot(repoName, snapshotName) + .setWaitForCompletion(true) + .get(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(1, snapInfo.size()); + } + + public void testConcurrentSnapshotV2CreateOperation() throws InterruptedException, ExecutionException { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String snapshotRepoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(snapshotRepoName, FsRepository.TYPE, settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + int concurrentSnapshots = 5; + + // Prepare threads for concurrent snapshot creation + List threads = new ArrayList<>(); + + for (int i = 0; i < concurrentSnapshots; i++) { + int snapshotIndex = i; + Thread thread = new Thread(() -> { + try { + String snapshotName = "snapshot-concurrent-" + snapshotIndex; + CreateSnapshotResponse createSnapshotResponse2 = client().admin() + .cluster() + .prepareCreateSnapshot(snapshotRepoName, snapshotName) + .setWaitForCompletion(true) + .get(); + SnapshotInfo snapshotInfo = createSnapshotResponse2.getSnapshotInfo(); + assertThat(snapshotInfo.state(), equalTo(SnapshotState.SUCCESS)); + assertThat(snapshotInfo.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo.successfulShards(), equalTo(snapshotInfo.totalShards())); + assertThat(snapshotInfo.snapshotId().getName(), equalTo(snapshotName)); + assertThat(snapshotInfo.getPinnedTimestamp(), greaterThan(0L)); + } catch (Exception e) {} + }); + threads.add(thread); + } + // start all threads + for (Thread thread : threads) { + thread.start(); + } + + // Wait for all threads to complete + for (Thread thread : threads) { + thread.join(); + } + + // Validate that only one snapshot has been created + Repository repository = internalCluster().getInstance(RepositoriesService.class).repository(snapshotRepoName); + PlainActionFuture repositoryDataPlainActionFuture = new PlainActionFuture<>(); + repository.getRepositoryData(repositoryDataPlainActionFuture); + + RepositoryData repositoryData = repositoryDataPlainActionFuture.get(); + assertThat(repositoryData.getSnapshotIds().size(), greaterThanOrEqualTo(1)); + } + + public void testLongRunningSnapshotDontAllowConcurrentClone() throws Exception { + final String clusterManagerName = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + String sourceSnap = "snapshot-source"; + + final CreateSnapshotResponse csr = startFullSnapshot(repoName, sourceSnap).actionGet(); + blockClusterManagerOnWriteIndexFile(repoName); + + final ActionFuture snapshotFuture = startCloneSnapshot(repoName, sourceSnap, "snapshot-clone"); + awaitNumberOfSnapshotsInProgress(1); + + final ActionFuture snapshotFuture2 = startCloneSnapshot(repoName, sourceSnap, "snapshot-clone-2"); + assertThrows(ConcurrentSnapshotExecutionException.class, snapshotFuture2::actionGet); + + unblockNode(repoName, clusterManagerName); + assertThrows(SnapshotException.class, snapshotFuture2::actionGet); + + snapshotFuture.get(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(2, snapInfo.size()); + } + + public void testCloneSnapshotFailInFinalize() throws Exception { + final String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + String sourceSnap = "snapshot-source"; + CreateSnapshotResponse sourceResp = startFullSnapshot(repoName, sourceSnap).actionGet(); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startCloneSnapshot(repoName, sourceSnap, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + unblockNode(repoName, clusterManagerNode); + assertThrows(SnapshotException.class, snapshotFuture::actionGet); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the repo is cleaned up + CreateSnapshotResponse csr = snapshotFuture2.actionGet(); + + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(2, snapInfo.size()); + assertThat(snapInfo, containsInAnyOrder(csr.getSnapshotInfo(), sourceResp.getSnapshotInfo())); + } + + public void testCloneSnapshotV2MasterSwitch() throws Exception { + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + internalCluster().startClusterManagerOnlyNode(pinnedTimestampSettings()); + + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + internalCluster().startDataOnlyNode(pinnedTimestampSettings()); + String indexName1 = "testindex1"; + String indexName2 = "testindex2"; + String repoName = "test-create-snapshot-repo"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + Settings.Builder settings = Settings.builder() + .put(FsRepository.LOCATION_SETTING.getKey(), absolutePath1) + .put(FsRepository.COMPRESS_SETTING.getKey(), randomBoolean()) + .put(FsRepository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(100, 1000), ByteSizeUnit.BYTES) + .put(BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY.getKey(), true) + .put(BlobStoreRepository.SHALLOW_SNAPSHOT_V2.getKey(), true); + createRepository(repoName, "mock", settings); + + Client client = client(); + Settings indexSettings = getIndexSettings(20, 0).build(); + createIndex(indexName1, indexSettings); + + Settings indexSettings2 = getIndexSettings(15, 0).build(); + createIndex(indexName2, indexSettings2); + + final int numDocsInIndex1 = 10; + final int numDocsInIndex2 = 20; + indexDocuments(client, indexName1, numDocsInIndex1); + indexDocuments(client, indexName2, numDocsInIndex2); + ensureGreen(indexName1, indexName2); + + String sourceSnap = "snapshot-source"; + CreateSnapshotResponse csr = startFullSnapshot(repoName, sourceSnap).actionGet(); + + String clusterManagerNode = internalCluster().getClusterManagerName(); + + blockClusterManagerFromFinalizingSnapshotOnIndexFile(repoName); + final ActionFuture snapshotFuture = startCloneSnapshot(repoName, sourceSnap, "snapshot-queued"); + awaitNumberOfSnapshotsInProgress(1); + waitForBlock(clusterManagerNode, repoName, TimeValue.timeValueSeconds(30L)); + + // Fail the cluster manager + stopNode(clusterManagerNode); + + ensureGreen(); + + final ActionFuture snapshotFuture2 = startFullSnapshot(repoName, "snapshot-success"); + // Second create works out cleanly since the repo + CreateSnapshotResponse csr2 = snapshotFuture2.actionGet(); + List snapInfo = client().admin().cluster().prepareGetSnapshots(repoName).get().getSnapshots(); + assertEquals(2, snapInfo.size()); + assertThat(snapInfo, containsInAnyOrder(csr.getSnapshotInfo(), csr2.getSnapshotInfo())); + } + + protected ActionFuture startCloneSnapshot(String repoName, String sourceSnapshotName, String snapshotName) { + logger.info("--> creating full snapshot [{}] to repo [{}]", snapshotName, repoName); + return clusterAdmin().prepareCloneSnapshot(repoName, sourceSnapshotName, snapshotName).setIndices("*").execute(); + } +} diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 2e4d036034e78..14c201e819994 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -823,7 +823,7 @@ public void updateState(ClusterState state) { + "] to generation [" + metadata.generation() + "]"; - logger.info("Updated repository generation from [{}] to [{}]", previousBest, metadata.generation()); + logger.debug("Updated repository generation from [{}] to [{}]", previousBest, metadata.generation()); } } } @@ -2490,7 +2490,6 @@ public void finalizeSnapshot( // number_of_shards) has increased. Set updatedIndexIds = writeNewIndexShardPaths(existingRepositoryData, updatedRepositoryData, snapshotId); cleanupRedundantSnapshotShardPaths(updatedIndexIds); - logger.info("update repo data for {}", snapshotInfo.snapshotId()); writeIndexGen( updatedRepositoryData, repositoryStateId, @@ -3290,7 +3289,6 @@ public ClusterState execute(ClusterState currentState) { + "] must be larger than latest known generation [" + latestKnownRepoGen.get() + "]"; - logger.info("Setting it to {} {}", safeGeneration, newGen); return ClusterState.builder(currentState) .metadata( Metadata.builder(currentState.getMetadata()) diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java index d3d89352e7b28..22b2a72b36026 100644 --- a/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/opensearch/snapshots/SnapshotsService.java @@ -605,6 +605,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, final Cl new ActionListener() { @Override public void onResponse(RepositoryData repositoryData) { + leaveRepoLoop(repositoryName); if (clusterService.state().nodes().isLocalNodeElectedClusterManager() == false) { failSnapshotCompletionListeners( snapshot, @@ -617,10 +618,8 @@ public void onResponse(RepositoryData repositoryData) { "Aborting snapshot-v2, no longer cluster manager" ) ); - return; } - leaveRepoLoop(repositoryName); listener.onResponse(snapshotInfo); } @@ -628,13 +627,17 @@ public void onResponse(RepositoryData repositoryData) { public void onFailure(Exception e) { logger.error("Failed to finalize snapshot repo {} for snapshot-v2 {} ", repositoryName, snapshotName); leaveRepoLoop(repositoryName); + // cleaning up in progress snapshot here + stateWithoutSnapshotV2(newState); listener.onFailure(e); } } ); }, e -> { - logger.error("Failed to update pinned timestamp for snapshot-v2 {} {} ", repositoryName, snapshotName); + logger.error("Failed to update pinned timestamp for snapshot-v2 {} {} {} ", repositoryName, snapshotName, e); leaveRepoLoop(repositoryName); + // cleaning up in progress snapshot here + stateWithoutSnapshotV2(newState); listener.onFailure(e); }); updateSnapshotPinnedTimestamp(repositoryData, snapshot, pinnedTimestamp, pinnedTimestampListener); @@ -931,7 +934,6 @@ public void clusterStateProcessed(String source, ClusterState oldState, final Cl throw new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2 clone, no longer cluster manager"); } final StepListener pinnedTimestampListener = new StepListener<>(); - pinnedTimestampListener.whenComplete(repoData -> { repository.finalizeSnapshot( shardGenerations, @@ -950,6 +952,7 @@ public void clusterStateProcessed(String source, ClusterState oldState, final Cl new ActionListener() { @Override public void onResponse(RepositoryData repositoryData) { + leaveRepoLoop(repositoryName); if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) { failSnapshotCompletionListeners( snapshot, @@ -965,7 +968,6 @@ public void onResponse(RepositoryData repositoryData) { return; } logger.info("snapshot-v2 clone [{}] completed successfully", snapshot); - leaveRepoLoop(repositoryName); listener.onResponse(null); } @@ -976,14 +978,15 @@ public void onFailure(Exception e) { repositoryName, snapshotName ); + stateWithoutSnapshotV2(newState); leaveRepoLoop(repositoryName); listener.onFailure(e); } } ); - listener.onResponse(null); }, e -> { logger.error("Failed to update pinned timestamp for snapshot-v2 {} {} ", repositoryName, snapshotName); + stateWithoutSnapshotV2(newState); leaveRepoLoop(repositoryName); listener.onFailure(e); }); @@ -997,6 +1000,7 @@ public void onFailure(Exception e) { ); }, e -> { logger.error("Failed to retrieve snapshot info for snapshot-v2 {} {} ", repositoryName, snapshotName); + stateWithoutSnapshotV2(newState); leaveRepoLoop(repositoryName); listener.onFailure(e); }); @@ -2302,6 +2306,7 @@ private void stateWithoutSnapshotV2(ClusterState state) { } } if (changed) { + logger.info("Cleaning up in progress v2 snapshots now"); clusterService.submitStateUpdateTask( "remove in progress snapshot v2 after cluster manager switch", new ClusterStateUpdateTask() {