diff --git a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java index 1563ac84bdd1c..4c562b348f141 100644 --- a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java +++ b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java @@ -41,6 +41,7 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.common.settings.ClusterSettings; @@ -120,7 +121,21 @@ static ClusterState updateRoutingTable(final ClusterState state) { // initialize all index routing tables as empty final RoutingTable.Builder routingTableBuilder = RoutingTable.builder(state.routingTable()); for (final IndexMetadata cursor : state.metadata().indices().values()) { - routingTableBuilder.addAsRecovery(cursor); + // Whether IndexMetadata is recovered from local disk or remote it doesn't matter to us at this point. + // We are only concerned about index data recovery here. Which is why we only check for remote store enabled and not for remote + // cluster state enabled. + if (cursor.getSettings().getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false) == false + || state.routingTable().hasIndex(cursor.getIndex()) == false + || state.routingTable() + .index(cursor.getIndex()) + .shardsMatchingPredicateCount( + shardRouting -> shardRouting.primary() + // We need to ensure atleast one of the primaries is being recovered from remote. + // This ensures we have gone through the RemoteStoreRestoreService and routing table is updated + && shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) == 0) { + routingTableBuilder.addAsRecovery(cursor); + } } // start with 0 based versions for routing table routingTableBuilder.version(0); diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index d05242a3aeaf7..94fd08b99ac58 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -187,7 +187,7 @@ private RemoteRestoreResult executeRestore( IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2(); boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); IndexMetadata updatedIndexMetadata = indexMetadata; - if (restoreAllShards || metadataFromRemoteStore) { + if (metadataFromRemoteStore == false && restoreAllShards) { updatedIndexMetadata = IndexMetadata.builder(indexMetadata) .state(IndexMetadata.State.OPEN) .version(1 + indexMetadata.getVersion()) diff --git a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java index c83da46b23fb1..9b3fd45245ef7 100644 --- a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java +++ b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java @@ -40,6 +40,10 @@ import org.opensearch.cluster.metadata.MetadataIndexStateService; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.UUIDs; import org.opensearch.common.settings.ClusterSettings; @@ -48,10 +52,12 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.Index; +import org.opensearch.repositories.IndexId; import org.opensearch.test.OpenSearchTestCase; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Function; @@ -269,6 +275,320 @@ public void testUpdateRoutingTable() { } } + public void testSkipRoutingTableUpdateWhenRemoteRecovery() { + final int numOfShards = randomIntBetween(1, 10); + + final IndexMetadata remoteMetadata = createIndexMetadata( + "test-remote", + Settings.builder() + .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) + .build() + ); + + // Test remote index routing table is generated with ExistingStoreRecoverySource if no routing table is present + { + final Index index = remoteMetadata.getIndex(); + final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder().put(remoteMetadata, false).build()) + .build(); + final ClusterState newState = updateRoutingTable(initialState); + IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); + assertTrue(newState.routingTable().hasIndex(index)); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) + ) + ); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource + ) + ); + + } + + // Test remote index routing table is overridden if recovery source is not RemoteStoreRecoverySource + { + IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) + .initializeAsNew(remoteMetadata); + final Index index = remoteMetadata.getIndex(); + final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder().put(remoteMetadata, false).build()) + .routingTable(new RoutingTable.Builder().add(remoteBuilderWithoutRemoteRecovery.build()).build()) + .build(); + assertTrue(initialState.routingTable().hasIndex(index)); + final ClusterState newState = updateRoutingTable(initialState); + IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); + assertTrue(newState.routingTable().hasIndex(index)); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) + ) + ); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource + ) + ); + + } + + // Test routing table update is skipped for a remote index + { + IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) + .initializeAsRemoteStoreRestore( + remoteMetadata, + new RecoverySource.RemoteStoreRecoverySource( + UUIDs.randomBase64UUID(), + remoteMetadata.getCreationVersion(), + new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) + ), + new HashMap<>(), + true + ); + final Index index = remoteMetadata.getIndex(); + final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder().put(remoteMetadata, false).build()) + .routingTable(new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()).build()) + .build(); + assertTrue(initialState.routingTable().hasIndex(index)); + final ClusterState newState = updateRoutingTable(initialState); + IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); + assertTrue(newState.routingTable().hasIndex(index)); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) + ) + ); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) + ); + + } + + // Test reset routing table for 2 indices - one remote and one non remote. + // Routing table for non remote index should be updated and remote index routing table should remain intact + { + final IndexMetadata nonRemoteMetadata = createIndexMetadata( + "test-nonremote", + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards).build() + ); + IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) + .initializeAsRemoteStoreRestore( + remoteMetadata, + new RecoverySource.RemoteStoreRecoverySource( + UUIDs.randomBase64UUID(), + remoteMetadata.getCreationVersion(), + new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) + ), + new HashMap<>(), + true + ); + IndexRoutingTable.Builder nonRemoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(nonRemoteMetadata.getIndex()) + .initializeAsNew(nonRemoteMetadata); + final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder().put(remoteMetadata, false).build()) + .metadata(Metadata.builder().put(nonRemoteMetadata, false).build()) + .routingTable( + new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) + .add(nonRemoteBuilderWithoutRemoteRecovery.build()) + .build() + ) + .build(); + assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); + assertTrue(initialState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); + final ClusterState newState = updateRoutingTable(initialState); + assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); + assertTrue(newState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); + IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); + IndexRoutingTable newNonRemoteIndexRoutingTable = newState.routingTable().index(nonRemoteMetadata.getIndex()); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) + ) + ); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) + ); + assertEquals( + 0, + newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) + ) + ); + assertEquals( + numOfShards, + newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) + ) + ); + assertEquals( + 0, + newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) + ); + assertEquals( + numOfShards, + newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource + ) + ); + } + + // Test reset routing table for 2 indices, both remote backed but only once index has RemoteStoreRecoverySource. + // Routing table for only remote index without RemoteStoreRecoverySource should be updated + { + final IndexMetadata remoteWithoutRemoteRecoveryMetadata = createIndexMetadata( + "test-remote-without-recovery", + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) + .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) + .build() + ); + IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) + .initializeAsRemoteStoreRestore( + remoteMetadata, + new RecoverySource.RemoteStoreRecoverySource( + UUIDs.randomBase64UUID(), + remoteMetadata.getCreationVersion(), + new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) + ), + new HashMap<>(), + true + ); + IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder( + remoteWithoutRemoteRecoveryMetadata.getIndex() + ).initializeAsNew(remoteWithoutRemoteRecoveryMetadata); + final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder().put(remoteMetadata, false).build()) + .metadata(Metadata.builder().put(remoteWithoutRemoteRecoveryMetadata, false).build()) + .routingTable( + new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) + .add(remoteBuilderWithoutRemoteRecovery.build()) + .build() + ) + .build(); + assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); + assertTrue(initialState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); + final ClusterState newState = updateRoutingTable(initialState); + assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); + assertTrue(newState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); + IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); + IndexRoutingTable newRemoteWithoutRemoteRecoveryIndexRoutingTable = newState.routingTable() + .index(remoteWithoutRemoteRecoveryMetadata.getIndex()); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) + ) + ); + assertEquals( + 0, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource + ) + ); + assertEquals( + numOfShards, + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) + ); + assertEquals( + 0, + newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) + ) + ); + assertEquals( + numOfShards, + newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) + ) + ); + assertEquals( + 0, + newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource + ) + ); + assertEquals( + numOfShards, + newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource + ) + ); + } + } + public void testMixCurrentAndRecoveredState() { final ClusterState currentState = ClusterState.builder(ClusterState.EMPTY_STATE) .blocks(ClusterBlocks.builder().addGlobalBlock(STATE_NOT_RECOVERED_BLOCK).build())