From 32260b74f37a301b0cc3a9fd4dd2e9db73f4374f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 5 Aug 2024 10:08:29 -0400 Subject: [PATCH 01/51] Push down collection and partition key range resolution. --- ...EndpointManagerForCircuitBreakerTests.java | 1 - .../PartitionLevelCircuitBreakerTests.java | 85 ++ .../query/DocumentProducerTest.java | 8 +- ...ointOperationContextForCircuitBreaker.java | 7 - .../implementation/RxDocumentClientImpl.java | 730 +++++++++--------- .../DefaultDocumentQueryExecutionContext.java | 2 +- .../query/DocumentProducer.java | 12 +- .../query/IDocumentQueryClient.java | 5 +- 8 files changed, 464 insertions(+), 386 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index d76c5449fd14..b6bb616f180e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -920,7 +920,6 @@ private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( new AtomicBoolean(false), false, collectionLink, - new MetadataDiagnosticsContext(), new SerializationDiagnosticsContext())); return request; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 1015adf98ffd..0ba43368d8bb 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -63,6 +63,7 @@ import java.net.URI; import java.time.Duration; import 
java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -2775,6 +2776,90 @@ private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperation } } + @Test(groups = {"multi-master"}) + public void testCreate_404_1002_FirstRegionOnly_LocalPreferred_EagerAvailabilityStrategy_WithRetries() { + + System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); + System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); + System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); + + CosmosAsyncClient asyncClient = buildCosmosClient( + ConsistencyLevel.SESSION, + Arrays.asList("West US 2", "South Central US", "East US"), + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + ConnectionMode.GATEWAY, + new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) + .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) + .build(), + new NonIdempotentWriteRetryOptions() + .setEnabled(true) + .setTrackingIdUsed(true)); + + CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); + CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) + .build(); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .connectionType(FaultInjectionConnectionType.GATEWAY) +// .operationType(FaultInjectionOperationType.CREATE_ITEM) + .region("West US 2") + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .build(); + + CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, 
Arrays.asList(faultInjectionRule)).block(); + + try { + CosmosItemResponse response = asyncContainer.createItem(TestObject.create(UUID.randomUUID().toString())).block(); + + System.out.println("Diagnostics : " + response.getDiagnostics()); + } catch (CosmosException ex) { + + System.out.println("Diagnostics : " + ex.getDiagnostics()); + } finally { + asyncClient.close(); + + System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); + System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); + System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); + } + } + + private static CosmosAsyncClient buildCosmosClient( + ConsistencyLevel consistencyLevel, + List preferredRegions, + CosmosRegionSwitchHint regionSwitchHint, + ConnectionMode connectionMode, + CosmosEndToEndOperationLatencyPolicyConfig cosmosEndToEndOperationLatencyPolicyConfig, + NonIdempotentWriteRetryOptions nonIdempotentWriteRetryOptions) { + + CosmosClientBuilder clientBuilder = new CosmosClientBuilder() + .endpoint(TestConfigurations.HOST) + .key(TestConfigurations.MASTER_KEY) + .consistencyLevel(consistencyLevel) + .preferredRegions(preferredRegions) + .sessionRetryOptions(new SessionRetryOptionsBuilder() + .regionSwitchHint(regionSwitchHint) + .build()) + .endToEndOperationLatencyPolicyConfig(cosmosEndToEndOperationLatencyPolicyConfig) + .nonIdempotentWriteRetryOptions(nonIdempotentWriteRetryOptions) + .multipleWriteRegionsEnabled(true); + + if (connectionMode == ConnectionMode.DIRECT) { + clientBuilder.directMode(); + } else { + clientBuilder.gatewayMode(); + } + + return clientBuilder.buildAsyncClient(); + } + private static Function> resolveDataPlaneOperation(FaultInjectionOperationType faultInjectionOperationType) { switch (faultInjectionOperationType) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 5613b3866d95..eb05b05b5df9 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -567,7 +567,7 @@ public void simple() { doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); return Mono.just(req); - }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any(), any()); doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); @@ -669,7 +669,7 @@ public void retries() { doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); return Mono.just(req); - }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any(), any()); doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); @@ -775,7 +775,7 @@ public void retriesExhausted() { doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); return Mono.just(req); - }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any(), any()); doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); 
doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); @@ -917,7 +917,7 @@ private IDocumentQueryClient mockQueryClient(List replacement doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); return Mono.just(req); - }).when(client).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + }).when(client).addPartitionLevelUnavailableRegionsOnRequest(any(), any(), any()); doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(client).getGlobalPartitionEndpointManagerForCircuitBreaker(); doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java index 8f5bdc1ccd91..0b35344540dc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java @@ -11,20 +11,17 @@ public class PointOperationContextForCircuitBreaker { private final boolean isThresholdBasedAvailabilityStrategyEnabled; private boolean isRequestHedged; private final String collectionLink; - private final MetadataDiagnosticsContext metadataDiagnosticsContext; private final SerializationDiagnosticsContext serializationDiagnosticsContext; public PointOperationContextForCircuitBreaker( AtomicBoolean hasOperationSeenSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled, String collectionLink, - MetadataDiagnosticsContext metadataDiagnosticsContext, SerializationDiagnosticsContext serializationDiagnosticsContext) { this.hasOperationSeenSuccess = hasOperationSeenSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = 
isThresholdBasedAvailabilityStrategyEnabled; this.collectionLink = collectionLink; - this.metadataDiagnosticsContext = metadataDiagnosticsContext; this.serializationDiagnosticsContext = serializationDiagnosticsContext; } @@ -52,10 +49,6 @@ public String getCollectionLink() { return this.collectionLink; } - public MetadataDiagnosticsContext getMetadataDiagnosticsContext() { - return this.metadataDiagnosticsContext; - } - public SerializationDiagnosticsContext getSerializationDiagnosticsContext() { return serializationDiagnosticsContext; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 2a8027d4f313..4969c79c3f7e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -40,7 +40,6 @@ import com.azure.cosmos.implementation.directconnectivity.ServerStoreModel; import com.azure.cosmos.implementation.directconnectivity.StoreClient; import com.azure.cosmos.implementation.directconnectivity.StoreClientFactory; -import com.azure.cosmos.implementation.directconnectivity.WFConstants; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.http.HttpClient; @@ -95,6 +94,7 @@ import reactor.core.publisher.Mono; import reactor.core.publisher.SignalType; import reactor.util.concurrent.Queues; +import reactor.util.function.Tuple2; import reactor.util.retry.Retry; import java.io.IOException; @@ -1826,14 +1826,14 @@ private void addPartitionKeyInformation(RxDocumentServiceRequest request, request.getHeaders().put(HttpConstants.HttpHeaders.PARTITION_KEY, Utils.escapeNonAscii(partitionKeyInternal.toJson())); } - private Mono 
getCreateDocumentRequest(DocumentClientRetryPolicy requestRetryPolicy, - String documentCollectionLink, - Object document, - RequestOptions options, - boolean disableAutomaticIdGeneration, - OperationType operationType, - DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { + private Mono>> getCreateDocumentRequest(DocumentClientRetryPolicy requestRetryPolicy, + String documentCollectionLink, + Object document, + RequestOptions options, + boolean disableAutomaticIdGeneration, + OperationType operationType, + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { if (StringUtils.isEmpty(documentCollectionLink)) { throw new IllegalArgumentException("documentCollectionLink"); @@ -1878,8 +1878,13 @@ private Mono getCreateDocumentRequest(DocumentClientRe serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } + Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); - return addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker); + return addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker) + .zipWith(collectionObs); } private Mono getBatchDocumentRequest(DocumentClientRetryPolicy requestRetryPolicy, @@ -1944,7 +1949,6 @@ private Mono getBatchDocumentRequest(DocumentClientRet new AtomicBoolean(false), false, documentCollectionLink, - metadataDiagnosticsContext, serializationDiagnosticsContext)); return this.collectionCache.resolveCollectionAsync(metadataDiagnosticsContext, request) @@ -1955,11 +1959,7 @@ private Mono getBatchDocumentRequest(DocumentClientRet if 
(this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request) && options != null) { options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); - addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v); - } - - if (requestRetryPolicy != null) { - requestRetryPolicy.onBeforeSendRequest(request); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, requestRetryPolicy); } return Mono.just(request); @@ -2246,15 +2246,15 @@ public Mono> createDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Create, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> createDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> createDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, - pointOperationContextForCircuitBreaker, - collectionRoutingMap), + pointOperationContextForCircuitBreaker + ), options, options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink @@ -2268,8 +2268,7 @@ private Mono> createDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy requestRetryPolicy = @@ -2294,7 +2293,6 @@ private Mono> createDocumentCore( finalRetryPolicyInstance, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap, 
pointOperationContextForCircuitBreaker), requestRetryPolicy), scopedDiagnosticsFactory @@ -2309,12 +2307,11 @@ private Mono> createDocumentInternal( DocumentClientRetryPolicy requestRetryPolicy, DiagnosticsClientContext clientContextOverride, AtomicReference documentServiceRequestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { logger.debug("Creating a Document. collectionLink: [{}]", collectionLink); - Mono requestObs = getCreateDocumentRequest( + Mono>> requestToDocumentCollectionObs = getCreateDocumentRequest( requestRetryPolicy, collectionLink, document, @@ -2324,28 +2321,36 @@ private Mono> createDocumentInternal( clientContextOverride, pointOperationContextForCircuitBreaker); - return requestObs - .flatMap(request -> { + return requestToDocumentCollectionObs + .flatMap(requestToDocumentCollection -> { - addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); - documentServiceRequestReference.set(request); - request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + RxDocumentServiceRequest request = requestToDocumentCollection.getT1(); + Utils.ValueHolder documentCollectionValueHolder = requestToDocumentCollection.getT2(); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (requestRetryPolicy != null) { - requestRetryPolicy.onBeforeSendRequest(request); - } + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to 
the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, requestRetryPolicy); + documentServiceRequestReference.set(request); + request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); - }) - .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); + }) + .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + }); } catch (Exception e) { logger.debug("Failure in creating a document due to [{}]", e.getMessage(), e); return Mono.error(e); @@ -2541,8 +2546,8 @@ public Mono> upsertDocument(String collectionLink, Ob return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Upsert, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> upsertDocumentCore( - collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, 
collectionRoutingMap), + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> upsertDocumentCore( + collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker), options, options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink @@ -2556,8 +2561,7 @@ private Mono> upsertDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2581,7 +2585,6 @@ private Mono> upsertDocumentCore( finalRetryPolicyInstance, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker), finalRetryPolicyInstance), scopedDiagnosticsFactory), requestReference); @@ -2595,13 +2598,12 @@ private Mono> upsertDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { logger.debug("Upserting a Document. 
collectionLink: [{}]", collectionLink); - Mono reqObs = + Mono>> requestToDocumentCollectionObs = getCreateDocumentRequest( retryPolicyInstance, collectionLink, @@ -2612,28 +2614,41 @@ private Mono> upsertDocumentInternal( clientContextOverride, pointOperationContextForCircuitBreaker); - return reqObs - .flatMap(request -> { + return requestToDocumentCollectionObs + .flatMap(requestToDocumentCollection -> { + RxDocumentServiceRequest request = requestToDocumentCollection.getT1(); + Utils.ValueHolder documentCollectionValueHolder = requestToDocumentCollection.getT2(); - addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { - request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(request); + if (collectionRoutingMapValueHolder != null && collectionRoutingMapValueHolder.v != null) { - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + 
request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(request); - return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); - }) - .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); + } else { + return Mono.error(new NotFoundException()); + } + }) + .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + + }); } catch (Exception e) { logger.debug("Failure in upserting a document due to [{}]", e.getMessage(), e); @@ -2650,14 +2665,13 @@ public Mono> replaceDocument(String documentLink, Obj return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> replaceDocumentCore( documentLink, document, opt, e2ecfg, clientCtxOverride, - pointOperationContextForCircuitBreaker, - collectionRoutingMap), + pointOperationContextForCircuitBreaker), options, options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink @@ -2670,8 +2684,7 @@ private Mono> 
replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2697,7 +2710,6 @@ private Mono> replaceDocumentCore( endToEndPolicyConfig, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker), requestRetryPolicy), scopedDiagnosticsFactory), requestReference); @@ -2711,7 +2723,6 @@ private Mono> replaceDocumentInternal( CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { @@ -2732,7 +2743,6 @@ private Mono> replaceDocumentInternal( retryPolicyInstance, clientContextOverride, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker); } catch (Exception e) { @@ -2749,13 +2759,13 @@ public Mono> replaceDocument(Document document, Reque return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> replaceDocumentCore( document, opt, e2ecfg, clientCtxOverride, - pointOperationContextForCircuitBreaker, - collectionRoutingMap), + pointOperationContextForCircuitBreaker + ), options, options != null && options.getNonIdempotentWriteRetriesEnabled() != 
null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink @@ -2767,8 +2777,7 @@ private Mono> replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { DocumentClientRetryPolicy requestRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(clientContextOverride); @@ -2788,7 +2797,6 @@ private Mono> replaceDocumentCore( endToEndPolicyConfig, clientContextOverride, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker), requestRetryPolicy), requestReference); } @@ -2800,7 +2808,6 @@ private Mono> replaceDocumentInternal( CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { @@ -2815,7 +2822,6 @@ private Mono> replaceDocumentInternal( retryPolicyInstance, clientContextOverride, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker); } catch (Exception e) { @@ -2831,7 +2837,6 @@ private Mono> replaceDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { if (document == null) { @@ -2882,6 +2887,10 @@ private Mono> replaceDocumentInternal( serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + Mono> collectionObs = 
collectionCache.resolveCollectionAsync( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), @@ -2889,28 +2898,26 @@ private Mono> replaceDocumentInternal( Mono requestObs = addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker); - return requestObs - .flatMap(req -> { + return collectionObs + .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + return requestObs.flatMap(req -> { - addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(req); + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - 
mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + return replace(request, retryPolicyInstance); + }) + .map(resp -> toResourceResponse(resp, Document.class)); - return replace(request, retryPolicyInstance); - }) - .map(resp -> toResourceResponse(resp, Document.class)); + })); } private CosmosEndToEndOperationLatencyPolicyConfig getEndToEndOperationLatencyPolicyConfig( @@ -2952,14 +2959,13 @@ public Mono> patchDocument(String documentLink, return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> patchDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> patchDocumentCore( documentLink, cosmosPatchOperations, opt, e2ecfg, clientCtxOverride, - pointOperationContextForCircuitBreaker, - collectionRoutingMap), + pointOperationContextForCircuitBreaker), options, options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink @@ -2972,8 +2978,7 @@ private Mono> patchDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { RequestOptions nonNullRequestOptions = options != null ? 
options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2993,7 +2998,6 @@ private Mono> patchDocumentCore( documentClientRetryPolicy, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker), documentClientRetryPolicy), scopedDiagnosticsFactory), requestReference); @@ -3006,7 +3010,6 @@ private Mono> patchDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { checkArgument(StringUtils.isNotEmpty(documentLink), "expected non empty documentLink"); @@ -3072,28 +3075,24 @@ private Mono> patchDocumentInternal( collectionObs, pointOperationContextForCircuitBreaker); - return requestObs - .flatMap(req -> { - - addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + return collectionObs + .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> requestObs + .flatMap(req -> { - req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(req); + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (retryPolicyInstance != 
null) { - retryPolicyInstance.onBeforeSendRequest(request); - } + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); - return patch(request, retryPolicyInstance); - }) - .map(resp -> toResourceResponse(resp, Document.class)); + return patch(request, retryPolicyInstance); + }) + .map(resp -> toResourceResponse(resp, Document.class)))); } @Override @@ -3104,14 +3103,14 @@ public Mono> deleteDocument(String documentLink, Requ return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> deleteDocumentCore( documentLink, null, opt, e2ecfg, clientCtxOverride, - pointOperationContextForCircuitBreaker, - collectionRoutingMap), + pointOperationContextForCircuitBreaker + ), options, options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink @@ -3126,14 +3125,13 @@ public Mono> deleteDocument(String documentLink, Inte return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> deleteDocumentCore( 
documentLink, internalObjectNode, opt, e2ecfg, clientCtxOverride, - pointOperationContextForCircuitBreaker, - collectionRoutingMap), + pointOperationContextForCircuitBreaker), options, options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink @@ -3146,8 +3144,7 @@ private Mono> deleteDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -3167,7 +3164,6 @@ private Mono> deleteDocumentCore( requestRetryPolicy, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker), requestRetryPolicy), scopedDiagnosticsFactory), requestReference); @@ -3180,7 +3176,6 @@ private Mono> deleteDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { @@ -3206,6 +3201,10 @@ private Mono> deleteDocumentInternal( request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + Mono> collectionObs = collectionCache.resolveCollectionAsync( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); @@ -3213,29 +3212,27 @@ private Mono> deleteDocumentInternal( Mono requestObs = addPartitionKeyInformation( 
request, null, internalObjectNode, options, collectionObs, pointOperationContextForCircuitBreaker); - return requestObs - .flatMap(req -> { + return collectionObs + .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + return requestObs + .flatMap(req -> { - addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(req); + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); - - return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); - }) - .map(serviceResponse -> 
toResourceResponse(serviceResponse, Document.class)); + return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); + }) + .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + })); } catch (Exception e) { logger.debug("Failure in deleting a document due to [{}]", e.getMessage()); return Mono.error(e); @@ -3294,7 +3291,7 @@ private Mono> readDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Read, - (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap), + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker), options, false, innerDiagnosticsFactory, @@ -3307,8 +3304,7 @@ private Mono> readDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { RequestOptions nonNullRequestOptions = options != null ? 
options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -3326,7 +3322,6 @@ private Mono> readDocumentCore( retryPolicyInstance, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap, pointOperationContextForCircuitBreaker), retryPolicyInstance), scopedDiagnosticsFactory @@ -3339,7 +3334,6 @@ private Mono> readDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap, PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { @@ -3359,31 +3353,45 @@ private Mono> readDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludedRegions()); request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); + return collectionObs.flatMap(documentCollectionValueHolder -> { + if (documentCollectionValueHolder != null && documentCollectionValueHolder.v != null) { + DocumentCollection documentCollection = documentCollectionValueHolder.v; + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollection.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + if (collectionRoutingMapValueHolder.v != null) { - Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, pointOperationContextForCircuitBreaker); + Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, pointOperationContextForCircuitBreaker); - return requestObs.flatMap(req -> { + return 
requestObs.flatMap(req -> { - addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + options.setPartitionKeyDefinition(documentCollection.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(req); + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + return this.read(req, retryPolicyInstance) + .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + }); + } else { + return Mono.error(new CollectionRoutingMapNotFoundException("")); + } + }); + } else { + return Mono.error(new NotFoundException()); + } + } + ); - return this.read(req, retryPolicyInstance) - .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); - }); } catch (Exception e) { logger.debug("Failure in reading a document due to [{}]", e.getMessage()); @@ -3978,7 +3986,7 @@ public Mono populateFeedRangeHeader(RxDocumentServiceR } @Override - public 
Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions) { + public Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions, DocumentClientRetryPolicy documentClientRetryPolicy) { if (RxDocumentClientImpl.this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { @@ -3994,6 +4002,11 @@ public Mono addPartitionLevelUnavailableRegionsOnReque } RxDocumentClientImpl.this.addPartitionLevelUnavailableRegionsForFeedRequest(request, queryRequestOptions, collectionRoutingMapValueHolder.v); + + if (documentClientRetryPolicy != null) { + documentClientRetryPolicy.onBeforeSendRequest(request); + } + return Mono.just(request); }); } else { @@ -5908,7 +5921,8 @@ static UUID randomUuid(long msb, long lsb) { public void addPartitionLevelUnavailableRegionsForRequest( RxDocumentServiceRequest request, RequestOptions options, - CollectionRoutingMap collectionRoutingMap) { + CollectionRoutingMap collectionRoutingMap, + DocumentClientRetryPolicy documentClientRetryPolicy) { checkNotNull(request, "Argument 'request' cannot be null!"); @@ -5937,6 +5951,12 @@ public void addPartitionLevelUnavailableRegionsForRequest( // since it is also recomputed in AddressResolver request.setEffectivePartitionKey(effectivePartitionKeyString); request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + + // onBeforeSendRequest uses excluded regions to know the next location endpoint + // to route the request to unavailable regions are effectively excluded regions for this request + if (documentClientRetryPolicy != null) { + documentClientRetryPolicy.onBeforeSendRequest(request); + } } } @@ -5945,12 +5965,9 @@ public void mergeContextInformationIntoDiagnosticsForPointRequest( PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { if 
(pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext - = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); SerializationDiagnosticsContext serializationDiagnosticsContext = pointOperationContextForCircuitBreaker.getSerializationDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); diagnosticsAccessor.mergeSerializationDiagnosticContext(request.requestContext.cosmosDiagnostics, serializationDiagnosticsContext); } } @@ -6044,212 +6061,186 @@ private Mono> wrapPointOperationWithAvailabilityStrat DiagnosticsClientContext innerDiagnosticsFactory, String collectionLink) { - final MetadataDiagnosticsContext metadataDiagnosticsContext = new MetadataDiagnosticsContext(); + checkNotNull(resourceType, "Argument 'resourceType' must not be null."); + checkNotNull(operationType, "Argument 'operationType' must not be null."); + checkNotNull(callback, "Argument 'callback' must not be null."); - return Mono.defer(() -> this.collectionCache.resolveByNameAsync(metadataDiagnosticsContext, collectionLink, null) - .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, collection.getResourceId(), null, null) - .flatMap(collectionRoutingMapValueHolder -> { + final RequestOptions nonNullRequestOptions = + initialRequestOptions != null ? 
initialRequestOptions : new RequestOptions(); + + checkArgument( + resourceType == ResourceType.Document, + "This method can only be used for document point operations."); + + CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = + getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); - checkNotNull(resourceType, "Argument 'resourceType' must not be null."); - checkNotNull(operationType, "Argument 'operationType' must not be null."); - checkNotNull(callback, "Argument 'callback' must not be null."); + List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( + endToEndPolicyConfig, + resourceType, + operationType, + idempotentWriteRetriesEnabled, + nonNullRequestOptions); - final RequestOptions nonNullRequestOptions = - initialRequestOptions != null ? initialRequestOptions : new RequestOptions(); + AtomicBoolean isOperationSuccessful = new AtomicBoolean(false); - checkArgument( - resourceType == ResourceType.Document, - "This method can only be used for document point operations."); + if (orderedApplicableRegionsForSpeculation.size() < 2) { + // There is at most one applicable region - no hedging possible + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + false, + collectionLink, + new SerializationDiagnosticsContext()); - CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = - getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); + return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest); + } - if (collectionRoutingMapValueHolder.v == null) { - return Mono.error(new CollectionRoutingMapNotFoundException("Argument 'collectionRoutingMapValueHolder.v' cannot be 
null!")); + ThresholdBasedAvailabilityStrategy availabilityStrategy = + (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); + List> monoList = new ArrayList<>(); + + final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); + + orderedApplicableRegionsForSpeculation + .forEach(region -> { + RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); + + if (monoList.isEmpty()) { + // no special error handling for transient errors to suppress them here + // because any cross-regional retries are expected to be processed + // by the ClientRetryPolicy for the initial request - so, any outcome of the + // initial Mono should be treated as non-transient error - even when + // the error would otherwise be treated as transient + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest + = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + true, + collectionLink, + new SerializationDiagnosticsContext()); + + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); + Mono initialMonoAcrossAllRegions = + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest) + .map(NonTransientPointOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isCosmosException, + t -> Mono.just( + new NonTransientPointOperationResult( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + + if (logger.isDebugEnabled()) { + monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( + "STARTING to process {} operation in region '{}'", + operationType, + region))); + } else { + monoList.add(initialMonoAcrossAllRegions); } + } else { + clonedOptions.setExcludedRegions( + getEffectiveExcludedRegionsForHedging( + nonNullRequestOptions.getExcludedRegions(), + orderedApplicableRegionsForSpeculation, + region) + ); - 
nonNullRequestOptions.setPartitionKeyDefinition(collection.getPartitionKey()); + // Non-Transient errors are mapped to a value - this ensures the firstWithValue + // operator below will complete the composite Mono for both successful values + // and non-transient errors + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForHedgedRequest + = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + true, + collectionLink, + new SerializationDiagnosticsContext()); - List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( - endToEndPolicyConfig, - resourceType, - operationType, - idempotentWriteRetriesEnabled, - nonNullRequestOptions); - - AtomicBoolean isOperationSuccessful = new AtomicBoolean(false); - - if (orderedApplicableRegionsForSpeculation.size() < 2) { - // There is at most one applicable region - no hedging possible - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker( - isOperationSuccessful, - false, - collectionLink, - metadataDiagnosticsContext, - new SerializationDiagnosticsContext()); - - pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); - return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder); + pointOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); + Mono regionalCrossRegionRetryMono = + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForHedgedRequest) + .map(NonTransientPointOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isNonTransientCosmosException, + t -> Mono.just( + new NonTransientPointOperationResult( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + + Duration delayForCrossRegionalRetry = (availabilityStrategy) + 
.getThreshold() + .plus((availabilityStrategy) + .getThresholdStep() + .multipliedBy(monoList.size() - 1)); + + if (logger.isDebugEnabled()) { + monoList.add( + regionalCrossRegionRetryMono + .doOnSubscribe(c -> logger.debug("STARTING to process {} operation in region '{}'", operationType, region)) + .delaySubscription(delayForCrossRegionalRetry)); + } else { + monoList.add( + regionalCrossRegionRetryMono + .delaySubscription(delayForCrossRegionalRetry)); } + } + }); - ThresholdBasedAvailabilityStrategy availabilityStrategy = - (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); - List> monoList = new ArrayList<>(); - - final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); - - orderedApplicableRegionsForSpeculation - .forEach(region -> { - RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); - - if (monoList.isEmpty()) { - // no special error handling for transient errors to suppress them here - // because any cross-regional retries are expected to be processed - // by the ClientRetryPolicy for the initial request - so, any outcome of the - // initial Mono should be treated as non-transient error - even when - // the error would otherwise be treated as transient - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest - = new PointOperationContextForCircuitBreaker( - isOperationSuccessful, - true, - collectionLink, - metadataDiagnosticsContext, - new SerializationDiagnosticsContext()); - - pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); - Mono initialMonoAcrossAllRegions = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder) - .map(NonTransientPointOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isCosmosException, - t -> Mono.just( - new 
NonTransientPointOperationResult( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); - - if (logger.isDebugEnabled()) { - monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( - "STARTING to process {} operation in region '{}'", - operationType, - region))); - } else { - monoList.add(initialMonoAcrossAllRegions); - } - } else { - clonedOptions.setExcludedRegions( - getEffectiveExcludedRegionsForHedging( - nonNullRequestOptions.getExcludedRegions(), - orderedApplicableRegionsForSpeculation, - region) - ); - - // Non-Transient errors are mapped to a value - this ensures the firstWithValue - // operator below will complete the composite Mono for both successful values - // and non-transient errors - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForHedgedRequest - = new PointOperationContextForCircuitBreaker( - isOperationSuccessful, - true, - collectionLink, - metadataDiagnosticsContext, - new SerializationDiagnosticsContext()); - - pointOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); - Mono regionalCrossRegionRetryMono = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForHedgedRequest, collectionRoutingMapValueHolder) - .map(NonTransientPointOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isNonTransientCosmosException, - t -> Mono.just( - new NonTransientPointOperationResult( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); - - Duration delayForCrossRegionalRetry = (availabilityStrategy) - .getThreshold() - .plus((availabilityStrategy) - .getThresholdStep() - .multipliedBy(monoList.size() - 1)); - - if (logger.isDebugEnabled()) { - monoList.add( - regionalCrossRegionRetryMono - .doOnSubscribe(c -> logger.debug("STARTING to process {} operation in region '{}'", operationType, region)) - .delaySubscription(delayForCrossRegionalRetry)); - } else { - monoList.add( - regionalCrossRegionRetryMono 
- .delaySubscription(delayForCrossRegionalRetry)); - } - } - }); + // NOTE - merging diagnosticsFactory cannot only happen in + // doFinally operator because the doFinally operator is a side effect method - + // meaning it executes concurrently with firing the onComplete/onError signal + // doFinally is also triggered by cancellation + // So, to make sure merging the Context happens synchronously in line we + // have to ensure merging is happening on error/completion + // and also in doOnCancel. + return Mono + .firstWithValue(monoList) + .flatMap(nonTransientResult -> { + diagnosticsFactory.merge(nonNullRequestOptions); + if (nonTransientResult.isError()) { + return Mono.error(nonTransientResult.exception); + } - // NOTE - merging diagnosticsFactory cannot only happen in - // doFinally operator because the doFinally operator is a side effect method - - // meaning it executes concurrently with firing the onComplete/onError signal - // doFinally is also triggered by cancellation - // So, to make sure merging the Context happens synchronously in line we - // have to ensure merging is happening on error/completion - // and also in doOnCancel. 
- return Mono - .firstWithValue(monoList) - .flatMap(nonTransientResult -> { - diagnosticsFactory.merge(nonNullRequestOptions); - if (nonTransientResult.isError()) { - return Mono.error(nonTransientResult.exception); - } + return Mono.just(nonTransientResult.response); + }) + .onErrorMap(throwable -> { + Throwable exception = Exceptions.unwrap(throwable); - return Mono.just(nonTransientResult.response); - }) - .onErrorMap(throwable -> { - Throwable exception = Exceptions.unwrap(throwable); - - if (exception instanceof NoSuchElementException) { - - List innerThrowables = Exceptions - .unwrapMultiple(exception.getCause()); - - int index = 0; - for (Throwable innerThrowable : innerThrowables) { - Throwable innerException = Exceptions.unwrap(innerThrowable); - - // collect latest CosmosException instance bubbling up for a region - if (innerException instanceof CosmosException) { - CosmosException cosmosException = Utils.as(innerException, CosmosException.class); - diagnosticsFactory.merge(nonNullRequestOptions); - return cosmosException; - } else if (innerException instanceof NoSuchElementException) { - logger.trace( - "Operation in {} completed with empty result because it was cancelled.", - orderedApplicableRegionsForSpeculation.get(index)); - } else if (logger.isWarnEnabled()) { - String message = "Unexpected Non-CosmosException when processing operation in '" - + orderedApplicableRegionsForSpeculation.get(index) - + "'."; - logger.warn( - message, - innerException - ); - } + if (exception instanceof NoSuchElementException) { - index++; - } - } + List innerThrowables = Exceptions + .unwrapMultiple(exception.getCause()); + int index = 0; + for (Throwable innerThrowable : innerThrowables) { + Throwable innerException = Exceptions.unwrap(innerThrowable); + + // collect latest CosmosException instance bubbling up for a region + if (innerException instanceof CosmosException) { + CosmosException cosmosException = Utils.as(innerException, CosmosException.class); 
diagnosticsFactory.merge(nonNullRequestOptions); + return cosmosException; + } else if (innerException instanceof NoSuchElementException) { + logger.trace( + "Operation in {} completed with empty result because it was cancelled.", + orderedApplicableRegionsForSpeculation.get(index)); + } else if (logger.isWarnEnabled()) { + String message = "Unexpected Non-CosmosException when processing operation in '" + + orderedApplicableRegionsForSpeculation.get(index) + + "'."; + logger.warn( + message, + innerException + ); + } - return exception; - }) - .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); - })) - ) - .retryWhen(Retry.fixedDelay(Configs.getStaleCollectionCacheRefreshRetryCount(), Duration.ofSeconds(Configs.getStaleCollectionCacheRefreshRetryIntervalInSeconds())) - .filter(throwable -> throwable instanceof CollectionRoutingMapNotFoundException) - .doBeforeRetry((retrySignal) -> this.collectionCache - .refresh( - null, - collectionLink, - null) - ) - ); + index++; + } + } + + diagnosticsFactory.merge(nonNullRequestOptions); + + return exception; + }) + .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); } private static boolean isCosmosException(Throwable t) { @@ -6634,8 +6625,7 @@ Mono> apply( RequestOptions requestOptions, CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, - Utils.ValueHolder collectionRoutingMap); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker); } private static class NonTransientPointOperationResult { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index 865203676512..b095e0d2762f 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -206,7 +206,7 @@ private Mono> executeInternalFuncCore( return Mono.just(req) .flatMap(request -> client.populateFeedRangeHeader(request)) - .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions)) + .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions, finalRetryPolicyInstance)) .flatMap(request -> { finalRetryPolicyInstance.onBeforeSendRequest(request); return executeRequestAsync( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index 65c1cd18176e..3cc9b8fcdb4b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -146,13 +146,21 @@ public DocumentProducer( return ObservableHelper.inlineIfPossibleAsObs( () -> Mono .just(request) - .flatMap(req -> client.populateFeedRangeHeader(req)) - .flatMap(req -> client.addPartitionLevelUnavailableRegionsOnRequest(req, cosmosQueryRequestOptions)) .flatMap(req -> { if(finalRetryPolicy != null) { finalRetryPolicy.onBeforeSendRequest(req); } + + return client.populateFeedRangeHeader(req); + }) + .flatMap(req -> client.addPartitionLevelUnavailableRegionsOnRequest(req, cosmosQueryRequestOptions, finalRetryPolicy)) + .flatMap(req -> { + + if(finalRetryPolicy != null) { + finalRetryPolicy.onBeforeSendRequest(req); + } + ++retries; return executeRequestFunc.apply(req); }), finalRetryPolicy); diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java index 75868822962f..659461d476c2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java @@ -96,7 +96,10 @@ enum QueryCompatibilityMode { Mono populateFeedRangeHeader(RxDocumentServiceRequest request); - Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions); + Mono addPartitionLevelUnavailableRegionsOnRequest( + RxDocumentServiceRequest request, + CosmosQueryRequestOptions queryRequestOptions, + DocumentClientRetryPolicy clientRetryPolicy); GlobalEndpointManager getGlobalEndpointManager(); From 4e124f6f8b3ba4a701b82da7d57362f3d219e4ad Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 7 Aug 2024 12:26:26 -0400 Subject: [PATCH 02/51] Tweaking threshold behavior. 
--- .../PartitionLevelCircuitBreakerTests.java | 476 +++++++++++++----- ...nsecutiveExceptionBasedCircuitBreaker.java | 2 +- ...pecificHealthContextTransitionHandler.java | 33 +- 3 files changed, 377 insertions(+), 134 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 0ba43368d8bb..c3191495028f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -29,6 +29,7 @@ import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchResponse; import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; +import com.azure.cosmos.models.CosmosContainerIdentity; import com.azure.cosmos.models.CosmosItemIdentity; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; @@ -300,7 +301,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -325,7 +326,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -350,7 +351,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() 
.withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -375,7 +376,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -400,7 +401,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -425,7 +426,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -451,7 +452,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -476,7 +477,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() 
.withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -500,7 +501,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -805,7 +806,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -830,7 +831,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -854,7 +855,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -880,7 +881,7 @@ 
public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1149,7 +1150,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1173,7 +1174,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1197,7 +1198,7 @@ public Object[][] miscellaneousOpTestConfigsDirect() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(6), + .withHitLimit(5), this.buildInternalServerErrorFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1235,7 +1236,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1260,7 +1261,7 @@ public Object[][] 
miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1285,7 +1286,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1310,7 +1311,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1335,7 +1336,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1360,7 +1361,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1386,7 +1387,7 @@ public Object[][] 
miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1411,7 +1412,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1435,7 +1436,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1459,7 +1460,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1484,7 +1485,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), + .withHitLimit(5), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, 
NO_REGION_SWITCH_HINT, @@ -1508,7 +1509,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1534,7 +1535,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1632,7 +1633,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1656,7 +1657,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), + .withHitLimit(10), this.buildInternalServerErrorFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1680,7 +1681,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(6), + .withHitLimit(5), 
this.buildInternalServerErrorFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1735,7 +1736,7 @@ public Object[][] readManyTestConfigs() { "Test read many operation injected with service unavailable exception in first preferred region.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(11) + .withHitLimit(10) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServiceUnavailableFaultInjectionRules, executeReadManyOperation, @@ -1759,7 +1760,7 @@ public Object[][] readManyTestConfigs() { "Test read many operation injected with internal server error injected in first preferred region.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(11) + .withHitLimit(10) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildInternalServerErrorFaultInjectionRules, executeReadManyOperation, @@ -1856,7 +1857,7 @@ public Object[][] readManyTestConfigs() { "Test read many operation injected with internal server error in all preferred regions.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(11) + .withHitLimit(10) .withFaultInjectionApplicableRegions(this.writeRegions), this.buildInternalServerErrorFaultInjectionRules, executeReadManyOperation, @@ -1938,7 +1939,7 @@ public Object[][] readAllTestConfigs() { "Test read all operation injected with service unavailable exception in first preferred region.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(11) + .withHitLimit(10) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServiceUnavailableFaultInjectionRules, executeReadAllOperation, @@ -1962,7 +1963,7 @@ public Object[][] readAllTestConfigs() { "Test read all 
operation injected with internal server error injected in first preferred region.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(11) + .withHitLimit(10) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildInternalServerErrorFaultInjectionRules, executeReadAllOperation, @@ -2059,7 +2060,7 @@ public Object[][] readAllTestConfigs() { "Test read all operation injected with internal server error in all preferred regions.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(11) + .withHitLimit(10) .withFaultInjectionApplicableRegions(this.writeRegions), this.buildInternalServerErrorFaultInjectionRules, executeReadAllOperation, @@ -2776,89 +2777,322 @@ private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperation } } - @Test(groups = {"multi-master"}) - public void testCreate_404_1002_FirstRegionOnly_LocalPreferred_EagerAvailabilityStrategy_WithRetries() { - - System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); - System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); - System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); - - CosmosAsyncClient asyncClient = buildCosmosClient( - ConsistencyLevel.SESSION, - Arrays.asList("West US 2", "South Central US", "East US"), - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - ConnectionMode.GATEWAY, - new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) - .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) - .build(), - new NonIdempotentWriteRetryOptions() - .setEnabled(true) - .setTrackingIdUsed(true)); - - CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); - CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); - - FaultInjectionServerErrorResult 
faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) - .build(); - - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .connectionType(FaultInjectionConnectionType.GATEWAY) -// .operationType(FaultInjectionOperationType.CREATE_ITEM) - .region("West US 2") - .build(); - - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .build(); - - CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); - - try { - CosmosItemResponse response = asyncContainer.createItem(TestObject.create(UUID.randomUUID().toString())).block(); - - System.out.println("Diagnostics : " + response.getDiagnostics()); - } catch (CosmosException ex) { - - System.out.println("Diagnostics : " + ex.getDiagnostics()); - } finally { - asyncClient.close(); - - System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); - System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); - System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); - } - } - - private static CosmosAsyncClient buildCosmosClient( - ConsistencyLevel consistencyLevel, - List preferredRegions, - CosmosRegionSwitchHint regionSwitchHint, - ConnectionMode connectionMode, - CosmosEndToEndOperationLatencyPolicyConfig cosmosEndToEndOperationLatencyPolicyConfig, - NonIdempotentWriteRetryOptions nonIdempotentWriteRetryOptions) { - - CosmosClientBuilder clientBuilder = new CosmosClientBuilder() - .endpoint(TestConfigurations.HOST) - .key(TestConfigurations.MASTER_KEY) - .consistencyLevel(consistencyLevel) - .preferredRegions(preferredRegions) - .sessionRetryOptions(new SessionRetryOptionsBuilder() - .regionSwitchHint(regionSwitchHint) - .build()) - 
.endToEndOperationLatencyPolicyConfig(cosmosEndToEndOperationLatencyPolicyConfig) - .nonIdempotentWriteRetryOptions(nonIdempotentWriteRetryOptions) - .multipleWriteRegionsEnabled(true); - - if (connectionMode == ConnectionMode.DIRECT) { - clientBuilder.directMode(); - } else { - clientBuilder.gatewayMode(); - } - - return clientBuilder.buildAsyncClient(); - } +// @Test(groups = {"multi-master"}) +// public void testCreate_404_1002_FirstRegionOnly_LocalPreferred_EagerAvailabilityStrategy_WithRetries() { +// +// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); +// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); +// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); +// +// CosmosAsyncClient asyncClient = buildCosmosClient( +// ConsistencyLevel.SESSION, +// Arrays.asList("West US 2", "South Central US", "East US"), +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.GATEWAY, +// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) +// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) +// .build(), +// new NonIdempotentWriteRetryOptions() +// .setEnabled(true) +// .setTrackingIdUsed(true)); +// +// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); +// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); +// +// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders +// .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) +// .build(); +// +// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() +// .connectionType(FaultInjectionConnectionType.GATEWAY) +//// .operationType(FaultInjectionOperationType.CREATE_ITEM) +// .region("West US 2") +// .build(); +// +// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) 
+// .condition(faultInjectionCondition) +// .result(faultInjectionServerErrorResult) +// .build(); +// +// asyncContainer.getFeedRanges().block(); +// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); +// +// try { +// CosmosItemResponse response = asyncContainer.createItem(TestObject.create(UUID.randomUUID().toString())).block(); +// +// System.out.println("Success Diagnostics : " + response.getDiagnostics()); +// } catch (CosmosException ex) { +// +// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); +// } finally { +// asyncClient.close(); +// +// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); +// } +// } +// +// @Test(groups = {"multi-master"}) +// public void testCreate_500_FirstRegionOnly_LocalPreferred_EagerAvailabilityStrategy_WithRetries() { +// +// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); +// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); +// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); +// System.setProperty( +// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", +// "{\"isPartitionLevelCircuitBreakerEnabled\": true, " +// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," +// + "\"consecutiveExceptionCountToleratedForReads\": 10," +// + "\"consecutiveExceptionCountToleratedForWrites\": 5," +// + "}"); +// +// +// CosmosAsyncClient asyncClient = buildCosmosClient( +// ConsistencyLevel.SESSION, +// Arrays.asList("West US 2", "South Central US", "East US"), +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) +// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) +// .build(), 
+// new NonIdempotentWriteRetryOptions() +// .setEnabled(true) +// .setTrackingIdUsed(true)); +// +// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); +// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); +// +// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders +// .getResultBuilder(FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR) +// .build(); +// +// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() +// .connectionType(FaultInjectionConnectionType.DIRECT) +// .region("West US 2") +// .build(); +// +// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) +// .condition(faultInjectionCondition) +// .result(faultInjectionServerErrorResult) +// .build(); +// +// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); +// +// try { +// CosmosItemResponse response = null; +// +// for (int i = 1; i <= 7; i++) { +// response = asyncContainer.createItem(TestObject.create(UUID.randomUUID().toString())).onErrorResume(throwable -> Mono.empty()).block(); +// } +// +// System.out.println("Success Diagnostics : " + response.getDiagnostics()); +// } catch (CosmosException ex) { +// +// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); +// } finally { +// asyncClient.close(); +// +// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); +// System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); +// } +// } +// +// @Test(groups = {"multi-master"}) +// public void testRead_503_FirstRegionOnly() { +// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); +// 
System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); +// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); +// System.setProperty( +// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", +// "{\"isPartitionLevelCircuitBreakerEnabled\": true, " +// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," +// + "\"consecutiveExceptionCountToleratedForReads\": 10," +// + "\"consecutiveExceptionCountToleratedForWrites\": 5," +// + "}"); +// +// +// CosmosAsyncClient asyncClient = buildCosmosClient( +// ConsistencyLevel.SESSION, +// Arrays.asList("West US 2", "South Central US", "East US"), +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) +// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) +// .build(), +// new NonIdempotentWriteRetryOptions() +// .setEnabled(true) +// .setTrackingIdUsed(true)); +// +// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); +// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); +// +// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders +// .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) +// .build(); +// +// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() +// .connectionType(FaultInjectionConnectionType.DIRECT) +// .region("West US 2") +// .build(); +// +// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) +// .condition(faultInjectionCondition) +// .result(faultInjectionServerErrorResult) +// .build(); +// +// TestObject testObject = TestObject.create(); +// String id = testObject.getId(); +// String myPk = testObject.getMypk(); +// +// CosmosItemResponse responseFromCreate = 
asyncContainer.createItem(testObject).block(); +// +// CosmosAsyncClient asyncClient2 = buildCosmosClient( +// ConsistencyLevel.SESSION, +// Arrays.asList("West US 2", "South Central US", "East US"), +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) +// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) +// .build(), +// new NonIdempotentWriteRetryOptions() +// .setEnabled(true) +// .setTrackingIdUsed(true)); +// +// asyncDatabase = asyncClient2.getDatabase("testDb"); +// asyncContainer = asyncDatabase.getContainer("testContainer"); +// +// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); +// +// try { +// CosmosItemResponse response = null; +// +// for (int i = 1; i <= 10; i++) { +// response = asyncContainer.readItem(id, new PartitionKey(myPk), TestObject.class).onErrorResume(throwable -> Mono.empty()).block(); +// System.out.println("Success Diagnostics : " + response.getDiagnostics()); +// } +// +// response = asyncContainer.readItem(id, new PartitionKey(myPk), TestObject.class).onErrorResume(throwable -> Mono.empty()).block(); +// System.out.println("Success Diagnostics : " + response.getDiagnostics()); +// } catch (CosmosException ex) { +// +// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); +// } finally { +// asyncClient.close(); +// +// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); +// System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); +// } +// } +// +// @Test(groups = {"multi-master"}) +// public void testCreate_503_FirstRegionOnly() { +// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); +// 
System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); +// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); +// System.setProperty( +// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", +// "{\"isPartitionLevelCircuitBreakerEnabled\": true, " +// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," +// + "\"consecutiveExceptionCountToleratedForReads\": 10," +// + "\"consecutiveExceptionCountToleratedForWrites\": 5," +// + "}"); +// +// +// CosmosAsyncClient asyncClient = buildCosmosClient( +// ConsistencyLevel.SESSION, +// Arrays.asList("West US 2", "South Central US", "East US"), +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) +// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) +// .build(), +// new NonIdempotentWriteRetryOptions() +// .setEnabled(true) +// .setTrackingIdUsed(true)); +// +// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); +// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); +// +// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders +// .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) +// .build(); +// +// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() +// .connectionType(FaultInjectionConnectionType.DIRECT) +// .region("West US 2") +// .build(); +// +// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) +// .condition(faultInjectionCondition) +// .result(faultInjectionServerErrorResult) +// .build(); +// +// TestObject testObject = TestObject.create(); +// String id = testObject.getId(); +// String myPk = testObject.getMypk(); +// +// CosmosItemResponse responseFromCreate = 
asyncContainer.createItem(testObject).block(); +// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); +// +// try { +// CosmosItemResponse response = null; +// +// for (int i = 1; i <= 5; i++) { +// response = asyncContainer.createItem(TestObject.create()).onErrorResume(throwable -> Mono.empty()).block(); +// System.out.println("Success Diagnostics : " + response.getDiagnostics()); +// } +// +// response = asyncContainer.createItem(TestObject.create()).onErrorResume(throwable -> Mono.empty()).block(); +// System.out.println("Success Diagnostics : " + response.getDiagnostics()); +// } catch (CosmosException ex) { +// +// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); +// } finally { +// asyncClient.close(); +// +// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); +// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); +// System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); +// } +// } +// +// private static CosmosAsyncClient buildCosmosClient( +// ConsistencyLevel consistencyLevel, +// List preferredRegions, +// CosmosRegionSwitchHint regionSwitchHint, +// ConnectionMode connectionMode, +// CosmosEndToEndOperationLatencyPolicyConfig cosmosEndToEndOperationLatencyPolicyConfig, +// NonIdempotentWriteRetryOptions nonIdempotentWriteRetryOptions) { +// +// CosmosClientBuilder clientBuilder = new CosmosClientBuilder() +// .endpoint(TestConfigurations.HOST) +// .key(TestConfigurations.MASTER_KEY) +// .consistencyLevel(consistencyLevel) +// .preferredRegions(preferredRegions) +// .sessionRetryOptions(new SessionRetryOptionsBuilder() +// .regionSwitchHint(regionSwitchHint) +// .build()) +// .endToEndOperationLatencyPolicyConfig(cosmosEndToEndOperationLatencyPolicyConfig) +// .nonIdempotentWriteRetryOptions(nonIdempotentWriteRetryOptions) +// 
.openConnectionsAndInitCaches(new CosmosContainerProactiveInitConfigBuilder(Arrays.asList(new CosmosContainerIdentity("testDb", "testContainer"))) +// .setProactiveConnectionRegionsCount(3) +// .build()) +// .multipleWriteRegionsEnabled(true); +// +// if (connectionMode == ConnectionMode.DIRECT) { +// clientBuilder.directMode(); +// } else { +// clientBuilder.gatewayMode(); +// } +// +// return clientBuilder.buildAsyncClient(); +// } private static Function> resolveDataPlaneOperation(FaultInjectionOperationType faultInjectionOperationType) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index e1b12d00a037..d863d49c8a76 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -163,7 +163,7 @@ public boolean shouldHealthStatusBeDowngraded(LocationSpecificHealthContext loca int exceptionCountActual = isReadOnlyRequest ? 
locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); - return exceptionCountActual >= getAllowedExceptionCountToMaintainStatus(locationSpecificHealthContext.getLocationHealthStatus(), isReadOnlyRequest); + return (exceptionCountActual + 1) >= getAllowedExceptionCountToMaintainStatus(locationSpecificHealthContext.getLocationHealthStatus(), isReadOnlyRequest); } public boolean canHealthStatusBeUpgraded(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index a3a34539df7f..09969dc10ccf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -5,11 +5,9 @@ import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.OperationType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.net.URI; import java.time.Duration; import java.time.Instant; import java.util.concurrent.ConcurrentHashMap; @@ -78,7 +76,7 @@ public LocationSpecificHealthContext handleSuccess( regionWithSuccess); } - return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Healthy); + return this.transitionHealthStatus(LocationHealthStatus.Healthy, isReadOnlyRequest); } else { return locationSpecificHealthContextInner; } @@ -97,7 +95,7 @@ public LocationSpecificHealthContext handleSuccess( regionWithSuccess); } - 
return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative); + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative, isReadOnlyRequest); } } else { @@ -109,7 +107,7 @@ public LocationSpecificHealthContext handleSuccess( regionWithSuccess); } - return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative); + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative, isReadOnlyRequest); } break; default: @@ -139,7 +137,7 @@ public LocationSpecificHealthContext handleException( regionWithException); } - return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyWithFailures); + return this.transitionHealthStatus(LocationHealthStatus.HealthyWithFailures, isReadOnlyRequest); case HealthyWithFailures: if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { @@ -172,7 +170,7 @@ public LocationSpecificHealthContext handleException( regionWithException); } - return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable); + return this.transitionHealthStatus(LocationHealthStatus.Unavailable, isReadOnlyRequest); } case HealthyTentative: if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { @@ -192,14 +190,16 @@ public LocationSpecificHealthContext handleException( regionWithException); } - return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable); + return this.transitionHealthStatus(LocationHealthStatus.Unavailable, isReadOnlyRequest); } default: throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); } } - public LocationSpecificHealthContext transitionHealthStatus(LocationSpecificHealthContext locationSpecificHealthContext, 
LocationHealthStatus newStatus) { + public LocationSpecificHealthContext transitionHealthStatus( + LocationHealthStatus newStatus, + boolean isReadOnlyRequest) { LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() .withSuccessCountForWriteForRecovery(0) @@ -218,11 +218,20 @@ public LocationSpecificHealthContext transitionHealthStatus(LocationSpecificHeal case HealthyWithFailures: - return builder + builder = builder .withUnavailableSince(Instant.MAX) .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures) - .withExceptionThresholdBreached(false) - .build(); + .withExceptionThresholdBreached(false); + + if (isReadOnlyRequest) { + return builder + .withExceptionCountForReadForCircuitBreaking(1) + .build(); + } else { + return builder + .withExceptionCountForWriteForCircuitBreaking(1) + .build(); + } case Unavailable: From 8a5dd05b05b7114101d0c2d3629bb8a294563f4c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 7 Aug 2024 12:56:23 -0400 Subject: [PATCH 03/51] Fixing tests. 
--- ...itionEndpointManagerForCircuitBreakerTests.java | 14 +++++++------- ...tionSpecificHealthContextTransitionHandler.java | 7 +++++++ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index b6bb616f180e..e2d1cc39c5f9 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -259,7 +259,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); - for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + for (int i = 1; i <= exceptionCountToHandle; i++) { globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } @@ -334,7 +334,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); - for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + for (int i = 1; i <= exceptionCountToHandle; i++) { globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } @@ -420,7 +420,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String 
partitionLeve int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); - for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + for (int i = 1; i <= exceptionCountToHandle; i++) { globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } @@ -513,7 +513,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); - for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + for (int i = 1; i <= exceptionCountToHandle; i++) { globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } @@ -554,7 +554,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyTentative, readOperationTrue); - for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + for (int i = 1; i <= exceptionCountToHandle; i++) { globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } @@ -607,7 +607,7 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon .getConsecutiveExceptionBasedCircuitBreaker() .getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); - for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + for (int 
i = 1; i <= exceptionCountToHandle; i++) { globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); globalPartitionEndpointManagerForCircuitBreaker @@ -687,7 +687,7 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); - for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + for (int i = 1; i <= exceptionCountToHandle; i++) { globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request1, LocationEastUs2EndpointToLocationPair.getKey()); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index 09969dc10ccf..018154592a04 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -192,6 +192,13 @@ public LocationSpecificHealthContext handleException( return this.transitionHealthStatus(LocationHealthStatus.Unavailable, isReadOnlyRequest); } + case Unavailable: + return this.consecutiveExceptionBasedCircuitBreaker + .handleException( + locationSpecificHealthContext, + partitionKeyRangeWrapper, + regionWithException, + isReadOnlyRequest); default: throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); } From 0847f402ff378e13e6e1d32d5fcdcebe8219c69a Mon Sep 17 00:00:00 2001 
From: Abhijeet Mohanty Date: Wed, 7 Aug 2024 17:30:04 -0400 Subject: [PATCH 04/51] Fixing tests. --- .../PartitionLevelCircuitBreakerTests.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index c3191495028f..38d9e7ddf374 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -1117,10 +1117,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { 15, 15 }, - // 449 injected into first preferred region for QUERY_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) - // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). +// 449 injected into first preferred region for QUERY_ITEM operation +// injected into all replicas of the faulty EPK range. +// Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) +// and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). 
new Object[]{ String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -1165,10 +1165,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { 40, 15 }, - // 500 injected into all regions for READ_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to see InternalServerError in all regions - // and will contact one region contacted post circuit breaking. +// 500 injected into all regions for READ_ITEM operation +// injected into all replicas of the faulty EPK range. +// Expectation is for the operation to see InternalServerError in all regions +// and will contact one region contacted post circuit breaking. new Object[]{ String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -2659,12 +2659,12 @@ private void execute( if (!hasReachedCircuitBreakingThreshold) { - hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == + hasReachedCircuitBreakingThreshold = (expectedCircuitBreakingThreshold - 1) == getEstimatedFailureCountSeenPerRegionPerPartitionKeyRange( partitionKeyRangeWrapper, partitionKeyRangeToLocationSpecificUnavailabilityInfo, locationEndpointToLocationSpecificContextForPartitionField, - expectedCircuitBreakingThreshold, + expectedCircuitBreakingThreshold - 1, expectedRegionCountWithFailures); validateResponseInPresenceOfFailures.accept(response); } else { From e9589667391861c9a52e5396c950b5a0a73170ad Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 7 Aug 2024 22:05:21 -0400 Subject: [PATCH 05/51] Fixing tests. 
--- .../PartitionLevelCircuitBreakerTests.java | 2738 ++++++++++++----- .../implementation/RxDocumentClientImpl.java | 43 +- .../caches/RxPartitionKeyRangeCache.java | 5 +- 3 files changed, 1908 insertions(+), 878 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 38d9e7ddf374..49ebf2fe9a1d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -29,7 +29,6 @@ import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchResponse; import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; -import com.azure.cosmos.models.CosmosContainerIdentity; import com.azure.cosmos.models.CosmosItemIdentity; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; @@ -64,7 +63,6 @@ import java.net.URI; import java.time.Duration; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -2103,6 +2101,880 @@ public Object[][] readAllTestConfigs() { }; } + @DataProvider(name = "gatewayRoutedFailureParametersDataProvider_ReadAll") + public Object[][] gatewayRoutedFailureParametersDataProvider_ReadAll() { + + Function> executeReadAllOperation = (paramsWrapper) -> { + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + PartitionKey partitionKey = paramsWrapper.partitionKeyForReadAllOperation; + CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; + + try { + + FeedResponse response = asyncContainer.readAllItems( + partitionKey, + queryRequestOptions, + TestObject.class) + .byPage() + .next() + .block(); + + return new 
ResponseWrapper<>(response); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + + return new Object[][]{ + { + "Test read all operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + executeReadAllOperation, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ALL_CONNECTION_MODES_INCLUDED + }, + // todo: for read all and read many - collection resolution and pkRange resolution happens + // todo: outside the document retry loop so the operation fails with 404:1002 + // todo: weird thing is the operation succeeds when the client is in DIRECT connectivity mode + // todo: track this +// { +// "Test read all operation injected with read session not available in first preferred region.", +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withOverrideFaultInjectionOperationType(true) +// .withHitLimit(3) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), +// this.buildReadWriteSessionNotAvailableFaultInjectionRules, +// executeReadAllOperation, +// NO_REGION_SWITCH_HINT, +// this.validateResponseHasSuccess, +// ONLY_DIRECT_MODE +// }, + { + "Test read all operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + 
.withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + executeReadAllOperation, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ALL_CONNECTION_MODES_INCLUDED + } + }; + } + + @DataProvider(name = "masterResourceFailuresDataProviderReadMany") + public Object[][] masterResourceFailuresDataProviderReadMany() { + + Function> executeReadManyOperation = (paramsWrapper) -> { + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + List itemIdentities = paramsWrapper.itemIdentitiesForReadManyOperation; + CosmosReadManyRequestOptions readManyRequestOptions = paramsWrapper.readManyRequestOptions; + + try { + + FeedResponse response = asyncContainer.readMany( + itemIdentities, + readManyRequestOptions, + TestObject.class) + .block(); + + return new ResponseWrapper<>(response); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + + return new Object[][]{ + { + "Test read many operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + executeReadManyOperation, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ALL_CONNECTION_MODES_INCLUDED + }, + // todo: for read all and read many - collection 
resolution and pkRange resolution happens + // todo: outside the document retry loop so the operation fails with 404:1002 + // todo: weird thing is the operation succeeds when the client is in DIRECT connectivity mode + // todo: track this +// { +// "Test read many operation injected with read session not available in first preferred region.", +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withOverrideFaultInjectionOperationType(true) +// .withHitLimit(3) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), +// this.buildReadWriteSessionNotAvailableFaultInjectionRules, +// executeReadManyOperation, +// NO_REGION_SWITCH_HINT, +// this.validateResponseHasSuccess, +// ALL_CONNECTION_MODES_INCLUDED +// }, + { + "Test read many operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + executeReadManyOperation, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ALL_CONNECTION_MODES_INCLUDED + } + }; + } + + @DataProvider(name = "masterResourceFailuresDataProviderMiscGateway") + public Object[][] masterResourceFailuresDataProviderMiscGateway() { + + return new Object[][]{ + { + "Test read operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + 
.withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test read operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test read operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test create operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test 
create operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test create operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test upsert operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test upsert operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + 
.withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test upsert operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test replace operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test replace operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + 
ONLY_GATEWAY_MODE + }, + { + "Test replace operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test delete operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test delete operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test delete operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + 
.withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test patch operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test patch operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test patch operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, 
+ this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test batch operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test batch operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test batch operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test query operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + 
.withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test query operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test query operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test read feed operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + 
NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test read feed operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + }, + { + "Test read feed operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_GATEWAY_MODE + } + }; + } + + @DataProvider(name = "masterResourceFailuresDataProviderMiscDirect") + public Object[][] masterResourceFailuresDataProviderMiscDirect() { + + return new Object[][]{ + { + "Test read operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test read 
operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test read operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test create operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test create operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + 
.withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test create operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test upsert operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test upsert operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + 
}, + { + "Test upsert operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test replace operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test replace operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test replace operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withOverrideFaultInjectionOperationType(true) + 
.withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test delete operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test delete operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test delete operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + 
ONLY_DIRECT_MODE + }, + { + "Test patch operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test patch operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test patch operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test batch operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + 
.withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test batch operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test batch operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test query operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE 
+ }, + { + "Test query operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test query operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test read feed operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildServiceUnavailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test read feed operation injected with read session not available in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withOverrideFaultInjectionOperationType(true) + 
.withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + }, + { + "Test read feed operation injected with too many requests exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withOverrideFaultInjectionOperationType(true) + .withHitLimit(3) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionConnectionType(FaultInjectionConnectionType.GATEWAY), + this.buildTooManyRequestsErrorFaultInjectionRules, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + ONLY_DIRECT_MODE + } + }; + } + @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "miscellaneousOpTestConfigsDirect", timeOut = 4 * TIMEOUT) public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDirect( String testId, @@ -2110,7 +2982,298 @@ public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDir Function> generateFaultInjectionRules, CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, CosmosRegionSwitchHint regionSwitchHint, - Boolean nonIdempotentWriteRetriesEnabled, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + executeMiscOperationHitsTerminalExceptionAcrossKRegions( + testId, + 
faultInjectionRuleParamsWrapper, + generateFaultInjectionRules, + e2eLatencyPolicyCfg, + regionSwitchHint, + nonIdempotentWriteRetriesEnabled, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + allowedConnectionModes, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "miscellaneousOpTestConfigsGateway", timeOut = 4 * TIMEOUT) + public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsGateway( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + executeMiscOperationHitsTerminalExceptionAcrossKRegions( + testId, + faultInjectionRuleParamsWrapper, + generateFaultInjectionRules, + e2eLatencyPolicyCfg, + regionSwitchHint, + nonIdempotentWriteRetriesEnabled, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + 
validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + allowedConnectionModes, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); + } + + CosmosAsyncClient asyncClient = null; + FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); + 
faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + + try { + + asyncClient = clientBuilder.buildAsyncClient(); + + operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom = resolveTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); + int testObjCountToBootstrapFrom = operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom; + + operationInvocationParamsWrapper.containerIdToTarget = resolveContainerIdByFaultInjectionOperationType(faultInjectionOperationType); + + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + + List testObjects = new ArrayList<>(); + + for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { + TestObject testObject = TestObject.create(); + testObjects.add(testObject); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + } + + FeedRange faultyFeedRange; + + if (testObjects.size() != 1) { + faultyFeedRange = FeedRange.forFullRange(); + } else { + faultyFeedRange = FeedRange.forLogicalPartition(new PartitionKey(testObjects.get(0).getId())); + } + + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith = testObjects; + + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } + + Function> executeDataPlaneOperation + = resolveDataPlaneOperation(faultInjectionOperationType); + + operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions(); + + if (e2eLatencyPolicyCfg != null) { + 
operationInvocationParamsWrapper.patchItemRequestOptions = new CosmosPatchItemRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + + operationInvocationParamsWrapper.queryRequestOptions = new CosmosQueryRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + + operationInvocationParamsWrapper.itemRequestOptions + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } + + if (nonIdempotentWriteRetriesEnabled) { + operationInvocationParamsWrapper.itemRequestOptions + .setNonIdempotentWriteRetryPolicy(true, true); + } + + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readManyTestConfigs", timeOut = 4 * TIMEOUT) + public void readManyOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes, + int 
expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = this.writeRegions.get(0); + this.secondPreferredRegion = this.writeRegions.get(1); + + CosmosAsyncClient asyncClient = null; + + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + operationInvocationParamsWrapper.queryType = QueryType.READ_MANY; + + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); + } + + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + + try { + + asyncClient = clientBuilder.buildAsyncClient(); + + operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey; + + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + + List feedRanges = asyncContainer.getFeedRanges().block(); + + assertThat(feedRanges).isNotNull().as("feedRanges is not expected to be null!"); + assertThat(feedRanges).isNotEmpty().as("feedRanges is not expected to be empty!"); + + Map> partitionKeyToItemIdentityList = new HashMap<>(); + List partitionKeys = new ArrayList<>(); + + for (FeedRange ignored : feedRanges) { + String pkForFeedRange = UUID.randomUUID().toString(); + + partitionKeys.add(pkForFeedRange); + partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>()); + + for (int i 
= 0; i < 10; i++) { + TestObject testObject = TestObject.create(pkForFeedRange); + + partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId())); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getMypk()), new CosmosItemRequestOptions()).block(); + } + } + + CosmosReadManyRequestOptions readManyRequestOptions = new CosmosReadManyRequestOptions(); + + if (e2eLatencyPolicyCfg != null) { + readManyRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } + + operationInvocationParamsWrapper.readManyRequestOptions = readManyRequestOptions; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); + + PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); + FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey); + + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); + + operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } + + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); + } + + @Test(groups = 
{"circuit-breaker-read-all-read-many"}, dataProvider = "readAllTestConfigs", timeOut = 4 * TIMEOUT) + public void readAllOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, Consumer> validateResponseInPresenceOfFaults, Consumer> validateResponseInAbsenceOfFaults, Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, @@ -2121,198 +3284,394 @@ public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDir int operationIterationCountInFailureFlow, int operationIterationCountInRecoveryFlow) { - executeMiscOperationHitsTerminalExceptionAcrossKRegions( + CosmosAsyncClient asyncClient = null; + + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + operationInvocationParamsWrapper.queryType = QueryType.READ_ALL; + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); + } + + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + + try { + + asyncClient = clientBuilder.buildAsyncClient(); + + operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey; 
+ + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + deleteAllDocuments(asyncContainer); + + List feedRanges = asyncContainer.getFeedRanges().block(); + + assertThat(feedRanges).isNotNull().as("feedRanges is not expected to be null!"); + assertThat(feedRanges).isNotEmpty().as("feedRanges is not expected to be empty!"); + + Map> partitionKeyToItemIdentityList = new HashMap<>(); + List partitionKeys = new ArrayList<>(); + + for (FeedRange ignored : feedRanges) { + String pkForFeedRange = UUID.randomUUID().toString(); + + partitionKeys.add(pkForFeedRange); + partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>()); + + for (int i = 0; i < 10; i++) { + TestObject testObject = TestObject.create(pkForFeedRange); + + partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId())); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getMypk()), new CosmosItemRequestOptions()).block(); + } + } + + CosmosQueryRequestOptions queryRequestOptions = new CosmosQueryRequestOptions(); + + if (e2eLatencyPolicyCfg != null) { + queryRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } + + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); + + PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); + FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey); + + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + operationInvocationParamsWrapper.partitionKeyForReadAllOperation = faultyPartitionKey; + operationInvocationParamsWrapper.queryRequestOptions = queryRequestOptions; + + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + 
fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } + + execute( testId, faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, generateFaultInjectionRules, - e2eLatencyPolicyCfg, + executeDataPlaneOperation, regionSwitchHint, - nonIdempotentWriteRetriesEnabled, validateResponseInPresenceOfFaults, validateResponseInAbsenceOfFaults, validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - allowedConnectionModes, expectedRegionCountWithFailures, operationIterationCountInFailureFlow, operationIterationCountInRecoveryFlow); } - @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "miscellaneousOpTestConfigsGateway", timeOut = 4 * TIMEOUT) - public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsGateway( - String testId, - FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, - Function> generateFaultInjectionRules, - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, - CosmosRegionSwitchHint regionSwitchHint, - Boolean nonIdempotentWriteRetriesEnabled, - Consumer> validateResponseInPresenceOfFaults, - Consumer> validateResponseInAbsenceOfFaults, - Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, - Consumer validateRegionsContactedWhenExceptionBubblesUp, - Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes, - int expectedRegionCountWithFailures, - int operationIterationCountInFailureFlow, - int operationIterationCountInRecoveryFlow) { + private void execute( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + OperationInvocationParamsWrapper operationInvocationParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosRegionSwitchHint regionSwitchHint, + 
Consumer> validateResponseInPresenceOfFailures, + Consumer> validateResponseInAbsenceOfFailures, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + logger.info("Checking circuit breaking behavior for test type {}", testId); + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + + validateNonEmptyString(this.sharedAsyncDatabaseId); + CosmosAsyncDatabase database = client.getDatabase(this.sharedAsyncDatabaseId); + + CosmosAsyncContainer container; + + try { + + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + container = database.getContainer(operationInvocationParamsWrapper.containerIdToTarget); + + RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(client); + + RxCollectionCache collectionCache = ReflectionUtils.getClientCollectionCache(documentClient); + RxPartitionKeyRangeCache partitionKeyRangeCache = 
ReflectionUtils.getPartitionKeyRangeCache(documentClient); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); + + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(operationInvocationParamsWrapper.faultyFeedRange); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); + + Utils.ValueHolder faultyFeedRangeEpkImpl = new Utils.ValueHolder<>(); + Utils.ValueHolder faultyFeedRangePartitionKeyImpl = new Utils.ValueHolder<>(); + Utils.ValueHolder> faultyPartitionKeyRanges = new Utils.ValueHolder<>(); + Utils.ValueHolder faultyDocumentCollection = new Utils.ValueHolder<>(); + + assertThat(operationInvocationParamsWrapper.faultyFeedRange).isNotNull().as("Argument 'operationInvocationParamsWrapper.faultyFeedRange' cannot be null!"); + + if 
(operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangeEpkImpl) { + + faultyFeedRangeEpkImpl.v = (FeedRangeEpkImpl) operationInvocationParamsWrapper.faultyFeedRange; + + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangeEpkImpl.v.getRange(), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } else if (operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangePartitionKeyImpl) { + + faultyFeedRangePartitionKeyImpl.v = (FeedRangePartitionKeyImpl) operationInvocationParamsWrapper.faultyFeedRange; + + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangePartitionKeyImpl.v.getEffectiveRange(collection.getPartitionKey()), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } else { + fail("Argument 'operationInvocationParamsWrapper.faultyFeedRange' has to be a sub-type of FeedRangeEpkImpl or FeedRangePartitionKeyImpl!"); + } + + validateNonEmptyList(faultyPartitionKeyRanges.v); + assertThat(faultyDocumentCollection.v).isNotNull(); + + List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); + + if (faultInjectionRules != null && !faultInjectionRules.isEmpty()) { + + operationInvocationParamsWrapper.asyncContainer = container; + operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = operationInvocationParamsWrapper.faultyFeedRange; + 
operationInvocationParamsWrapper.feedRangeForQuery = operationInvocationParamsWrapper.faultyFeedRange; + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) + .block(); - executeMiscOperationHitsTerminalExceptionAcrossKRegions( - testId, - faultInjectionRuleParamsWrapper, - generateFaultInjectionRules, - e2eLatencyPolicyCfg, - regionSwitchHint, - nonIdempotentWriteRetriesEnabled, - validateResponseInPresenceOfFaults, - validateResponseInAbsenceOfFaults, - validateRegionsContactedWhenShortCircuitingHasKickedIn, - validateRegionsContactedWhenExceptionBubblesUp, - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - allowedConnectionModes, - expectedRegionCountWithFailures, - operationIterationCountInFailureFlow, - operationIterationCountInRecoveryFlow); - } + boolean hasReachedCircuitBreakingThreshold = false; + int executionCountAfterCircuitBreakingThresholdBreached = 0; - private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( - String testId, - FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, - Function> generateFaultInjectionRules, - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, - CosmosRegionSwitchHint regionSwitchHint, - Boolean nonIdempotentWriteRetriesEnabled, - Consumer> validateResponseInPresenceOfFaults, - Consumer> validateResponseInAbsenceOfFaults, - Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, - Consumer validateRegionsContactedWhenExceptionBubblesUp, - Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes, - int expectedRegionCountWithFailures, - int operationIterationCountInFailureFlow, - int operationIterationCountInRecoveryFlow) { + List testObjects = operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith; + PartitionKeyRangeWrapper partitionKeyRangeWrapper + = 
new PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId()); - List preferredRegions = this.writeRegions; + for (int i = 1; i <= operationIterationCountInFailureFlow; i++) { - this.firstPreferredRegion = preferredRegions.get(0); - this.secondPreferredRegion = preferredRegions.get(1); + if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); + } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) { + validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation); + } - OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker(); - if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { - throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); - } + int expectedCircuitBreakingThreshold + = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? 
+ consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, false) : + consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, true); - CosmosAsyncClient asyncClient = null; - FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); - faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + if (!hasReachedCircuitBreakingThreshold) { - try { + hasReachedCircuitBreakingThreshold = (expectedCircuitBreakingThreshold - 1) == + getEstimatedFailureCountSeenPerRegionPerPartitionKeyRange( + partitionKeyRangeWrapper, + partitionKeyRangeToLocationSpecificUnavailabilityInfo, + locationEndpointToLocationSpecificContextForPartitionField, + expectedCircuitBreakingThreshold - 1, + expectedRegionCountWithFailures); + validateResponseInPresenceOfFailures.accept(response); + } else { + executionCountAfterCircuitBreakingThresholdBreached++; + } - asyncClient = clientBuilder.buildAsyncClient(); + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateResponseInAbsenceOfFailures.accept(response); + } - operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom = resolveTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); - int testObjCountToBootstrapFrom = operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom; + if (response.cosmosItemResponse != null) { + assertThat(response.cosmosItemResponse).isNotNull(); + assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); - operationInvocationParamsWrapper.containerIdToTarget = resolveContainerIdByFaultInjectionOperationType(faultInjectionOperationType); + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + 
validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); + } + } else if (response.feedResponse != null) { + assertThat(response.feedResponse).isNotNull(); + assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); - validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); - CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); + } + } else if (response.cosmosException != null) { + assertThat(response.cosmosException).isNotNull(); + assertThat(response.cosmosException.getDiagnostics()).isNotNull(); - List testObjects = new ArrayList<>(); + if (!hasReachedCircuitBreakingThreshold) { + CosmosDiagnosticsContext ctx = response.cosmosException.getDiagnostics().getDiagnosticsContext(); - for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { - TestObject testObject = TestObject.create(); - testObjects.add(testObject); - asyncContainer.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - } + validateRegionsContactedWhenExceptionBubblesUp.accept(ctx); + } + } else if (response.batchResponse != null) { + assertThat(response.batchResponse).isNotNull(); + assertThat(response.batchResponse.getDiagnostics()).isNotNull(); - FeedRange faultyFeedRange; + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); + } + } + } - if (testObjects.size() != 1) { - faultyFeedRange = FeedRange.forFullRange(); - } else { - faultyFeedRange = FeedRange.forLogicalPartition(new 
PartitionKey(testObjects.get(0).getId())); - } + logger.info("Sleep for 90 seconds to allow Unavailable partitions to be HealthyTentative"); + Thread.sleep(90_000); - operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; - operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith = testObjects; + for (int i = operationIterationCountInFailureFlow + 1; i <= operationIterationCountInFailureFlow + operationIterationCountInRecoveryFlow; i++) { - } catch (Exception ex) { - logger.error("Test failed with ex :", ex); - fail(String.format("Test %s failed in bootstrap stage.", testId)); - } finally { - safeClose(asyncClient); - } + if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); + } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) { + validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation); + } - Function> executeDataPlaneOperation - = resolveDataPlaneOperation(faultInjectionOperationType); + ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + validateResponseInAbsenceOfFailures.accept(response); - operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions(); + if (response.cosmosItemResponse != null) { + assertThat(response.cosmosItemResponse).isNotNull(); + assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); - if (e2eLatencyPolicyCfg != null) { - operationInvocationParamsWrapper.patchItemRequestOptions = new CosmosPatchItemRequestOptions() - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); + } else if 
(response.feedResponse != null) { + assertThat(response.feedResponse).isNotNull(); + assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); - operationInvocationParamsWrapper.queryRequestOptions = new CosmosQueryRequestOptions() - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); + } else if (response.cosmosException != null) { + assertThat(response.cosmosException).isNotNull(); + assertThat(response.cosmosException.getDiagnostics()).isNotNull(); - operationInvocationParamsWrapper.itemRequestOptions - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); - } + response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response.batchResponse != null) { + assertThat(response.batchResponse).isNotNull(); + assertThat(response.batchResponse.getDiagnostics()).isNotNull(); - if (nonIdempotentWriteRetriesEnabled) { - operationInvocationParamsWrapper.itemRequestOptions - .setNonIdempotentWriteRetryPolicy(true, true); + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); + } + } + } + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Test should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + safeClose(client); } + } - execute( - testId, - faultInjectionRuleParamsWrapper, - operationInvocationParamsWrapper, - generateFaultInjectionRules, - executeDataPlaneOperation, - regionSwitchHint, - validateResponseInPresenceOfFaults, - validateResponseInAbsenceOfFaults, - 
validateRegionsContactedWhenShortCircuitingHasKickedIn, - validateRegionsContactedWhenExceptionBubblesUp, - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - expectedRegionCountWithFailures, - operationIterationCountInFailureFlow, - operationIterationCountInRecoveryFlow); + private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperationType faultInjectionOperationType, int opCount) { + switch (faultInjectionOperationType) { + case READ_ITEM: + case UPSERT_ITEM: + case REPLACE_ITEM: + case QUERY_ITEM: + case PATCH_ITEM: + case READ_FEED_ITEM: + return 1; + case DELETE_ITEM: + return 2 * opCount; + case CREATE_ITEM: + case BATCH_ITEM: + return 0; + default: + throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); + } } - @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readManyTestConfigs", timeOut = 4 * TIMEOUT) - public void readManyOperationHitsTerminalExceptionAcrossKRegions( + + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "gatewayRoutedFailureParametersDataProvider_ReadAll", timeOut = 4 * TIMEOUT) + public void testReadAll_withAllGatewayRoutedOperationFailuresInPrimaryRegion( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, Function> generateFaultInjectionRules, Function> executeDataPlaneOperation, - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, CosmosRegionSwitchHint regionSwitchHint, - Consumer> validateResponseInPresenceOfFaults, - Consumer> validateResponseInAbsenceOfFaults, - Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, - Consumer validateRegionsContactedWhenExceptionBubblesUp, - Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes, - int expectedRegionCountWithFailures, - int operationIterationCountInFailureFlow, - int 
operationIterationCountInRecoveryFlow) { + Consumer> validateResponse, + Set allowedConnectionModes) { + + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); List preferredRegions = this.writeRegions; - this.firstPreferredRegion = this.writeRegions.get(0); - this.secondPreferredRegion = this.writeRegions.get(1); - - CosmosAsyncClient asyncClient = null; - - OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); - operationInvocationParamsWrapper.queryType = QueryType.READ_MANY; + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); @@ -2322,100 +3681,83 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); } + CosmosAsyncClient asyncClient = null; + FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + List testObjects = new ArrayList<>(); try { asyncClient = clientBuilder.buildAsyncClient(); - operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey; - - CosmosAsyncContainer asyncContainer = 
asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); - - List feedRanges = asyncContainer.getFeedRanges().block(); - - assertThat(feedRanges).isNotNull().as("feedRanges is not expected to be null!"); - assertThat(feedRanges).isNotEmpty().as("feedRanges is not expected to be empty!"); + int testObjCountToBootstrapFrom = resolveTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 1); - Map> partitionKeyToItemIdentityList = new HashMap<>(); - List partitionKeys = new ArrayList<>(); - - for (FeedRange ignored : feedRanges) { - String pkForFeedRange = UUID.randomUUID().toString(); - - partitionKeys.add(pkForFeedRange); - partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>()); + operationInvocationParamsWrapper.containerIdToTarget = resolveContainerIdByFaultInjectionOperationType(faultInjectionOperationType); - for (int i = 0; i < 10; i++) { - TestObject testObject = TestObject.create(pkForFeedRange); + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); - partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId())); - asyncContainer.createItem(testObject, new PartitionKey(testObject.getMypk()), new CosmosItemRequestOptions()).block(); - } + for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { + TestObject testObject = TestObject.create(); + testObjects.add(testObject); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); } - CosmosReadManyRequestOptions readManyRequestOptions = new CosmosReadManyRequestOptions(); + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in 
bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } - if (e2eLatencyPolicyCfg != null) { - readManyRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + try { + asyncClient = clientBuilder.buildAsyncClient(); + + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); } - operationInvocationParamsWrapper.readManyRequestOptions = readManyRequestOptions; + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + operationInvocationParamsWrapper.asyncContainer = asyncContainer; + operationInvocationParamsWrapper.partitionKeyForReadAllOperation = new PartitionKey(testObjects.get(0).getMypk()); faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); - PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); - FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey); + List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); - operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; - faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); + CosmosFaultInjectionHelper + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) + .block(); - operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); + ResponseWrapper responseWrapper = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + + validateResponse.accept(responseWrapper); } catch (Exception ex) { - logger.error("Test failed with ex :", ex); - fail(String.format("Test %s failed in bootstrap stage.", testId)); + logger.error("Exception thrown :", ex); + 
fail("Test should have passed!"); } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); safeClose(asyncClient); } - - execute( - testId, - faultInjectionRuleParamsWrapper, - operationInvocationParamsWrapper, - generateFaultInjectionRules, - executeDataPlaneOperation, - regionSwitchHint, - validateResponseInPresenceOfFaults, - validateResponseInAbsenceOfFaults, - validateRegionsContactedWhenShortCircuitingHasKickedIn, - validateRegionsContactedWhenExceptionBubblesUp, - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - expectedRegionCountWithFailures, - operationIterationCountInFailureFlow, - operationIterationCountInRecoveryFlow); } - @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readAllTestConfigs", timeOut = 4 * TIMEOUT) - public void readAllOperationHitsTerminalExceptionAcrossKRegions( - String testId, - FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, - Function> generateFaultInjectionRules, - Function> executeDataPlaneOperation, - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, - CosmosRegionSwitchHint regionSwitchHint, - Consumer> validateResponseInPresenceOfFaults, - Consumer> validateResponseInAbsenceOfFaults, - Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, - Consumer validateRegionsContactedWhenExceptionBubblesUp, - Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes, - int expectedRegionCountWithFailures, - int operationIterationCountInFailureFlow, - int operationIterationCountInRecoveryFlow) { - - CosmosAsyncClient asyncClient = null; - OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); - operationInvocationParamsWrapper.queryType = QueryType.READ_ALL; + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "masterResourceFailuresDataProviderReadMany", timeOut = 4 * TIMEOUT) + 
public void testReadMany_withAllGatewayRoutedOperationFailuresInPrimaryRegion(String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponse, + Set allowedConnectionModes) { + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); List preferredRegions = this.writeRegions; @@ -2426,20 +3768,22 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + logger.info("Connection mode : {}", connectionPolicy.getConnectionMode()); + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); } + CosmosAsyncClient asyncClient = null; faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); try { - asyncClient = clientBuilder.buildAsyncClient(); operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey; CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); - deleteAllDocuments(asyncContainer); List feedRanges = asyncContainer.getFeedRanges().block(); @@ -2463,22 +3807,13 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( } } - 
CosmosQueryRequestOptions queryRequestOptions = new CosmosQueryRequestOptions(); - - if (e2eLatencyPolicyCfg != null) { - queryRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); - } + CosmosReadManyRequestOptions readManyRequestOptions = new CosmosReadManyRequestOptions(); + operationInvocationParamsWrapper.readManyRequestOptions = readManyRequestOptions; faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); - PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); - FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey); - - operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; - operationInvocationParamsWrapper.partitionKeyForReadAllOperation = faultyPartitionKey; - operationInvocationParamsWrapper.queryRequestOptions = queryRequestOptions; + operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); - faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); } catch (Exception ex) { logger.error("Test failed with ex :", ex); fail(String.format("Test %s failed in bootstrap stage.", testId)); @@ -2486,613 +3821,215 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( safeClose(asyncClient); } - execute( - testId, - faultInjectionRuleParamsWrapper, - operationInvocationParamsWrapper, - generateFaultInjectionRules, - executeDataPlaneOperation, - regionSwitchHint, - validateResponseInPresenceOfFaults, - validateResponseInAbsenceOfFaults, - validateRegionsContactedWhenShortCircuitingHasKickedIn, - validateRegionsContactedWhenExceptionBubblesUp, - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - expectedRegionCountWithFailures, - operationIterationCountInFailureFlow, - operationIterationCountInRecoveryFlow); + try { + + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + 
.sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + } + + asyncClient = clientBuilder.buildAsyncClient(); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + operationInvocationParamsWrapper.asyncContainer = asyncContainer; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); + + List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) + .block(); + + ResponseWrapper responseWrapper = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + + validateResponse.accept(responseWrapper); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Test should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + safeClose(asyncClient); + } } - private void execute( + @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "masterResourceFailuresDataProviderMiscGateway", timeOut = 4 * TIMEOUT) + public void testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegion_withGatewayConnectivity( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, - OperationInvocationParamsWrapper operationInvocationParamsWrapper, Function> generateFaultInjectionRules, - Function> executeDataPlaneOperation, CosmosRegionSwitchHint regionSwitchHint, - Consumer> validateResponseInPresenceOfFailures, - Consumer> validateResponseInAbsenceOfFailures, - Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, - Consumer validateRegionsContactedWhenExceptionBubblesUp, - Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - int 
expectedRegionCountWithFailures, - int operationIterationCountInFailureFlow, - int operationIterationCountInRecoveryFlow) { - - logger.info("Checking circuit breaking behavior for test type {}", testId); + Consumer> validateResponse, + Set allowedConnectionModes) { List preferredRegions = this.writeRegions; this.firstPreferredRegion = preferredRegions.get(0); this.secondPreferredRegion = preferredRegions.get(1); + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (regionSwitchHint != null) { - clientBuilder = clientBuilder - .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); } - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - - validateNonEmptyString(this.sharedAsyncDatabaseId); - CosmosAsyncDatabase database = client.getDatabase(this.sharedAsyncDatabaseId); - - CosmosAsyncContainer container; + CosmosAsyncClient asyncClient = null; + FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + List 
testObjects = new ArrayList<>(); try { - validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); - container = database.getContainer(operationInvocationParamsWrapper.containerIdToTarget); - - RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(client); - - RxCollectionCache collectionCache = ReflectionUtils.getClientCollectionCache(documentClient); - RxPartitionKeyRangeCache partitionKeyRangeCache = ReflectionUtils.getPartitionKeyRangeCache(documentClient); - - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker - = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); - - Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); - Class partitionLevelUnavailabilityInfoClass - = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); - assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); - - Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); - partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); - - Field locationEndpointToLocationSpecificContextForPartitionField - = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); - locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); - - ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo - = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); - - faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(operationInvocationParamsWrapper.faultyFeedRange); - faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); - - 
Utils.ValueHolder faultyFeedRangeEpkImpl = new Utils.ValueHolder<>(); - Utils.ValueHolder faultyFeedRangePartitionKeyImpl = new Utils.ValueHolder<>(); - Utils.ValueHolder> faultyPartitionKeyRanges = new Utils.ValueHolder<>(); - Utils.ValueHolder faultyDocumentCollection = new Utils.ValueHolder<>(); - - assertThat(operationInvocationParamsWrapper.faultyFeedRange).isNotNull().as("Argument 'operationInvocationParamsWrapper.faultyFeedRange' cannot be null!"); - - if (operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangeEpkImpl) { + asyncClient = clientBuilder.buildAsyncClient(); - faultyFeedRangeEpkImpl.v = (FeedRangeEpkImpl) operationInvocationParamsWrapper.faultyFeedRange; + operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom = resolveTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); + int testObjCountToBootstrapFrom = operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom; - collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) - .flatMap(collection -> { - faultyDocumentCollection.v = collection; - return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangeEpkImpl.v.getRange(), true, null); - }) - .flatMap(listValueHolder -> { - faultyPartitionKeyRanges.v = listValueHolder.v; - return Mono.just(listValueHolder); - }).block(); - } else if (operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangePartitionKeyImpl) { + operationInvocationParamsWrapper.containerIdToTarget = resolveContainerIdByFaultInjectionOperationType(faultInjectionOperationType); - faultyFeedRangePartitionKeyImpl.v = (FeedRangePartitionKeyImpl) operationInvocationParamsWrapper.faultyFeedRange; + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + CosmosAsyncContainer asyncContainer = 
asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); - collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) - .flatMap(collection -> { - faultyDocumentCollection.v = collection; - return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangePartitionKeyImpl.v.getEffectiveRange(collection.getPartitionKey()), true, null); - }) - .flatMap(listValueHolder -> { - faultyPartitionKeyRanges.v = listValueHolder.v; - return Mono.just(listValueHolder); - }).block(); - } else { - fail("Argument 'operationInvocationParamsWrapper.faultyFeedRange' has to be a sub-type of FeedRangeEpkImpl or FeedRangePartitionKeyImpl!"); + for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { + TestObject testObject = TestObject.create(); + testObjects.add(testObject); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); } - validateNonEmptyList(faultyPartitionKeyRanges.v); - assertThat(faultyDocumentCollection.v).isNotNull(); + operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith = testObjects; + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? 
null : testObjects.get(0); - List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } - if (faultInjectionRules != null && !faultInjectionRules.isEmpty()) { + Function> executeDataPlaneOperation + = resolveDataPlaneOperation(faultInjectionOperationType); - operationInvocationParamsWrapper.asyncContainer = container; - operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = operationInvocationParamsWrapper.faultyFeedRange; - operationInvocationParamsWrapper.feedRangeForQuery = operationInvocationParamsWrapper.faultyFeedRange; + operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions(); - CosmosFaultInjectionHelper - .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) - .block(); + try { - boolean hasReachedCircuitBreakingThreshold = false; - int executionCountAfterCircuitBreakingThresholdBreached = 0; + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + } - List testObjects = operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith; - PartitionKeyRangeWrapper partitionKeyRangeWrapper - = new PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId()); + asyncClient = clientBuilder.buildAsyncClient(); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + operationInvocationParamsWrapper.asyncContainer = asyncContainer; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); - for (int i = 1; i <= 
operationIterationCountInFailureFlow; i++) { + List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); - if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { - operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); - } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) { - validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation); - } + CosmosFaultInjectionHelper + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) + .block(); - ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + ResponseWrapper responseWrapper = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); - ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker - = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker(); + validateResponse.accept(responseWrapper); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Test should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + safeClose(asyncClient); + } + } - int expectedCircuitBreakingThreshold - = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? 
- consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, false) : - consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, true); + @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "masterResourceFailuresDataProviderMiscDirect", timeOut = 4 * TIMEOUT) + public void testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegion_withDirectConnectivity(String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponse, + Set allowedConnectionModes) { + List preferredRegions = this.writeRegions; - if (!hasReachedCircuitBreakingThreshold) { + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); - hasReachedCircuitBreakingThreshold = (expectedCircuitBreakingThreshold - 1) == - getEstimatedFailureCountSeenPerRegionPerPartitionKeyRange( - partitionKeyRangeWrapper, - partitionKeyRangeToLocationSpecificUnavailabilityInfo, - locationEndpointToLocationSpecificContextForPartitionField, - expectedCircuitBreakingThreshold - 1, - expectedRegionCountWithFailures); - validateResponseInPresenceOfFailures.accept(response); - } else { - executionCountAfterCircuitBreakingThresholdBreached++; - } + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - if (executionCountAfterCircuitBreakingThresholdBreached > 1) { - validateResponseInAbsenceOfFailures.accept(response); - } + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (response.cosmosItemResponse != null) { - assertThat(response.cosmosItemResponse).isNotNull(); - 
assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); + } - if (executionCountAfterCircuitBreakingThresholdBreached > 1) { - validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); - } - } else if (response.feedResponse != null) { - assertThat(response.feedResponse).isNotNull(); - assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + CosmosAsyncClient asyncClient = null; + FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + List testObjects = new ArrayList<>(); - if (executionCountAfterCircuitBreakingThresholdBreached > 1) { - validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); - } - } else if (response.cosmosException != null) { - assertThat(response.cosmosException).isNotNull(); - assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + try { - if (!hasReachedCircuitBreakingThreshold) { - CosmosDiagnosticsContext ctx = response.cosmosException.getDiagnostics().getDiagnosticsContext(); + asyncClient = clientBuilder.buildAsyncClient(); - validateRegionsContactedWhenExceptionBubblesUp.accept(ctx); - } - } else if (response.batchResponse != null) { - assertThat(response.batchResponse).isNotNull(); - assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom = resolveTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); + int 
testObjCountToBootstrapFrom = operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom; - if (executionCountAfterCircuitBreakingThresholdBreached > 1) { - validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); - } - } - } + operationInvocationParamsWrapper.containerIdToTarget = resolveContainerIdByFaultInjectionOperationType(faultInjectionOperationType); - logger.info("Sleep for 90 seconds to allow Unavailable partitions to be HealthyTentative"); - Thread.sleep(90_000); + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); - for (int i = operationIterationCountInFailureFlow + 1; i <= operationIterationCountInFailureFlow + operationIterationCountInRecoveryFlow; i++) { + for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { + TestObject testObject = TestObject.create(); + testObjects.add(testObject); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + } - if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { - operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); - } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) { - validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation); - } + operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith = testObjects; + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? 
null : testObjects.get(0); - ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); - validateResponseInAbsenceOfFailures.accept(response); + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } - if (response.cosmosItemResponse != null) { - assertThat(response.cosmosItemResponse).isNotNull(); - assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + Function> executeDataPlaneOperation + = resolveDataPlaneOperation(faultInjectionOperationType); - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); - } else if (response.feedResponse != null) { - assertThat(response.feedResponse).isNotNull(); - assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions(); - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); - } else if (response.cosmosException != null) { - assertThat(response.cosmosException).isNotNull(); - assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + try { - response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response.batchResponse != null) { - assertThat(response.batchResponse).isNotNull(); - assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + } - 
validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); - } - } - } - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Test should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); - safeClose(client); - } - } + asyncClient = clientBuilder.buildAsyncClient(); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + operationInvocationParamsWrapper.asyncContainer = asyncContainer; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); - private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperationType faultInjectionOperationType, int opCount) { - switch (faultInjectionOperationType) { - case READ_ITEM: - case UPSERT_ITEM: - case REPLACE_ITEM: - case QUERY_ITEM: - case PATCH_ITEM: - case READ_FEED_ITEM: - return 1; - case DELETE_ITEM: - return 2 * opCount; - case CREATE_ITEM: - case BATCH_ITEM: - return 0; - default: - throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); - } - } + List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); -// @Test(groups = {"multi-master"}) -// public void testCreate_404_1002_FirstRegionOnly_LocalPreferred_EagerAvailabilityStrategy_WithRetries() { -// -// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); -// -// CosmosAsyncClient asyncClient = buildCosmosClient( -// ConsistencyLevel.SESSION, -// 
Arrays.asList("West US 2", "South Central US", "East US"), -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.GATEWAY, -// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) -// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) -// .build(), -// new NonIdempotentWriteRetryOptions() -// .setEnabled(true) -// .setTrackingIdUsed(true)); -// -// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); -// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); -// -// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders -// .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) -// .build(); -// -// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() -// .connectionType(FaultInjectionConnectionType.GATEWAY) -//// .operationType(FaultInjectionOperationType.CREATE_ITEM) -// .region("West US 2") -// .build(); -// -// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) -// .condition(faultInjectionCondition) -// .result(faultInjectionServerErrorResult) -// .build(); -// -// asyncContainer.getFeedRanges().block(); -// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); -// -// try { -// CosmosItemResponse response = asyncContainer.createItem(TestObject.create(UUID.randomUUID().toString())).block(); -// -// System.out.println("Success Diagnostics : " + response.getDiagnostics()); -// } catch (CosmosException ex) { -// -// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); -// } finally { -// asyncClient.close(); -// -// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); -// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); -// 
System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); -// } -// } -// -// @Test(groups = {"multi-master"}) -// public void testCreate_500_FirstRegionOnly_LocalPreferred_EagerAvailabilityStrategy_WithRetries() { -// -// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); -// System.setProperty( -// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", -// "{\"isPartitionLevelCircuitBreakerEnabled\": true, " -// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," -// + "\"consecutiveExceptionCountToleratedForReads\": 10," -// + "\"consecutiveExceptionCountToleratedForWrites\": 5," -// + "}"); -// -// -// CosmosAsyncClient asyncClient = buildCosmosClient( -// ConsistencyLevel.SESSION, -// Arrays.asList("West US 2", "South Central US", "East US"), -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) -// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) -// .build(), -// new NonIdempotentWriteRetryOptions() -// .setEnabled(true) -// .setTrackingIdUsed(true)); -// -// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); -// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); -// -// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders -// .getResultBuilder(FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR) -// .build(); -// -// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() -// .connectionType(FaultInjectionConnectionType.DIRECT) -// .region("West US 2") -// .build(); -// -// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) -// 
.condition(faultInjectionCondition) -// .result(faultInjectionServerErrorResult) -// .build(); -// -// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); -// -// try { -// CosmosItemResponse response = null; -// -// for (int i = 1; i <= 7; i++) { -// response = asyncContainer.createItem(TestObject.create(UUID.randomUUID().toString())).onErrorResume(throwable -> Mono.empty()).block(); -// } -// -// System.out.println("Success Diagnostics : " + response.getDiagnostics()); -// } catch (CosmosException ex) { -// -// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); -// } finally { -// asyncClient.close(); -// -// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); -// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); -// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); -// System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); -// } -// } -// -// @Test(groups = {"multi-master"}) -// public void testRead_503_FirstRegionOnly() { -// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); -// System.setProperty( -// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", -// "{\"isPartitionLevelCircuitBreakerEnabled\": true, " -// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," -// + "\"consecutiveExceptionCountToleratedForReads\": 10," -// + "\"consecutiveExceptionCountToleratedForWrites\": 5," -// + "}"); -// -// -// CosmosAsyncClient asyncClient = buildCosmosClient( -// ConsistencyLevel.SESSION, -// Arrays.asList("West US 2", "South Central US", "East US"), -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) -// 
.availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) -// .build(), -// new NonIdempotentWriteRetryOptions() -// .setEnabled(true) -// .setTrackingIdUsed(true)); -// -// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); -// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); -// -// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders -// .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) -// .build(); -// -// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() -// .connectionType(FaultInjectionConnectionType.DIRECT) -// .region("West US 2") -// .build(); -// -// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) -// .condition(faultInjectionCondition) -// .result(faultInjectionServerErrorResult) -// .build(); -// -// TestObject testObject = TestObject.create(); -// String id = testObject.getId(); -// String myPk = testObject.getMypk(); -// -// CosmosItemResponse responseFromCreate = asyncContainer.createItem(testObject).block(); -// -// CosmosAsyncClient asyncClient2 = buildCosmosClient( -// ConsistencyLevel.SESSION, -// Arrays.asList("West US 2", "South Central US", "East US"), -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) -// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) -// .build(), -// new NonIdempotentWriteRetryOptions() -// .setEnabled(true) -// .setTrackingIdUsed(true)); -// -// asyncDatabase = asyncClient2.getDatabase("testDb"); -// asyncContainer = asyncDatabase.getContainer("testContainer"); -// -// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); -// -// try { -// CosmosItemResponse response = null; -// -// for (int i = 1; i <= 
10; i++) { -// response = asyncContainer.readItem(id, new PartitionKey(myPk), TestObject.class).onErrorResume(throwable -> Mono.empty()).block(); -// System.out.println("Success Diagnostics : " + response.getDiagnostics()); -// } -// -// response = asyncContainer.readItem(id, new PartitionKey(myPk), TestObject.class).onErrorResume(throwable -> Mono.empty()).block(); -// System.out.println("Success Diagnostics : " + response.getDiagnostics()); -// } catch (CosmosException ex) { -// -// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); -// } finally { -// asyncClient.close(); -// -// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); -// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); -// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); -// System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); -// } -// } -// -// @Test(groups = {"multi-master"}) -// public void testCreate_503_FirstRegionOnly() { -// System.setProperty("COSMOS.SESSION_CAPTURING_TYPE", "REGION_SCOPED"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT", "5000000"); -// System.setProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE", "0.001"); -// System.setProperty( -// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", -// "{\"isPartitionLevelCircuitBreakerEnabled\": true, " -// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," -// + "\"consecutiveExceptionCountToleratedForReads\": 10," -// + "\"consecutiveExceptionCountToleratedForWrites\": 5," -// + "}"); -// -// -// CosmosAsyncClient asyncClient = buildCosmosClient( -// ConsistencyLevel.SESSION, -// Arrays.asList("West US 2", "South Central US", "East US"), -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) -// .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) -// .build(), -// new 
NonIdempotentWriteRetryOptions() -// .setEnabled(true) -// .setTrackingIdUsed(true)); -// -// CosmosAsyncDatabase asyncDatabase = asyncClient.getDatabase("testDb"); -// CosmosAsyncContainer asyncContainer = asyncDatabase.getContainer("testContainer"); -// -// FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders -// .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) -// .build(); -// -// FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() -// .connectionType(FaultInjectionConnectionType.DIRECT) -// .region("West US 2") -// .build(); -// -// FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) -// .condition(faultInjectionCondition) -// .result(faultInjectionServerErrorResult) -// .build(); -// -// TestObject testObject = TestObject.create(); -// String id = testObject.getId(); -// String myPk = testObject.getMypk(); -// -// CosmosItemResponse responseFromCreate = asyncContainer.createItem(testObject).block(); -// CosmosFaultInjectionHelper.configureFaultInjectionRules(asyncContainer, Arrays.asList(faultInjectionRule)).block(); -// -// try { -// CosmosItemResponse response = null; -// -// for (int i = 1; i <= 5; i++) { -// response = asyncContainer.createItem(TestObject.create()).onErrorResume(throwable -> Mono.empty()).block(); -// System.out.println("Success Diagnostics : " + response.getDiagnostics()); -// } -// -// response = asyncContainer.createItem(TestObject.create()).onErrorResume(throwable -> Mono.empty()).block(); -// System.out.println("Success Diagnostics : " + response.getDiagnostics()); -// } catch (CosmosException ex) { -// -// System.out.println("Failure Diagnostics : " + ex.getDiagnostics()); -// } finally { -// asyncClient.close(); -// -// System.clearProperty("COSMOS.SESSION_CAPTURING_TYPE"); -// System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_INSERTION_COUNT"); -// 
System.clearProperty("COSMOS.PK_BASED_BLOOM_FILTER_EXPECTED_FFP_RATE"); -// System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); -// } -// } -// -// private static CosmosAsyncClient buildCosmosClient( -// ConsistencyLevel consistencyLevel, -// List preferredRegions, -// CosmosRegionSwitchHint regionSwitchHint, -// ConnectionMode connectionMode, -// CosmosEndToEndOperationLatencyPolicyConfig cosmosEndToEndOperationLatencyPolicyConfig, -// NonIdempotentWriteRetryOptions nonIdempotentWriteRetryOptions) { -// -// CosmosClientBuilder clientBuilder = new CosmosClientBuilder() -// .endpoint(TestConfigurations.HOST) -// .key(TestConfigurations.MASTER_KEY) -// .consistencyLevel(consistencyLevel) -// .preferredRegions(preferredRegions) -// .sessionRetryOptions(new SessionRetryOptionsBuilder() -// .regionSwitchHint(regionSwitchHint) -// .build()) -// .endToEndOperationLatencyPolicyConfig(cosmosEndToEndOperationLatencyPolicyConfig) -// .nonIdempotentWriteRetryOptions(nonIdempotentWriteRetryOptions) -// .openConnectionsAndInitCaches(new CosmosContainerProactiveInitConfigBuilder(Arrays.asList(new CosmosContainerIdentity("testDb", "testContainer"))) -// .setProactiveConnectionRegionsCount(3) -// .build()) -// .multipleWriteRegionsEnabled(true); -// -// if (connectionMode == ConnectionMode.DIRECT) { -// clientBuilder.directMode(); -// } else { -// clientBuilder.gatewayMode(); -// } -// -// return clientBuilder.buildAsyncClient(); -// } + CosmosFaultInjectionHelper + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) + .block(); + + ResponseWrapper responseWrapper = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + + validateResponse.accept(responseWrapper); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Test should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + 
safeClose(asyncClient); + } + } private static Function> resolveDataPlaneOperation(FaultInjectionOperationType faultInjectionOperationType) { @@ -3236,8 +4173,8 @@ private static Function> re return (paramsWrapper) -> { CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; - CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; - queryRequestOptions = queryRequestOptions.setFeedRange(paramsWrapper.feedRangeForQuery); + CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions == null ? new CosmosQueryRequestOptions() : paramsWrapper.queryRequestOptions; + queryRequestOptions = paramsWrapper.feedRangeForQuery == null ? queryRequestOptions.setFeedRange(FeedRange.forFullRange()) : queryRequestOptions.setFeedRange(paramsWrapper.feedRangeForQuery); try { @@ -3316,7 +4253,7 @@ private static Function> re try { FeedResponse feedResponseFromChangeFeed = asyncContainer.queryChangeFeed( - CosmosChangeFeedRequestOptions.createForProcessingFromBeginning(paramsWrapper.feedRangeToDrainForChangeFeed), + CosmosChangeFeedRequestOptions.createForProcessingFromBeginning(paramsWrapper.feedRangeToDrainForChangeFeed == null ? 
FeedRange.forFullRange() : paramsWrapper.feedRangeToDrainForChangeFeed), TestObject.class) .byPage() .blockLast(); @@ -3445,6 +4382,16 @@ private static class FaultInjectionRuleParamsWrapper { private FeedRange faultInjectionApplicableFeedRange; private FaultInjectionOperationType faultInjectionOperationType; private FaultInjectionConnectionType faultInjectionConnectionType; + private boolean isOverrideFaultInjectionOperationType = false; + + public boolean getIsOverrideFaultInjectionOperationType() { + return isOverrideFaultInjectionOperationType; + } + + public FaultInjectionRuleParamsWrapper withOverrideFaultInjectionOperationType(boolean isOverrideFaultInjectionOperationType) { + this.isOverrideFaultInjectionOperationType = isOverrideFaultInjectionOperationType; + return this; + } public CosmosAsyncContainer getFaultInjectionApplicableAsyncContainer() { return faultInjectionApplicableAsyncContainer; @@ -3538,24 +4485,37 @@ private static List buildServiceUnavailableFaultInjectionRul for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(paramsWrapper.getFaultInjectionOperationType()) + FaultInjectionConditionBuilder faultInjectionConditionBuilder = new FaultInjectionConditionBuilder() .connectionType(paramsWrapper.getFaultInjectionConnectionType()) - .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) - .region(applicableRegion) - .build(); + .region(applicableRegion); + + if (paramsWrapper.getFaultInjectionApplicableFeedRange() != null) { + faultInjectionConditionBuilder.endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()); + } + + if (!paramsWrapper.getIsOverrideFaultInjectionOperationType() && paramsWrapper.getFaultInjectionOperationType() != null) { + 
faultInjectionConditionBuilder.operationType(paramsWrapper.getFaultInjectionOperationType()); + } + + FaultInjectionCondition faultInjectionCondition = faultInjectionConditionBuilder.build(); FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) .build(); - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + FaultInjectionRuleBuilder faultInjectionRuleBuilder = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(paramsWrapper.getHitLimit()) - .build(); + .result(faultInjectionServerErrorResult); + + if (paramsWrapper.getFaultInjectionDuration() != null) { + faultInjectionRuleBuilder.duration(paramsWrapper.getFaultInjectionDuration()); + } - faultInjectionRules.add(faultInjectionRule); + if (paramsWrapper.getHitLimit() != null) { + faultInjectionRuleBuilder.hitLimit(paramsWrapper.getHitLimit()); + } + + faultInjectionRules.add(faultInjectionRuleBuilder.build()); } return faultInjectionRules; @@ -3571,20 +4531,33 @@ private static List buildServerGeneratedGoneErrorFaultInject for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(paramsWrapper.getFaultInjectionOperationType()) + FaultInjectionConditionBuilder faultInjectionConditionBuilder = new FaultInjectionConditionBuilder() .connectionType(paramsWrapper.getFaultInjectionConnectionType()) - .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) - .region(applicableRegion) - .build(); + .region(applicableRegion); + + if (paramsWrapper.getFaultInjectionApplicableFeedRange() != null) { + faultInjectionConditionBuilder.endpoints(new 
FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()); + } + + if (!paramsWrapper.getIsOverrideFaultInjectionOperationType() && paramsWrapper.getFaultInjectionOperationType() != null) { + faultInjectionConditionBuilder.operationType(paramsWrapper.getFaultInjectionOperationType()); + } + + FaultInjectionCondition faultInjectionCondition = faultInjectionConditionBuilder.build(); - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("gone-rule-" + UUID.randomUUID()) + FaultInjectionRuleBuilder faultInjectionRuleBuilder = new FaultInjectionRuleBuilder("gone-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(paramsWrapper.getFaultInjectionDuration()) - .build(); + .result(faultInjectionServerErrorResult); + + if (paramsWrapper.getFaultInjectionDuration() != null) { + faultInjectionRuleBuilder.duration(paramsWrapper.getFaultInjectionDuration()); + } + + if (paramsWrapper.getHitLimit() != null) { + faultInjectionRuleBuilder.hitLimit(paramsWrapper.getHitLimit()); + } - faultInjectionRules.add(faultInjectionRule); + faultInjectionRules.add(faultInjectionRuleBuilder.build()); } return faultInjectionRules; @@ -3602,20 +4575,33 @@ private static List buildTransitTimeoutFaultInjectionRules(F for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(paramsWrapper.getFaultInjectionOperationType()) + FaultInjectionConditionBuilder faultInjectionConditionBuilder = new FaultInjectionConditionBuilder() .connectionType(paramsWrapper.getFaultInjectionConnectionType()) - .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) - .region(applicableRegion) - .build(); + .region(applicableRegion); + + if (paramsWrapper.getFaultInjectionApplicableFeedRange() != null) { + 
faultInjectionConditionBuilder.endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()); + } - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("response-delay-rule-" + UUID.randomUUID()) + if (!paramsWrapper.getIsOverrideFaultInjectionOperationType() && paramsWrapper.getFaultInjectionOperationType() != null) { + faultInjectionConditionBuilder.operationType(paramsWrapper.getFaultInjectionOperationType()); + } + + FaultInjectionCondition faultInjectionCondition = faultInjectionConditionBuilder.build(); + + FaultInjectionRuleBuilder faultInjectionRuleBuilder = new FaultInjectionRuleBuilder("response-delay-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(paramsWrapper.getFaultInjectionDuration()) - .build(); + .result(faultInjectionServerErrorResult); - faultInjectionRules.add(faultInjectionRule); + if (paramsWrapper.getFaultInjectionDuration() != null) { + faultInjectionRuleBuilder.duration(paramsWrapper.getFaultInjectionDuration()); + } + + if (paramsWrapper.getHitLimit() != null) { + faultInjectionRuleBuilder.hitLimit(paramsWrapper.getHitLimit()); + } + + faultInjectionRules.add(faultInjectionRuleBuilder.build()); } return faultInjectionRules; @@ -3631,20 +4617,33 @@ private static List buildReadWriteSessionNotAvailableFaultIn for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(paramsWrapper.getFaultInjectionOperationType()) + FaultInjectionConditionBuilder faultInjectionConditionBuilder = new FaultInjectionConditionBuilder() .connectionType(paramsWrapper.getFaultInjectionConnectionType()) - .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) - .region(applicableRegion) - .build(); + .region(applicableRegion); + + if 
(paramsWrapper.getFaultInjectionApplicableFeedRange() != null) { + faultInjectionConditionBuilder.endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()); + } + + if (!paramsWrapper.getIsOverrideFaultInjectionOperationType() && paramsWrapper.getFaultInjectionOperationType() != null) { + faultInjectionConditionBuilder.operationType(paramsWrapper.getFaultInjectionOperationType()); + } + + FaultInjectionCondition faultInjectionCondition = faultInjectionConditionBuilder.build(); - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) + FaultInjectionRuleBuilder faultInjectionRuleBuilder = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(paramsWrapper.getFaultInjectionDuration()) - .build(); + .result(faultInjectionServerErrorResult); + + if (paramsWrapper.getFaultInjectionDuration() != null) { + faultInjectionRuleBuilder.duration(paramsWrapper.getFaultInjectionDuration()); + } + + if (paramsWrapper.getHitLimit() != null) { + faultInjectionRuleBuilder.hitLimit(paramsWrapper.getHitLimit()); + } - faultInjectionRules.add(faultInjectionRule); + faultInjectionRules.add(faultInjectionRuleBuilder.build()); } return faultInjectionRules; @@ -3660,20 +4659,33 @@ private static List buildTooManyRequestsErrorFaultInjectionR for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(paramsWrapper.getFaultInjectionOperationType()) + FaultInjectionConditionBuilder faultInjectionConditionBuilder = new FaultInjectionConditionBuilder() .connectionType(paramsWrapper.getFaultInjectionConnectionType()) - .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) - 
.region(applicableRegion) - .build(); + .region(applicableRegion); - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("too-many-requests-rule-" + UUID.randomUUID()) + if (paramsWrapper.getFaultInjectionApplicableFeedRange() != null) { + faultInjectionConditionBuilder.endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()); + } + + if (!paramsWrapper.getIsOverrideFaultInjectionOperationType() && paramsWrapper.getFaultInjectionOperationType() != null) { + faultInjectionConditionBuilder.operationType(paramsWrapper.getFaultInjectionOperationType()); + } + + FaultInjectionCondition faultInjectionCondition = faultInjectionConditionBuilder.build(); + + FaultInjectionRuleBuilder faultInjectionRuleBuilder = new FaultInjectionRuleBuilder("too-many-requests-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(paramsWrapper.getFaultInjectionDuration()) - .build(); + .result(faultInjectionServerErrorResult); + + if (paramsWrapper.getFaultInjectionDuration() != null) { + faultInjectionRuleBuilder.duration(paramsWrapper.getFaultInjectionDuration()); + } - faultInjectionRules.add(faultInjectionRule); + if (paramsWrapper.getHitLimit() != null) { + faultInjectionRuleBuilder.hitLimit(paramsWrapper.getHitLimit()); + } + + faultInjectionRules.add(faultInjectionRuleBuilder.build()); } return faultInjectionRules; @@ -3689,20 +4701,33 @@ private static List buildInternalServerErrorFaultInjectionRu for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(paramsWrapper.getFaultInjectionOperationType()) + FaultInjectionConditionBuilder faultInjectionConditionBuilder = new FaultInjectionConditionBuilder() .connectionType(paramsWrapper.getFaultInjectionConnectionType()) - .endpoints(new 
FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) - .region(applicableRegion) - .build(); + .region(applicableRegion); + + if (paramsWrapper.getFaultInjectionApplicableFeedRange() != null) { + faultInjectionConditionBuilder.endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()); + } + + if (!paramsWrapper.getIsOverrideFaultInjectionOperationType() && paramsWrapper.getFaultInjectionOperationType() != null) { + faultInjectionConditionBuilder.operationType(paramsWrapper.getFaultInjectionOperationType()); + } + + FaultInjectionCondition faultInjectionCondition = faultInjectionConditionBuilder.build(); - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("internal-server-error-rule-" + UUID.randomUUID()) + FaultInjectionRuleBuilder faultInjectionRuleBuilder = new FaultInjectionRuleBuilder("internal-server-error-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(paramsWrapper.getHitLimit()) - .build(); + .result(faultInjectionServerErrorResult); + + if (paramsWrapper.getFaultInjectionDuration() != null) { + faultInjectionRuleBuilder.duration(paramsWrapper.getFaultInjectionDuration()); + } + + if (paramsWrapper.getHitLimit() != null) { + faultInjectionRuleBuilder.hitLimit(paramsWrapper.getHitLimit()); + } - faultInjectionRules.add(faultInjectionRule); + faultInjectionRules.add(faultInjectionRuleBuilder.build()); } return faultInjectionRules; @@ -3717,20 +4742,33 @@ private static List buildRetryWithFaultInjectionRules(FaultI for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(paramsWrapper.getFaultInjectionOperationType()) + FaultInjectionConditionBuilder faultInjectionConditionBuilder = new FaultInjectionConditionBuilder() 
.connectionType(paramsWrapper.getFaultInjectionConnectionType()) - .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) - .region(applicableRegion) - .build(); + .region(applicableRegion); + + if (paramsWrapper.getFaultInjectionApplicableFeedRange() != null) { + faultInjectionConditionBuilder.endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()); + } - FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("retry-with-rule-" + UUID.randomUUID()) + if (!paramsWrapper.getIsOverrideFaultInjectionOperationType() && paramsWrapper.getFaultInjectionOperationType() != null) { + faultInjectionConditionBuilder.operationType(paramsWrapper.getFaultInjectionOperationType()); + } + + FaultInjectionCondition faultInjectionCondition = faultInjectionConditionBuilder.build(); + + FaultInjectionRuleBuilder faultInjectionRuleBuilder = new FaultInjectionRuleBuilder("retry-with-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(paramsWrapper.getFaultInjectionDuration()) - .build(); + .result(faultInjectionServerErrorResult); + + if (paramsWrapper.getFaultInjectionDuration() != null) { + faultInjectionRuleBuilder.duration(paramsWrapper.getFaultInjectionDuration()); + } + + if (paramsWrapper.getHitLimit() != null) { + faultInjectionRuleBuilder.hitLimit(paramsWrapper.getHitLimit()); + } - faultInjectionRules.add(faultInjectionRule); + faultInjectionRules.add(faultInjectionRuleBuilder.build()); } return faultInjectionRules; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 4969c79c3f7e..4b7aead3a1f4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -3359,40 +3359,31 @@ private Mono> readDocumentInternal( Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); return collectionObs.flatMap(documentCollectionValueHolder -> { - if (documentCollectionValueHolder != null && documentCollectionValueHolder.v != null) { - DocumentCollection documentCollection = documentCollectionValueHolder.v; - return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollection.getResourceId(), null, null) - .flatMap(collectionRoutingMapValueHolder -> { - if (collectionRoutingMapValueHolder.v != null) { - - Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, pointOperationContextForCircuitBreaker); + DocumentCollection documentCollection = documentCollectionValueHolder.v; + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollection.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, pointOperationContextForCircuitBreaker); - return requestObs.flatMap(req -> { + return requestObs.flatMap(req -> { - options.setPartitionKeyDefinition(documentCollection.getPartitionKey()); - addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); + options.setPartitionKeyDefinition(documentCollection.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(req); + 
req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); - return this.read(req, retryPolicyInstance) - .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); - }); - } else { - return Mono.error(new CollectionRoutingMapNotFoundException("")); - } + return this.read(req, retryPolicyInstance) + .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); }); - } else { - return Mono.error(new NotFoundException()); - } - } - ); + }); + } + ); } catch (Exception e) { logger.debug("Failure in reading a document due to [{}]", e.getMessage()); return Mono.error(e); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java index 9750771b6147..f0c66f61f9da 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java @@ -7,6 +7,7 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.Exceptions; +import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import 
com.azure.cosmos.implementation.NotFoundException; import com.azure.cosmos.implementation.OperationType; @@ -69,7 +70,7 @@ public Mono> tryLookupAsync(MetadataDiag .onErrorResume(err -> { logger.debug("tryLookupAsync on collectionRid {} encountered failure", collectionRid, err); CosmosException dce = Utils.as(err, CosmosException.class); - if (dce != null && Exceptions.isNotFound(dce)) { + if (dce != null && Exceptions.isNotFound(dce)&& !Exceptions.isSubStatusCode(dce, HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE)) { return Mono.just(new Utils.ValueHolder<>(null)); } @@ -178,7 +179,7 @@ public Mono> tryGetRangeByPartitionKeyRange partitionKeyRangeId, err); - if (dce != null && Exceptions.isNotFound(dce)) { + if (dce != null && Exceptions.isNotFound(dce) && !Exceptions.isSubStatusCode(dce, HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE)) { return Mono.just(new Utils.ValueHolder<>(null)); } From 8512d86653ee7b70d3d99a0874c9d2acbb87984c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 7 Aug 2024 22:42:59 -0400 Subject: [PATCH 06/51] Fixing tests. 
--- .../PartitionLevelCircuitBreakerTests.java | 22 +-- .../implementation/RxDocumentClientImpl.java | 167 ++++++++++++------ .../caches/RxPartitionKeyRangeCache.java | 5 +- .../DefaultDocumentQueryExecutionContext.java | 18 +- .../query/DocumentProducer.java | 5 - 5 files changed, 139 insertions(+), 78 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 49ebf2fe9a1d..5deef3870c3f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -2181,8 +2181,8 @@ public Object[][] gatewayRoutedFailureParametersDataProvider_ReadAll() { }; } - @DataProvider(name = "masterResourceFailuresDataProviderReadMany") - public Object[][] masterResourceFailuresDataProviderReadMany() { + @DataProvider(name = "gatewayRoutedFailuresParametersDataProvider_ReadMany") + public Object[][] gatewayRoutedFailuresParametersDataProvider_ReadMany() { Function> executeReadManyOperation = (paramsWrapper) -> { CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; @@ -2259,8 +2259,8 @@ public Object[][] masterResourceFailuresDataProviderReadMany() { }; } - @DataProvider(name = "masterResourceFailuresDataProviderMiscGateway") - public Object[][] masterResourceFailuresDataProviderMiscGateway() { + @DataProvider(name = "gatewayRoutedFailuresParametersDataProviderMiscGateway") + public Object[][] gatewayRoutedFailuresParametersDataProviderMiscGateway() { return new Object[][]{ { @@ -2617,8 +2617,8 @@ public Object[][] masterResourceFailuresDataProviderMiscGateway() { }; } - @DataProvider(name = "masterResourceFailuresDataProviderMiscDirect") - public Object[][] masterResourceFailuresDataProviderMiscDirect() { + @DataProvider(name = 
"gatewayRoutedFailuresParametersDataProviderMiscDirect") + public Object[][] gatewayRoutedFailuresParametersDataProviderMiscDirect() { return new Object[][]{ { @@ -3651,7 +3651,7 @@ private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperation @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "gatewayRoutedFailureParametersDataProvider_ReadAll", timeOut = 4 * TIMEOUT) - public void testReadAll_withAllGatewayRoutedOperationFailuresInPrimaryRegion( + public void testReadAll_withAllGatewayRoutedOperationFailures( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, Function> generateFaultInjectionRules, @@ -3743,8 +3743,8 @@ public void testReadAll_withAllGatewayRoutedOperationFailuresInPrimaryRegion( } - @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "masterResourceFailuresDataProviderReadMany", timeOut = 4 * TIMEOUT) - public void testReadMany_withAllGatewayRoutedOperationFailuresInPrimaryRegion(String testId, + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "gatewayRoutedFailuresParametersDataProvider_ReadMany", timeOut = 4 * TIMEOUT) + public void testReadMany_withAllGatewayRoutedOperationFailures(String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, Function> generateFaultInjectionRules, Function> executeDataPlaneOperation, @@ -3851,7 +3851,7 @@ public void testReadMany_withAllGatewayRoutedOperationFailuresInPrimaryRegion(St } } - @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "masterResourceFailuresDataProviderMiscGateway", timeOut = 4 * TIMEOUT) + @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "gatewayRoutedFailuresParametersDataProviderMiscGateway", timeOut = 4 * TIMEOUT) public void testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegion_withGatewayConnectivity( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -3942,7 +3942,7 @@ public void 
testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegi } } - @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "masterResourceFailuresDataProviderMiscDirect", timeOut = 4 * TIMEOUT) + @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "gatewayRoutedFailuresParametersDataProviderMiscDirect", timeOut = 4 * TIMEOUT) public void testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegion_withDirectConnectivity(String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, Function> generateFaultInjectionRules, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 4b7aead3a1f4..8520368d3214 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1952,18 +1952,29 @@ private Mono getBatchDocumentRequest(DocumentClientRet serializationDiagnosticsContext)); return this.collectionCache.resolveCollectionAsync(metadataDiagnosticsContext, request) - .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, documentCollectionValueHolder.v.getResourceId(), null, null) - .flatMap(collectionRoutingMapValueHolder -> { + .flatMap(documentCollectionValueHolder -> { - addBatchHeaders(request, serverBatchRequest, documentCollectionValueHolder.v); + if (documentCollectionValueHolder == null || documentCollectionValueHolder.v == null) { + return Mono.error(new IllegalStateException("documentCollectionValueHolder or documentCollectionValueHolder.v cannot be null")); + } - if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request) && options != null) { - 
options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); - addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, requestRetryPolicy); - } + return this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { - return Mono.just(request); - })); + if (collectionRoutingMapValueHolder == null || collectionRoutingMapValueHolder.v == null) { + return Mono.error(new IllegalStateException("collectionRoutingMapValueHolder or collectionRoutingMapValueHolder.v cannot be null")); + } + + addBatchHeaders(request, serverBatchRequest, documentCollectionValueHolder.v); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request) && options != null) { + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, requestRetryPolicy); + } + + return Mono.just(request); + }); + }); } private RxDocumentServiceRequest addBatchHeaders(RxDocumentServiceRequest request, @@ -2327,9 +2338,17 @@ private Mono> createDocumentInternal( RxDocumentServiceRequest request = requestToDocumentCollection.getT1(); Utils.ValueHolder documentCollectionValueHolder = requestToDocumentCollection.getT2(); + if (documentCollectionValueHolder == null || documentCollectionValueHolder.v == null) { + return Mono.error(new IllegalStateException("documentCollectionValueHolder or documentCollectionValueHolder.v cannot be null")); + } + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) .flatMap(collectionRoutingMapValueHolder -> { + if (collectionRoutingMapValueHolder == null || 
collectionRoutingMapValueHolder.v == null) { + return Mono.error(new IllegalStateException("collectionRoutingMapValueHolder or collectionRoutingMapValueHolder.v cannot be null")); + } + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, requestRetryPolicy); documentServiceRequestReference.set(request); @@ -2619,32 +2638,35 @@ private Mono> upsertDocumentInternal( RxDocumentServiceRequest request = requestToDocumentCollection.getT1(); Utils.ValueHolder documentCollectionValueHolder = requestToDocumentCollection.getT2(); + if (documentCollectionValueHolder == null || documentCollectionValueHolder.v == null) { + return Mono.error(new IllegalStateException("documentCollectionValueHolder or documentCollectionValueHolder.v cannot be null")); + } + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) .flatMap(collectionRoutingMapValueHolder -> { - if (collectionRoutingMapValueHolder != null && collectionRoutingMapValueHolder.v != null) { + if (collectionRoutingMapValueHolder == null || collectionRoutingMapValueHolder.v == null) { + return Mono.error(new IllegalStateException("collectionRoutingMapValueHolder or collectionRoutingMapValueHolder.v cannot be null")); + } - options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); - addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - 
requestReference.set(request); + request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(request); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); - return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); - } else { - return Mono.error(new NotFoundException()); - } + return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -2899,25 +2921,36 @@ private Mono> replaceDocumentInternal( addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker); return collectionObs - .flatMap(documentCollectionValueHolder -> 
this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) - .flatMap(collectionRoutingMapValueHolder -> { - return requestObs.flatMap(req -> { + .flatMap(documentCollectionValueHolder -> { - options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); - addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); + if (documentCollectionValueHolder == null || documentCollectionValueHolder.v == null) { + return Mono.error(new IllegalStateException("documentCollectionValueHolder or documentCollectionValueHolder.v cannot be null")); + } - req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(req); + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + if (collectionRoutingMapValueHolder == null || collectionRoutingMapValueHolder.v == null) { + return Mono.error(new IllegalStateException("collectionRoutingMapValueHolder or collectionRoutingMapValueHolder.v cannot be null")); + } - return replace(request, retryPolicyInstance); - }) - .map(resp -> toResourceResponse(resp, Document.class)); + return requestObs.flatMap(req -> { + + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); - })); + 
req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return replace(request, retryPolicyInstance); + }) + .map(resp -> toResourceResponse(resp, Document.class)); + }); + }); } private CosmosEndToEndOperationLatencyPolicyConfig getEndToEndOperationLatencyPolicyConfig( @@ -3076,23 +3109,37 @@ private Mono> patchDocumentInternal( pointOperationContextForCircuitBreaker); return collectionObs - .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) - .flatMap(collectionRoutingMapValueHolder -> requestObs - .flatMap(req -> { + .flatMap(documentCollectionValueHolder -> { - options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); - addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); + if (documentCollectionValueHolder == null || documentCollectionValueHolder.v == null) { + return Mono.error(new IllegalStateException("documentCollectionValueHolder or documentCollectionValueHolder.v cannot be null")); + } - req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - requestReference.set(req); + return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired - // to the RxDocumentServiceRequest instance - 
mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + if (collectionRoutingMapValueHolder == null || collectionRoutingMapValueHolder.v == null) { + return Mono.error(new IllegalStateException("collectionRoutingMapValueHolder or collectionRoutingMapValueHolder.v cannot be null")); + } - return patch(request, retryPolicyInstance); - }) - .map(resp -> toResourceResponse(resp, Document.class)))); + return requestObs + .flatMap(req -> { + + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMapValueHolder.v, retryPolicyInstance); + + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + requestReference.set(req); + + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + + return patch(request, retryPolicyInstance); + }) + .map(resp -> toResourceResponse(resp, Document.class)); + }); + }); } @Override @@ -3359,9 +3406,19 @@ private Mono> readDocumentInternal( Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); return collectionObs.flatMap(documentCollectionValueHolder -> { + + if (documentCollectionValueHolder == null || documentCollectionValueHolder.v == null) { + return Mono.error(new IllegalStateException("documentCollectionValueHolder or documentCollectionValueHolder.v cannot be null")); + } + DocumentCollection documentCollection = documentCollectionValueHolder.v; return this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), documentCollection.getResourceId(), null, null) 
.flatMap(collectionRoutingMapValueHolder -> { + + if (collectionRoutingMapValueHolder == null || collectionRoutingMapValueHolder.v == null) { + return Mono.error(new IllegalStateException("collectionRoutingMapValueHolder or collectionRoutingMapValueHolder.v cannot be null")); + } + Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, pointOperationContextForCircuitBreaker); return requestObs.flatMap(req -> { @@ -3994,6 +4051,8 @@ public Mono addPartitionLevelUnavailableRegionsOnReque RxDocumentClientImpl.this.addPartitionLevelUnavailableRegionsForFeedRequest(request, queryRequestOptions, collectionRoutingMapValueHolder.v); + // onBeforeSendRequest uses excluded regions to know the next location endpoint + // to route the request to unavailable regions are effectively excluded regions for this request if (documentClientRetryPolicy != null) { documentClientRetryPolicy.onBeforeSendRequest(request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java index f0c66f61f9da..d1c625c0f8d8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java @@ -70,7 +70,9 @@ public Mono> tryLookupAsync(MetadataDiag .onErrorResume(err -> { logger.debug("tryLookupAsync on collectionRid {} encountered failure", collectionRid, err); CosmosException dce = Utils.as(err, CosmosException.class); - if (dce != null && Exceptions.isNotFound(dce)&& !Exceptions.isSubStatusCode(dce, HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE)) { + + // bubble up in case a 404:1002 is seen to force retries as a part of document retries + if (dce != null && Exceptions.isNotFound(dce) && !Exceptions.isSubStatusCode(dce, 
HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE)) { return Mono.just(new Utils.ValueHolder<>(null)); } @@ -179,6 +181,7 @@ public Mono> tryGetRangeByPartitionKeyRange partitionKeyRangeId, err); + // bubble up in case a 404:1002 is seen to force retries as a part of document retries if (dce != null && Exceptions.isNotFound(dce) && !Exceptions.isSubStatusCode(dce, HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE)) { return Mono.just(new Utils.ValueHolder<>(null)); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index b095e0d2762f..4d2add0cc8bb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -205,14 +205,18 @@ private Mono> executeInternalFuncCore( this.retries.incrementAndGet(); return Mono.just(req) - .flatMap(request -> client.populateFeedRangeHeader(request)) - .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions, finalRetryPolicyInstance)) .flatMap(request -> { - finalRetryPolicyInstance.onBeforeSendRequest(request); - return executeRequestAsync( - this.itemSerializer, - req); - }); + + if(finalRetryPolicyInstance != null) { + finalRetryPolicyInstance.onBeforeSendRequest(request); + } + + return client.populateFeedRangeHeader(request); + }) + .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions, finalRetryPolicyInstance)) + .flatMap(request -> executeRequestAsync( + this.itemSerializer, + req)); }, finalRetryPolicyInstance) .map(tFeedResponse -> { this.fetchSchedulingMetrics.stop(); diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index 3cc9b8fcdb4b..90f446528390 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -156,11 +156,6 @@ public DocumentProducer( }) .flatMap(req -> client.addPartitionLevelUnavailableRegionsOnRequest(req, cosmosQueryRequestOptions, finalRetryPolicy)) .flatMap(req -> { - - if(finalRetryPolicy != null) { - finalRetryPolicy.onBeforeSendRequest(req); - } - ++retries; return executeRequestFunc.apply(req); }), finalRetryPolicy); From 6b518ae5db7d0a7ef0967ead1c2cf11c3ddc6216 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 8 Aug 2024 10:24:10 -0400 Subject: [PATCH 07/51] Perform collectionLink normalization. --- .../PartitionLevelCircuitBreakerTests.java | 10 ++++++++-- .../implementation/PathsHelperTest.java | 19 +++++++++++++++++++ .../cosmos/implementation/PathsHelper.java | 9 ++++++--- .../implementation/RxDocumentClientImpl.java | 14 -------------- 4 files changed, 33 insertions(+), 19 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 5deef3870c3f..dad4a62c9069 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -3649,7 +3649,8 @@ private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperation } } - + // test whether the operation succeeds when there are availability issues (404:1002, 503, 429) in the primary region + // for gateway 
routed requests @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "gatewayRoutedFailureParametersDataProvider_ReadAll", timeOut = 4 * TIMEOUT) public void testReadAll_withAllGatewayRoutedOperationFailures( String testId, @@ -3742,7 +3743,8 @@ public void testReadAll_withAllGatewayRoutedOperationFailures( } } - + // test whether the operation succeeds when there are availability issues (404:1002, 503, 429) in the primary region + // for gateway routed requests @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "gatewayRoutedFailuresParametersDataProvider_ReadMany", timeOut = 4 * TIMEOUT) public void testReadMany_withAllGatewayRoutedOperationFailures(String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -3851,6 +3853,8 @@ public void testReadMany_withAllGatewayRoutedOperationFailures(String testId, } } + // test whether the operation succeeds when there are availability issues (404:1002, 503, 429) in the primary region + // for gateway routed requests @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "gatewayRoutedFailuresParametersDataProviderMiscGateway", timeOut = 4 * TIMEOUT) public void testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegion_withGatewayConnectivity( String testId, @@ -3942,6 +3946,8 @@ public void testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegi } } + // test whether the operation succeeds when there are availability issues (404:1002, 503, 429) in the primary region + // for gateway routed requests @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "gatewayRoutedFailuresParametersDataProviderMiscDirect", timeOut = 4 * TIMEOUT) public void testMiscOperation_withAllGatewayRoutedOperationFailuresInPrimaryRegion_withDirectConnectivity(String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PathsHelperTest.java 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PathsHelperTest.java index 911d7596258b..93e8f4b433cd 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PathsHelperTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PathsHelperTest.java @@ -3,6 +3,7 @@ package com.azure.cosmos.implementation; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import static com.azure.cosmos.implementation.TestUtils.mockDiagnosticsClientContext; @@ -34,6 +35,19 @@ public class PathsHelperTest { private static final String INCORRECT = "incorrect"; + @DataProvider(name = "collectionPathsProvider") + public Object[] collectionPathsProvider() { + return new Object[][] { + {"/dbs/db1/colls/coll1/", "dbs/db1/colls/coll1"}, + {"dbs/db1/colls/coll1/", "dbs/db1/colls/coll1"}, + {"/dbs/db1/colls/coll1", "dbs/db1/colls/coll1"}, + {"dbs/db1/colls/coll1", "dbs/db1/colls/coll1"}, + {"dbs/db1/colls/coll1/docs/doc1/", "dbs/db1/colls/coll1"}, + {"/dbs/db1/colls/coll1/docs/doc1", "dbs/db1/colls/coll1"}, + {"dbs/db1/colls/coll1/docs/doc1/", "dbs/db1/colls/coll1"}, + }; + } + @Test(groups = { "unit" }) public void validateResourceID() { assertThat(PathsHelper.validateResourceId(ResourceType.Database, DATABASE_ID)).isTrue(); @@ -131,4 +145,9 @@ public void testEscapeCharacters() { assertThat(input).isNotEqualTo(output); assertThat("dbs/db\t").isEqualTo(output); } + + @Test(groups = {"unit"}, dataProvider = "collectionPathsProvider") + public void testCollectionLinkTrimming(String input, String expectedOutput) { + assertThat(PathsHelper.getCollectionPath(input)).isEqualTo(expectedOutput); + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java index 205405cdaa81..960bd5578a75 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java @@ -578,14 +578,17 @@ public static String generatePathForNameBased(ResourceType resourceType, String } public static String getCollectionPath(String resourceFullName) { + + StringBuilder trimmedResourceFullName = new StringBuilder(); + if (resourceFullName != null) { - String trimmedResourceFullName = Utils.trimBeginningAndEndingSlashes(resourceFullName); - int index = indexOfNth(trimmedResourceFullName, '/', 4); + trimmedResourceFullName.append(Utils.trimBeginningAndEndingSlashes(resourceFullName)); + int index = indexOfNth(trimmedResourceFullName.toString(), '/', 4); if (index > 0) return trimmedResourceFullName.substring(0, index); } - return resourceFullName; + return trimmedResourceFullName.isEmpty() ? resourceFullName : trimmedResourceFullName.toString(); } public static String getDatabasePath(String resourceFullName) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 8520368d3214..48abaa617e18 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2354,13 +2354,6 @@ private Mono> createDocumentInternal( documentServiceRequestReference.set(request); request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (requestRetryPolicy != null) { - 
requestRetryPolicy.onBeforeSendRequest(request); - } - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired // to the RxDocumentServiceRequest instance mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); @@ -2655,13 +2648,6 @@ private Mono> upsertDocumentInternal( request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(request); - // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses - // excluded regions to know the next location endpoint to route the request to - // unavailable regions are effectively excluded regions for this request - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired // to the RxDocumentServiceRequest instance mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); From dda284b27a3c08187b56fdc84041b7ac1fcfd10e Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 8 Aug 2024 10:40:40 -0400 Subject: [PATCH 08/51] Fix CI pipeline. --- .../main/java/com/azure/cosmos/implementation/PathsHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java index 960bd5578a75..07937fc19def 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java @@ -588,7 +588,7 @@ public static String getCollectionPath(String resourceFullName) { return trimmedResourceFullName.substring(0, index); } - return trimmedResourceFullName.isEmpty() ? 
resourceFullName : trimmedResourceFullName.toString(); + return trimmedResourceFullName.length() == 0 ? resourceFullName : trimmedResourceFullName.toString(); } public static String getDatabasePath(String resourceFullName) { From 4ad9cb7d3ff98775d64cad11beb9d4a1dffc6701 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 8 Aug 2024 16:55:08 -0400 Subject: [PATCH 09/51] Reacting to review comments. --- .../com/azure/cosmos/implementation/PathsHelper.java | 11 ++++++----- .../caches/RxPartitionKeyRangeCache.java | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java index 07937fc19def..0bf1689dc012 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PathsHelper.java @@ -579,16 +579,17 @@ public static String generatePathForNameBased(ResourceType resourceType, String public static String getCollectionPath(String resourceFullName) { - StringBuilder trimmedResourceFullName = new StringBuilder(); - if (resourceFullName != null) { - trimmedResourceFullName.append(Utils.trimBeginningAndEndingSlashes(resourceFullName)); + String trimmedResourceFullName = Utils.trimBeginningAndEndingSlashes(resourceFullName); int index = indexOfNth(trimmedResourceFullName.toString(), '/', 4); - if (index > 0) + if (index > 0) { return trimmedResourceFullName.substring(0, index); + } else { + return trimmedResourceFullName; + } } - return trimmedResourceFullName.length() == 0 ? 
resourceFullName : trimmedResourceFullName.toString(); + return resourceFullName; } public static String getDatabasePath(String resourceFullName) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java index d1c625c0f8d8..ca7321aab679 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java @@ -72,6 +72,7 @@ public Mono> tryLookupAsync(MetadataDiag CosmosException dce = Utils.as(err, CosmosException.class); // bubble up in case a 404:1002 is seen to force retries as a part of document retries + // todo: revert change when fault injection excludes 404:1002 for master resources if (dce != null && Exceptions.isNotFound(dce) && !Exceptions.isSubStatusCode(dce, HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE)) { return Mono.just(new Utils.ValueHolder<>(null)); } @@ -182,6 +183,7 @@ public Mono> tryGetRangeByPartitionKeyRange err); // bubble up in case a 404:1002 is seen to force retries as a part of document retries + // todo: revert change when fault injection excludes 404:1002 for master resources if (dce != null && Exceptions.isNotFound(dce) && !Exceptions.isSubStatusCode(dce, HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE)) { return Mono.just(new Utils.ValueHolder<>(null)); } From 17c6fc3feb87bb6054ce4e10d31be304ead9899b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 9 Aug 2024 13:38:34 -0400 Subject: [PATCH 10/51] Updated CHANGELOG.md. 
--- sdk/cosmos/azure-cosmos/CHANGELOG.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 6b885dd8ed82..d5235ac1e136 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -2,13 +2,12 @@ ### 4.64.0-beta.1 (Unreleased) -#### Features Added - -#### Breaking Changes - #### Bugs Fixed +* Fixed an issue to not fail fast for metadata resource resolution when faults are injected for Gateway routed operations. - See [PR 41428](https://github.com/Azure/azure-sdk-for-java/pull/41428) +* Fixed an issue to adhere to exception tolerance thresholds for consecutive read and write failures with circuit breaker. - See [PR 41428](https://github.com/Azure/azure-sdk-for-java/pull/41428) #### Other Changes +* Normalized `collectionLink` formatting. - See [PR 41428](https://github.com/Azure/azure-sdk-for-java/pull/41428) ### 4.63.0 (2024-07-26) From 72f711c1c6985b526a50c7c089fa0c0c65240344 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 9 Aug 2024 13:53:06 -0400 Subject: [PATCH 11/51] Force circuit breaking to be enabled. 
--- .../azure/cosmos/implementation/RxDocumentClientImpl.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 2a8027d4f313..3805f6185a67 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -557,6 +557,14 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); this.globalPartitionEndpointManagerForCircuitBreaker.init(); From 12b8cb232e2f23ff62606559762d086926bb7a7a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 12 Aug 2024 18:39:24 -0400 Subject: [PATCH 12/51] Increase error thresholds. 
--- .../com/azure/cosmos/implementation/RxDocumentClientImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 2762e4924fc3..05a0d423e9de 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -561,8 +561,8 @@ private RxDocumentClientImpl(URI serviceEndpoint, "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "\"consecutiveExceptionCountToleratedForReads\": 50," + + "\"consecutiveExceptionCountToleratedForWrites\": 25," + "}"); this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); From 6da8d6fa41e46fe377644529e8941760ce55289a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 13 Aug 2024 15:49:07 -0400 Subject: [PATCH 13/51] Force circuit breaker for certain tests. 
--- .../com/azure/cosmos/ExcludeRegionTests.java | 7 +++++++ ...InjectionWithAvailabilityStrategyTests.java | 18 +++++++++++++++--- ...ssionConsistencyWithRegionScopingTests.java | 14 ++++++++++++++ .../implementation/RxDocumentClientImpl.java | 14 +++++++------- 4 files changed, 43 insertions(+), 10 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java index 008d25bc7162..ecf699f360a2 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java @@ -55,6 +55,13 @@ public ExcludeRegionTests(CosmosClientBuilder clientBuilder) { public void beforeClass() { System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_WAIT_TIME_IN_MILLISECONDS", "1000"); System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS", "500"); + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); CosmosAsyncClient dummyClient = null; try { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index 4a1901a07cf1..ee76d7345b6f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -208,6 +208,15 @@ public String resolveTestNameSuffix(Object[] row) { @BeforeClass(groups 
= { "multi-master" }) public void beforeClass() { + + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) .key(TestConfigurations.MASTER_KEY) @@ -229,7 +238,7 @@ public void beforeClass() { Map writeRegionMap = this.getRegionMap(databaseAccount, true); - this.writeableRegions = new ArrayList<>(writeRegionMap.keySet()); + this.writeableRegions = new ArrayList<>(Arrays.asList("East US", "South Central US", "West US 2")); assertThat(this.writeableRegions).isNotNull(); assertThat(this.writeableRegions.size()).isGreaterThanOrEqualTo(2); @@ -323,7 +332,7 @@ public void beforeClass() { // When the container does not exist yet, you would see 401 for example for point reads etc. // So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(3000); + Thread.sleep(10_000); } catch (InterruptedException e) { throw new RuntimeException(e); } @@ -334,6 +343,9 @@ public void beforeClass() { } @AfterClass(groups = { "multi-master" }) public void afterClass() { + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) .key(TestConfigurations.MASTER_KEY) @@ -5086,7 +5098,7 @@ private void execute( // When the container does not exist yet, you would see 401 for example for point reads etc. 
// So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(3000); + Thread.sleep(10_000); } catch (InterruptedException e) { throw new RuntimeException(e); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java index 3488374002cb..c641ac334540 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java @@ -54,6 +54,7 @@ import com.fasterxml.jackson.databind.JsonNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Factory; @@ -1428,6 +1429,14 @@ public Object[][] readYouWriteWithExplicitRegionSwitchingTestContext() { @BeforeClass(groups = {"multi-region", "multi-master"}) public void beforeClass() { + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + try (CosmosAsyncClient tempClient = getClientBuilder().buildAsyncClient()) { RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(tempClient); @@ -1444,6 +1453,11 @@ public void beforeClass() { } } + @AfterClass(groups = {"multi-region", "multi-master"}) + public void afterClass() { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + @Test(groups = {"multi-region"}, dataProvider = 
"readYouWriteWithNoExplicitRegionSwitchingTestContext", timeOut = 80 * TIMEOUT) public void readYouWriteWithNoExplicitRegionSwitching( Function> func, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 05a0d423e9de..c50ca814970e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -557,13 +557,13 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 50," - + "\"consecutiveExceptionCountToleratedForWrites\": 25," - + "}"); +// System.setProperty( +// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", +// "{\"isPartitionLevelCircuitBreakerEnabled\": true, " +// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," +// + "\"consecutiveExceptionCountToleratedForReads\": 50," +// + "\"consecutiveExceptionCountToleratedForWrites\": 25," +// + "}"); this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); From 26b9e374c3a8a123e2490843b33eadd940f61d63 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 13 Aug 2024 15:59:07 -0400 Subject: [PATCH 14/51] Scope live test matrix. 
--- sdk/cosmos/live-platform-matrix.json | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 933a3eeb6322..5f9a0321da71 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -1,18 +1,10 @@ { "displayNames": { - "-Pe2e": "E2E", - "-Pcfp-split": "ChangeFeedSplit", - "-Pquery": "Query", - "-Psplit": "Split", - "-Pfast": "Fast", - "-Pdirect": "Direct", "-Pmulti-master": "MultiMaster", "-Pflaky-multi-master": "FlakyMultiMaster", "-Pcircuit-breaker-misc-direct": "CircuitBreakerMiscDirect", "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", "-Pcircuit-breaker-read-all-read-many": "CircuitBreakerReadAllAndReadMany", - "-Pmulti-region": "MultiRegion", - "-Plong": "Long", "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"": "TCP", "Session": "", "ubuntu": "", From 9f29399b1151fe83a91313128495838dbd57ca2d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 13 Aug 2024 16:19:28 -0400 Subject: [PATCH 15/51] Scope live test matrix. --- sdk/cosmos/azure-cosmos-tests/pom.xml | 294 +++++++++++++------------- sdk/cosmos/live-platform-matrix.json | 8 + 2 files changed, 155 insertions(+), 147 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/pom.xml b/sdk/cosmos/azure-cosmos-tests/pom.xml index e936812c2a49..36be0aace6f1 100644 --- a/sdk/cosmos/azure-cosmos-tests/pom.xml +++ b/sdk/cosmos/azure-cosmos-tests/pom.xml @@ -342,132 +342,132 @@ Licensed under the MIT License. 
- - - fast - - simple - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/fast-testng.xml - - - - - - - - - split - - split - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/split-testng.xml - - - - - - - - - cfp-split - - cfp-split - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/cfp-split-testng.xml - - - - - - - - - query - - query - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/query-testng.xml - - - - - - - - - long - - long - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/long-testng.xml - - - - - - - - - direct - - direct - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/direct-testng.xml - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + multi-master @@ -573,27 +573,27 @@ Licensed under the MIT License. 
- - - multi-region - - multi-region - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/multi-region-testng.xml - - - - - - + + + + + + + + + + + + + + + + + + + + + examples diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 5f9a0321da71..933a3eeb6322 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -1,10 +1,18 @@ { "displayNames": { + "-Pe2e": "E2E", + "-Pcfp-split": "ChangeFeedSplit", + "-Pquery": "Query", + "-Psplit": "Split", + "-Pfast": "Fast", + "-Pdirect": "Direct", "-Pmulti-master": "MultiMaster", "-Pflaky-multi-master": "FlakyMultiMaster", "-Pcircuit-breaker-misc-direct": "CircuitBreakerMiscDirect", "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", "-Pcircuit-breaker-read-all-read-many": "CircuitBreakerReadAllAndReadMany", + "-Pmulti-region": "MultiRegion", + "-Plong": "Long", "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"": "TCP", "Session": "", "ubuntu": "", From 90a2367fe48115a4cef42aee2bca0b276cacf75a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 13 Aug 2024 17:49:24 -0400 Subject: [PATCH 16/51] Scope live test matrix. 
--- .../cosmos/FaultInjectionWithAvailabilityStrategyTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index ee76d7345b6f..474ae3ebeb8d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -238,7 +238,7 @@ public void beforeClass() { Map writeRegionMap = this.getRegionMap(databaseAccount, true); - this.writeableRegions = new ArrayList<>(Arrays.asList("East US", "South Central US", "West US 2")); + this.writeableRegions = new ArrayList<>(writeRegionMap.keySet()); assertThat(this.writeableRegions).isNotNull(); assertThat(this.writeableRegions.size()).isGreaterThanOrEqualTo(2); From c56db8790d44a2e1d43d7ba2b1e628e922cf06bc Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 13 Aug 2024 21:34:00 -0400 Subject: [PATCH 17/51] Close globalPartitionEndpointManagerForCircuitBreaker. 
--- .../FaultInjectionWithAvailabilityStrategyTests.java | 4 ++-- .../cosmos/implementation/RxDocumentClientImpl.java | 5 +++++ ...balPartitionEndpointManagerForCircuitBreaker.java | 12 ++++++++---- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index 474ae3ebeb8d..ddbc2880f46f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -332,7 +332,7 @@ public void beforeClass() { // When the container does not exist yet, you would see 401 for example for point reads etc. // So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(10_000); + Thread.sleep(3000); } catch (InterruptedException e) { throw new RuntimeException(e); } @@ -5098,7 +5098,7 @@ private void execute( // When the container does not exist yet, you would see 401 for example for point reads etc. 
// So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(10_000); + Thread.sleep(3000); } catch (InterruptedException e) { throw new RuntimeException(e); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index c50ca814970e..3df6e69be984 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5770,6 +5770,11 @@ public void close() { this.throughputControlStore.close(); } + if (this.globalPartitionEndpointManagerForCircuitBreaker != null) { + logger.info("Closing globalPartitionEndpointManagerForCircuitBreaker..."); + LifeCycleUtils.closeQuietly(this.globalPartitionEndpointManagerForCircuitBreaker); + } + logger.info("Shutting down completed."); } else { logger.warn("Already shutdown!"); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 3b4ca3a429b6..57a78b384f7c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -36,7 +36,7 @@ import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; -public class GlobalPartitionEndpointManagerForCircuitBreaker { +public class GlobalPartitionEndpointManagerForCircuitBreaker implements AutoCloseable { private static final Logger logger = 
LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); @@ -49,6 +49,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; private final AtomicReference globalAddressResolverSnapshot; private final ConcurrentHashMap locationToRegion; + private final AtomicBoolean isClosed = new AtomicBoolean(false); public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); @@ -86,8 +87,6 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest AtomicBoolean isFailoverPossible = new AtomicBoolean(true); AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - String collectionLink = getCollectionLink(request); - this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionLevelLocationUnavailabilityInfoAsVal) -> { if (partitionLevelLocationUnavailabilityInfoAsVal == null) { @@ -199,7 +198,7 @@ public List getUnavailableRegionsForPartitionKeyRange(String collectionR private Flux updateStaleLocationInfo() { return Mono.just(1) .delayElement(Duration.ofSeconds(Configs.getStalePartitionUnavailabilityRefreshIntervalInSeconds())) - .repeat() + .repeat(() -> !this.isClosed.get()) .flatMap(ignore -> Flux.fromIterable(this.partitionKeyRangesWithPossibleUnavailableRegions.entrySet())) .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> { @@ -348,6 +347,11 @@ public void setGlobalAddressResolver(GlobalAddressResolver globalAddressResolver this.globalAddressResolverSnapshot.set(globalAddressResolver); } + @Override + public void close() { + this.isClosed.set(true); + } + private class PartitionLevelLocationUnavailabilityInfo { private 
final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; From ff830788547b51bc8647b07ba0f40d7d90372cec Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 14 Aug 2024 11:37:29 -0400 Subject: [PATCH 18/51] Close globalPartitionEndpointManagerForCircuitBreaker. --- .../cosmos/implementation/RxDocumentClientImpl.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 3df6e69be984..f0a2e8d058eb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5756,6 +5756,12 @@ public void close() { if (!closed.getAndSet(true)) { activeClientsCnt.decrementAndGet(); logger.info("Shutting down ..."); + + if (this.globalPartitionEndpointManagerForCircuitBreaker != null) { + logger.info("Closing globalPartitionEndpointManagerForCircuitBreaker..."); + LifeCycleUtils.closeQuietly(this.globalPartitionEndpointManagerForCircuitBreaker); + } + logger.info("Closing Global Endpoint Manager ..."); LifeCycleUtils.closeQuietly(this.globalEndpointManager); logger.info("Closing StoreClientFactory ..."); @@ -5770,11 +5776,6 @@ public void close() { this.throughputControlStore.close(); } - if (this.globalPartitionEndpointManagerForCircuitBreaker != null) { - logger.info("Closing globalPartitionEndpointManagerForCircuitBreaker..."); - LifeCycleUtils.closeQuietly(this.globalPartitionEndpointManagerForCircuitBreaker); - } - logger.info("Shutting down completed."); } else { logger.warn("Already shutdown!"); From 9139730fc494e6995a2cf9e3a8b316552da2f11b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 14 Aug 2024 11:45:47 -0400 Subject: [PATCH 19/51] Use non-static scheduler. 
--- .../azure/cosmos/implementation/CosmosSchedulers.java | 5 ----- ...lobalPartitionEndpointManagerForCircuitBreaker.java | 10 +++++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java index cce49c6d17e6..00b6202f608e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java @@ -86,9 +86,4 @@ public class CosmosSchedulers { TTL_FOR_SCHEDULER_WORKER_IN_SECONDS, true ); - - public final static Scheduler PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE = Schedulers.newSingle( - "partition-availability-staleness-check", - true - ); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 57a78b384f7c..1e2b6f68326c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -22,6 +22,8 @@ import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import reactor.core.scheduler.Scheduler; +import reactor.core.scheduler.Schedulers; import java.net.URI; import java.time.Duration; @@ -50,6 +52,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker implements AutoClos private final AtomicReference globalAddressResolverSnapshot; private final ConcurrentHashMap locationToRegion; private final AtomicBoolean 
isClosed = new AtomicBoolean(false); + private final Scheduler scheduler = Schedulers.newSingle("partition-availability-staleness-check"); public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); @@ -66,7 +69,7 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo public void init() { if (this.consecutiveExceptionBasedCircuitBreaker.isPartitionLevelCircuitBreakerEnabled()) { - this.updateStaleLocationInfo().subscribeOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE).subscribe(); + this.updateStaleLocationInfo().subscribeOn(scheduler).subscribe(); } } @@ -200,7 +203,7 @@ private Flux updateStaleLocationInfo() { .delayElement(Duration.ofSeconds(Configs.getStalePartitionUnavailabilityRefreshIntervalInSeconds())) .repeat(() -> !this.isClosed.get()) .flatMap(ignore -> Flux.fromIterable(this.partitionKeyRangesWithPossibleUnavailableRegions.entrySet())) - .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) + .publishOn(this.scheduler) .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> { logger.debug("Background updateStaleLocationInfo kicking in..."); @@ -257,7 +260,7 @@ private Flux updateStaleLocationInfo() { return gatewayAddressCache .submitOpenConnectionTasks(partitionKeyRangeWrapper.getPartitionKeyRange(), partitionKeyRangeWrapper.getCollectionResourceId()) - .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) + .publishOn(this.scheduler) .timeout(Duration.ofSeconds(Configs.getConnectionEstablishmentTimeoutForPartitionRecoveryInSeconds())) .doOnComplete(() -> { @@ -350,6 +353,7 @@ public void setGlobalAddressResolver(GlobalAddressResolver globalAddressResolver @Override public void close() { this.isClosed.set(true); + this.scheduler.dispose(); } private class PartitionLevelLocationUnavailabilityInfo { From 
9dbd51981d70e336542002f68d94afe154ec9f88 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 14 Aug 2024 19:09:52 -0400 Subject: [PATCH 20/51] Test multi-region + circuit-breaker job. --- .../EndToEndTimeOutWithAvailabilityTest.java | 6 ++--- .../com/azure/cosmos/ExcludeRegionTests.java | 15 ++++-------- ...njectionWithAvailabilityStrategyTests.java | 24 ++++++------------- .../com/azure/cosmos/MaxRetryCountTests.java | 20 ++++++++-------- ...sionConsistencyWithRegionScopingTests.java | 20 ++++------------ ...ExcludedRegionWithFaultInjectionTests.java | 14 +++++------ ...aultInjectionMetadataRequestRuleTests.java | 8 +++---- .../com/azure/cosmos/rx/TestSuiteBase.java | 4 ++-- .../IncrementalChangeFeedProcessorTest.java | 6 ++--- .../IncrementalChangeFeedProcessorTest.java | 2 +- .../implementation/RxDocumentClientImpl.java | 21 ++++++++++------ sdk/cosmos/live-platform-matrix.json | 19 +++++++++++++++ 12 files changed, 79 insertions(+), 80 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java index b39af38a0e61..40ab53781d80 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java @@ -69,7 +69,7 @@ public EndToEndTimeOutWithAvailabilityTest(CosmosClientBuilder clientBuilder) { random = new Random(); } - @BeforeClass(groups = {"multi-master"}, timeOut = SETUP_TIMEOUT * 100) + @BeforeClass(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = SETUP_TIMEOUT * 100) public void beforeClass() throws Exception { System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_WAIT_TIME_IN_MILLISECONDS", "1000"); System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS", "500"); @@ 
-93,7 +93,7 @@ public void beforeClass() throws Exception { } } - @Test(groups = {"multi-master"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT*100) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT*100) public void testThresholdAvailabilityStrategy(OperationType operationType, FaultInjectionOperationType faultInjectionOperationType) throws InterruptedException { if (this.preferredRegionList.size() <= 1) { throw new SkipException("excludeRegionTest_SkipFirstPreferredRegion can only be tested for multi-master with multi-regions"); @@ -206,7 +206,7 @@ private List insertDocuments(int docu return documentInserted; } - @AfterClass(groups = {"multi-master"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) + @AfterClass(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) public void afterClass() { safeClose(this.clientWithPreferredRegions); System.clearProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java index ecf699f360a2..73bd5562253d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java @@ -51,17 +51,10 @@ public ExcludeRegionTests(CosmosClientBuilder clientBuilder) { super(clientBuilder); } - @BeforeClass(groups = {"multi-master"}, timeOut = SETUP_TIMEOUT) + @BeforeClass(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = SETUP_TIMEOUT) public void beforeClass() { System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_WAIT_TIME_IN_MILLISECONDS", "1000"); System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS", 
"500"); - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); CosmosAsyncClient dummyClient = null; try { @@ -81,7 +74,7 @@ public void beforeClass() { } } - @AfterClass(groups = {"multi-master"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) + @AfterClass(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) public void afterClass() { safeClose(this.clientWithPreferredRegions); System.clearProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS"); @@ -112,7 +105,7 @@ public static Object[][] faultInjectionArgProvider() { }; } - @Test(groups = {"multi-master"}, dataProvider = "operationTypeArgProvider", timeOut = TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "operationTypeArgProvider", timeOut = TIMEOUT) public void excludeRegionTest_SkipFirstPreferredRegion(OperationType operationType) { if (this.preferredRegionList.size() <= 1) { @@ -138,7 +131,7 @@ public void excludeRegionTest_SkipFirstPreferredRegion(OperationType operationTy assertThat(cosmosDiagnostics.getContactedRegionNames()).containsAll(this.preferredRegionList.subList(1, 2)); } - @Test(groups = {"multi-master"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT) public void excludeRegionTest_readSessionNotAvailable( OperationType operationType, FaultInjectionOperationType faultInjectionOperationType) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index ddbc2880f46f..5b3367c527ea 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -206,17 +206,9 @@ public String resolveTestNameSuffix(Object[] row) { return (String)row[0]; } - @BeforeClass(groups = { "multi-master" }) + @BeforeClass(groups = { "multi-master", "multi-master-circuit-breaker" }) public void beforeClass() { - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); - CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) .key(TestConfigurations.MASTER_KEY) @@ -341,11 +333,9 @@ public void beforeClass() { safeClose(dummyClient); } } - @AfterClass(groups = { "multi-master" }) + @AfterClass(groups = { "multi-master", "multi-master-circuit-breaker" }) public void afterClass() { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); - CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) .key(TestConfigurations.MASTER_KEY) @@ -839,7 +829,7 @@ public Object[][] testConfigs_readAfterCreation() { }; } - @Test(groups = {"multi-master"}, dataProvider = "testConfigs_readAfterCreation") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_readAfterCreation") public void readAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -2257,7 +2247,7 @@ public Object[][] testConfigs_writeAfterCreation() { }; } - @Test(groups = {"multi-master"}, dataProvider = 
"testConfigs_writeAfterCreation") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_writeAfterCreation") public void writeAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -3416,7 +3406,7 @@ public Object[][] testConfigs_queryAfterCreation() { }; } - @Test(groups = {"multi-master"}, dataProvider = "testConfigs_queryAfterCreation") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_queryAfterCreation") public void queryAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -3988,7 +3978,7 @@ public Object[][] testConfigs_readManyAfterCreation() { }; } - @Test(groups = {"multi-master"}, dataProvider = "testConfigs_readManyAfterCreation") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_readManyAfterCreation") public void readManyAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -4766,7 +4756,7 @@ public Object[][] testConfigs_readAllAfterCreation() { }; } - @Test(groups = {"multi-master"}, dataProvider = "testConfigs_readAllAfterCreation") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_readAllAfterCreation") public void readAllAfterCreation( String testCaseId, Duration endToEndTimeout, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java index 7cefbadb33b4..bc00f0a39fe6 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java @@ -181,7 +181,7 @@ public String resolveTestNameSuffix(Object[] row) { return (String)row[0]; } - @BeforeClass(groups = { "multi-master" }) + @BeforeClass(groups = { "multi-master", "multi-master-circuit-breaker" }) public void beforeClass() { CosmosClientBuilder 
clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) @@ -256,7 +256,7 @@ public void beforeClass() { safeClose(dummyClient); } } - @AfterClass(groups = { "multi-master" }) + @AfterClass(groups = { "multi-master", "multi-master-circuit-breaker" }) public void afterClass() { CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) @@ -1301,7 +1301,7 @@ public Object[][] testConfigs_readMaxRetryCount_serverInternalServerError() { }; } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_readSessionNotAvailable") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_readSessionNotAvailable") public void readMaxRetryCount_readSessionNotAvailable( String testCaseId, Duration endToEndTimeout, @@ -1422,7 +1422,7 @@ public void readMaxRetryCount_readSessionNotAvailable( } } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_retryWith") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_retryWith") public void readMaxRetryCount_retryWith( String testCaseId, Duration endToEndTimeout, @@ -1499,7 +1499,7 @@ public void readMaxRetryCount_retryWith( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverGone") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverGone") public void readMaxRetryCount_serverGone( String testCaseId, Duration endToEndTimeout, @@ -1587,7 +1587,7 @@ public void readMaxRetryCount_serverGone( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_transitTimeout") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_transitTimeout") public void readMaxRetryCount_transitTimeout( String testCaseId, Duration endToEndTimeout, @@ -1676,7 +1676,7 @@ public void 
readMaxRetryCount_transitTimeout( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverTimeout") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverTimeout") public void readMaxRetryCount_serverTimeout( String testCaseId, Duration endToEndTimeout, @@ -1766,7 +1766,7 @@ public void readMaxRetryCount_serverTimeout( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverServiceUnavailable") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverServiceUnavailable") public void readMaxRetryCount_serverServiceUnavailable( String testCaseId, Duration endToEndTimeout, @@ -1854,7 +1854,7 @@ public void readMaxRetryCount_serverServiceUnavailable( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverInternalServerError") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverInternalServerError") public void readMaxRetryCount_serverInternalServerError( String testCaseId, Duration endToEndTimeout, @@ -1933,7 +1933,7 @@ public void readMaxRetryCount_serverInternalServerError( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverRequestRateTooLarge") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverRequestRateTooLarge") public void readMaxRetryCount_serverRequestRateTooLarge( String testCaseId, Duration endToEndTimeout, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java index c641ac334540..d74c0fb794e3 100644 --- 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java @@ -1426,17 +1426,9 @@ public Object[][] readYouWriteWithExplicitRegionSwitchingTestContext() { }; } - @BeforeClass(groups = {"multi-region", "multi-master"}) + @BeforeClass(groups = {"multi-region", "multi-master", "multi-master-circuit-breaker"}) public void beforeClass() { - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); - try (CosmosAsyncClient tempClient = getClientBuilder().buildAsyncClient()) { RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(tempClient); @@ -1453,10 +1445,8 @@ public void beforeClass() { } } - @AfterClass(groups = {"multi-region", "multi-master"}) - public void afterClass() { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); - } + @AfterClass(groups = {"multi-region", "multi-master", "multi-master-circuit-breaker"}) + public void afterClass() {} @Test(groups = {"multi-region"}, dataProvider = "readYouWriteWithNoExplicitRegionSwitchingTestContext", timeOut = 80 * TIMEOUT) public void readYouWriteWithNoExplicitRegionSwitching( @@ -1528,7 +1518,7 @@ public void readYouWriteWithNoExplicitRegionSwitching( } } - @Test(groups = {"multi-master"}, dataProvider = "readYouWriteWithExplicitRegionSwitchingTestContext", timeOut = 80 * TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readYouWriteWithExplicitRegionSwitchingTestContext", timeOut = 80 * TIMEOUT) public void readYouWriteWithExplicitRegionSwitching( Function> func, String testId, @@ -1650,7 
+1640,7 @@ public void readManyWithNoExplicitRegionSwitching( } } - @Test(groups = {"multi-master"}, dataProvider = "readManyWithExplicitRegionSwitchingTestContext", timeOut = 10 * TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readManyWithExplicitRegionSwitchingTestContext", timeOut = 10 * TIMEOUT) public void readManyWithExplicitRegionSwitching( Function> func, String testId, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java index 1b42c08346f7..3b76730f768e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java @@ -129,7 +129,7 @@ public ExcludedRegionWithFaultInjectionTests(CosmosClientBuilder cosmosClientBui // 13. bulkExecutionOptions: a CosmosBulkExecutionOptions instance configured to set on the data plane operation after mutation is done // 14. batchRequestOptions: a CosmosBatchRequestOptions instance configured to set on the data plane operation after mutation is done // 15. perRegionDuplicateCount: no. 
of times to duplicate a particular region in excludedRegions - @BeforeClass(groups = {"multi-master"}) + @BeforeClass(groups = {"multi-master", "multi-master-circuit-breaker"}) public void beforeClass() { this.cosmosAsyncClient = getClientBuilder().buildAsyncClient(); this.cosmosAsyncContainer = getSharedMultiPartitionCosmosContainerWithIdAsPartitionKey(this.cosmosAsyncClient); @@ -2257,31 +2257,31 @@ public Object[][] regionExclusionBulkTestConfigs() { return null; } - @Test(groups = { "multi-master" }, dataProvider = "regionExclusionReadAfterCreateTestConfigs") + @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "regionExclusionReadAfterCreateTestConfigs") public void regionExclusionMutationOnClient_readAfterCreate_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = { "multi-master" }, dataProvider = "regionExclusionQueryAfterCreateTestConfigs") + @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "regionExclusionQueryAfterCreateTestConfigs") public void regionExclusionMutationOnClient_queryAfterCreate_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = { "multi-master" }, dataProvider = "regionExclusionWriteAfterCreateTestConfigs") + @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "regionExclusionWriteAfterCreateTestConfigs") public void regionExclusionMutationOnClient_writeAfterCreate_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = {"multi-master"}, dataProvider = "regionExclusionBatchTestConfigs") + @Test(groups = {"multi-master", 
"multi-master-circuit-breaker"}, dataProvider = "regionExclusionBatchTestConfigs") public void regionExclusionMutationOnClient_batch_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = {"multi-master"}, dataProvider = "regionExclusionBulkTestConfigs") + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "regionExclusionBulkTestConfigs") public void regionExclusionMutationOnClient_bulk_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); @@ -2392,7 +2392,7 @@ private void execute(MutationTestConfig mutationTestConfig) throws InterruptedEx } } - @AfterClass(groups = {"multi-master"}) + @AfterClass(groups = {"multi-master", "multi-master-circuit-breaker"}) public void afterClass() { safeCloseAsync(this.cosmosAsyncClient); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java index f8e1364b8013..a264b54ee27e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java @@ -88,7 +88,7 @@ public static Object[][] partitionKeyRangesArgProvider() { }; } - @BeforeClass(groups = { "multi-region", "multi-master" }, timeOut = TIMEOUT) + @BeforeClass(groups = { "multi-region", "multi-master", "multi-master-circuit-breaker" }, timeOut = TIMEOUT) public void beforeClass() { this.client = getClientBuilder().buildAsyncClient(); AsyncDocumentClient asyncDocumentClient = 
BridgeInternal.getContextClient(this.client); @@ -215,7 +215,7 @@ public void faultInjectionServerErrorRuleTests_AddressRefresh_ConnectionDelay() } } - @Test(groups = { "multi-master" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT) + @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT) public void faultInjectionServerErrorRuleTests_AddressRefresh_ResponseDelay( FaultInjectionOperationType faultInjectionOperationType, OperationType operationType) throws JsonProcessingException { @@ -481,7 +481,7 @@ public void faultInjectionServerErrorRuleTests_AddressRefresh_TooManyRequest() t } } - @Test(groups = { "multi-master" }, dataProvider = "partitionKeyRangesArgProvider", timeOut = 40 * TIMEOUT) + @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "partitionKeyRangesArgProvider", timeOut = 40 * TIMEOUT) public void faultInjectionServerErrorRuleTests_PartitionKeyRanges_DelayError( FaultInjectionServerErrorType faultInjectionServerErrorType, Duration delay, @@ -591,7 +591,7 @@ public void faultInjectionServerErrorRuleTests_PartitionKeyRanges_DelayError( } } - @Test(groups = { "multi-master" }, timeOut = 40 * TIMEOUT) + @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, timeOut = 40 * TIMEOUT) public void faultInjectionServerErrorRuleTests_CollectionRead_ConnectionDelay() throws JsonProcessingException { // We need to create a new client because client may have marked region unavailable in other tests diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index 4d5d33056081..f38a19995093 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -203,7 +203,7 @@ public 
CosmosAsyncDatabase getDatabase(String id) { } } - @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SETUP_TIMEOUT) + @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many", "multi-master-circuit-breaker"}, timeOut = SUITE_SETUP_TIMEOUT) public void beforeSuite() { logger.info("beforeSuite Started"); @@ -226,7 +226,7 @@ public static void parallelizeUnitTests(ITestContext context) { // context.getSuite().getXmlSuite().setThreadCount(Runtime.getRuntime().availableProcessors()); } - @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) + @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many", "multi-master-circuit-breaker"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) public void afterSuite() { logger.info("afterSuite Started"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java index e836852b7bfb..13394ae8ae52 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java @@ -256,7 +256,7 @@ public void readFeedDocumentsStartFromCustomDate() throws InterruptedException { } } - @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); @@ -370,7 +370,7 @@ public void readFeedDocumentsStartFromCustomDateForMultiWrite_test() throws Inte } } - @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_WithCFPReadFromSatelliteRegion_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); @@ -505,7 +505,7 @@ public void readFeedDocumentsStartFromCustomDateForMultiWrite_WithCFPReadFromSat } } - @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_WithCFPReadSwitchToSatelliteRegion_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java index 0f0ab04840e1..5c7c779b1998 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java @@ -257,7 +257,7 @@ public void readFeedDocumentsStartFromCustomDate() throws InterruptedException { } } - @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index f0a2e8d058eb..3e4765f814ec 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5982,24 +5982,31 @@ public void addPartitionLevelUnavailableRegionsForRequest( checkNotNull(options.getPartitionKeyDefinition(), "Argument 'partitionKeyDefinition' within options cannot be null!"); checkNotNull(collectionRoutingMap, "Argument 'collectionRoutingMap' cannot be null!"); + PartitionKeyRange resolvedPartitionKeyRange = null; + PartitionKeyDefinition partitionKeyDefinition = options.getPartitionKeyDefinition(); PartitionKeyInternal partitionKeyInternal = request.getPartitionKeyInternal(); - String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKeyInternal, partitionKeyDefinition); - PartitionKeyRange partitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + if (partitionKeyInternal != null) { + String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKeyInternal, partitionKeyDefinition); + 
resolvedPartitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + + // cache the effective partition key if possible - can be a bottleneck, + // since it is also recomputed in AddressResolver + request.setEffectivePartitionKey(effectivePartitionKeyString); + } else if (request.getPartitionKeyRangeIdentity() != null) { + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByPartitionKeyRangeId(request.getPartitionKeyRangeIdentity().getPartitionKeyRangeId()); + } - checkNotNull(partitionKeyRange, "partitionKeyRange cannot be null!"); + checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); checkNotNull(this.globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); List unavailableRegionsForPartition = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( request.getResourceId(), - partitionKeyRange, + resolvedPartitionKeyRange, request.getOperationType()); - // cache the effective partition key if possible - can be a bottleneck, - // since it is also recomputed in AddressResolver - request.setEffectivePartitionKey(effectivePartitionKeyString); request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); // onBeforeSendRequest uses excluded regions to know the next location endpoint diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 933a3eeb6322..12fd191e2d1e 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -12,6 +12,7 @@ "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", "-Pcircuit-breaker-read-all-read-many": "CircuitBreakerReadAllAndReadMany", "-Pmulti-region": "MultiRegion", + "-Pmulti-master-circuit-breaker": "MultiMasterCircuitBreaker", "-Plong": "Long", "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"": "TCP", "Session": "", @@ -109,6 +110,24 @@ 
"ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "MultiMaster_MultiRegion": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[\"East US 2\"]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pmulti-master-circuit-breaker" ], + "AdditionalArgs": [ + "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG=\"{\\\"isPartitionLevelCircuitBreakerEnabled\\\": true,\"\n+ \"\\\"circuitBreakerType\\\": \\\"CONSECUTIVE_EXCEPTION_COUNT_BASED\\\",\"\n+\"\\\"consecutiveExceptionCountToleratedForReads\\\": 10,\"\n+\"\\\"consecutiveExceptionCountToleratedForWrites\\\": 5,\"\n+ \"}\"\"" + ], + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", From daee7ae6d2d47fc6464c891c4a437641f2b23776 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 14 Aug 2024 20:07:42 -0400 Subject: [PATCH 21/51] Test multi-region + circuit-breaker job. --- sdk/cosmos/azure-cosmos-tests/pom.xml | 21 +++++++++++ .../multi-master-circuit-breaker-testng.xml | 35 +++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml diff --git a/sdk/cosmos/azure-cosmos-tests/pom.xml b/sdk/cosmos/azure-cosmos-tests/pom.xml index 36be0aace6f1..00639487e7b7 100644 --- a/sdk/cosmos/azure-cosmos-tests/pom.xml +++ b/sdk/cosmos/azure-cosmos-tests/pom.xml @@ -489,6 +489,27 @@ Licensed under the MIT License. 
+ + + multi-master-circuit-breaker + + multi-master-circuit-breaker + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/multi-master-circuit-breaker-testng.xml + + + + + + circuit-breaker-read-all-read-many diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml new file mode 100644 index 000000000000..1cba368f3475 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + From 1ad46eeca116d0d3e9893e4c795e89658b45afac Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 14 Aug 2024 20:17:24 -0400 Subject: [PATCH 22/51] Test multi-region + circuit-breaker job. --- ...EndpointManagerForCircuitBreakerTests.java | 80 ++++--------------- 1 file changed, 15 insertions(+), 65 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index e2d1cc39c5f9..760830239313 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -51,7 +51,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreakerTests { private GlobalEndpointManager globalEndpointManagerMock; - @BeforeClass(groups = {"unit"}) + @BeforeClass(groups = {"multi-master-circuit-breaker"}) public void beforeClass() { this.globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); @@ -76,31 +76,12 @@ public void beforeClass() { } @DataProvider(name = "partitionLevelCircuitBreakerConfigs") - public Object[][] 
partitionLevelCircuitBreakerConfigs() { - return new Object[][]{ - new Object[]{ - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}", - READ_OPERATION_TRUE - }, - new Object[]{ - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}", - !READ_OPERATION_TRUE - } - }; + public Object[] partitionLevelCircuitBreakerConfigs() { + return new Object[]{READ_OPERATION_TRUE, !READ_OPERATION_TRUE}; } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyStatus(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -150,14 +131,10 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public 
void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyToHealthyWithFailuresStatusTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -218,14 +195,10 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyWithFailuresToUnavailableStatusTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -291,15 +264,11 @@ public void 
recordHealthyWithFailuresToUnavailableStatusTransition(String partit assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -377,15 +346,11 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new 
GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -470,15 +435,11 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -563,13 +524,10 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); 
GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -636,11 +594,9 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); assertThat(partitionAndLocationSpecificUnavailabilityInfo).isNull(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchFieldException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -735,15 +691,11 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St assertThat(locationSpecificHealthContext2.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext2.isExceptionThresholdBreached()).isFalse(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); - int threadPoolSizeForExecutors = 4; ScheduledThreadPoolExecutor executorForEastUs 
= new ScheduledThreadPoolExecutor(threadPoolSizeForExecutors); @@ -873,8 +825,6 @@ public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLeve executorForEastUs.shutdown(); executorForCentralUs.shutdown(); executorForEastUs2.shutdown(); - - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } private static void validateAllRegionsAreNotUnavailableAfterExceptionInLocation( From d0aed31e4592ffd594c6c4e8ff5511e1312ee760 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 14 Aug 2024 21:07:17 -0400 Subject: [PATCH 23/51] Test multi-region + circuit-breaker job. --- .../com/azure/cosmos/CosmosClientBuilder.java | 11 +++++++++++ .../com/azure/cosmos/implementation/Configs.java | 15 +++++++++++++++ sdk/cosmos/live-platform-matrix.json | 4 +--- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 9e284349b40a..4bfacb14c2ec 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -18,6 +18,7 @@ import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.time.StopWatch; +import com.azure.cosmos.implementation.circuitBreaker.PartitionLevelCircuitBreakerConfig; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.guava25.base.Preconditions; import com.azure.cosmos.implementation.routing.LocationHelper; @@ -1177,6 +1178,11 @@ public CosmosAsyncClient buildAsyncClient() { CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { StopWatch stopwatch = new StopWatch(); stopwatch.start(); + + if 
(Configs.shouldOptInDefaultCircuitBreakerConfig()) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", PartitionLevelCircuitBreakerConfig.DEFAULT.toJson()); + } + this.resetSessionCapturingType(); validateConfig(); buildConnectionPolicy(); @@ -1212,6 +1218,11 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { public CosmosClient buildClient() { StopWatch stopwatch = new StopWatch(); stopwatch.start(); + + if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", PartitionLevelCircuitBreakerConfig.DEFAULT.toJson()); + } + this.resetSessionCapturingType(); validateConfig(); buildConnectionPolicy(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index a459a3e45abe..9e07e1b57749 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -245,6 +245,10 @@ public class Configs { private static final boolean DEFAULT_SHOULD_LOG_INCORRECTLY_MAPPED_SESSION_TOKEN = true; private static final String SHOULD_LOG_INCORRECTLY_MAPPED_SESSION_TOKEN = "COSMOS.SHOULD_LOG_INCORRECTLY_MAPPED_USER_SESSION_TOKEN"; + private static final boolean DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN = false; + private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN"; + + public Configs() { this.sslContext = sslContextInit(); } @@ -657,6 +661,17 @@ public static boolean shouldLogIncorrectlyMappedSessionToken() { return Boolean.parseBoolean(shouldSystemExit); } + public static boolean shouldOptInDefaultCircuitBreakerConfig() { + String shouldOptInDefaultPartitionLevelCircuitBreakerConfig = + System.getProperty( + 
PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN, + firstNonNull( + emptyToNull(System.getenv().get(PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN)), + String.valueOf(DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN))); + + return Boolean.parseBoolean(shouldOptInDefaultPartitionLevelCircuitBreakerConfig); + } + public static CosmosMicrometerMetricsConfig getMetricsConfig() { String metricsConfig = System.getProperty( diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 12fd191e2d1e..38698915a912 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -121,9 +121,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master-circuit-breaker" ], - "AdditionalArgs": [ - "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG=\"{\\\"isPartitionLevelCircuitBreakerEnabled\\\": true,\"\n+ \"\\\"circuitBreakerType\\\": \\\"CONSECUTIVE_EXCEPTION_COUNT_BASED\\\",\"\n+\"\\\"consecutiveExceptionCountToleratedForReads\\\": 10,\"\n+\"\\\"consecutiveExceptionCountToleratedForWrites\\\": 5,\"\n+ \"}\"\"" - ], + "AdditionalArgs": "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=true\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 339d8514f3c783a28b82631e7c33c012f0c4d2e6 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 14 Aug 2024 23:23:59 -0400 Subject: [PATCH 24/51] Test multi-region + circuit-breaker job. 
--- ...ionEndpointManagerForCircuitBreakerTests.java | 13 ++++++------- .../com/azure/cosmos/CosmosClientBuilder.java | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 760830239313..e0e26af4b3aa 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -267,7 +267,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(boolean readO } @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + public void recordUnavailableToHealthyTentativeStatusTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -349,7 +349,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition } @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + public void recordHealthyTentativeToHealthyStatusTransition(boolean readOperationTrue) throws IllegalAccessException, 
NoSuchFieldException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -438,7 +438,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve } @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + public void recordHealthyTentativeToUnavailableTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -527,7 +527,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC } @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + public void allRegionsUnavailableHandling(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -597,8 +597,7 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon } @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) 
throws NoSuchFieldException, IllegalAccessException { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(boolean readOperationTrue) throws NoSuchFieldException, IllegalAccessException { GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -694,7 +693,7 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St } @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void allRegionsUnavailableHandlingWithMultiThreading(boolean readOperationTrue) { int threadPoolSizeForExecutors = 4; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 4bfacb14c2ec..d08a6992aef1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1180,7 +1180,13 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { stopwatch.start(); if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", PartitionLevelCircuitBreakerConfig.DEFAULT.toJson()); + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); } 
this.resetSessionCapturingType(); @@ -1220,7 +1226,13 @@ public CosmosClient buildClient() { stopwatch.start(); if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", PartitionLevelCircuitBreakerConfig.DEFAULT.toJson()); + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); } this.resetSessionCapturingType(); From 187e167ef11daf13e461b554745a936d597976dd Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 01:51:55 -0400 Subject: [PATCH 25/51] Test multi-region + circuit-breaker job. --- sdk/cosmos/live-platform-matrix.json | 103 +-------------------------- 1 file changed, 1 insertion(+), 102 deletions(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 38698915a912..b85c1fd590af 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -24,92 +24,6 @@ "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; enablePartitionMerge = $true}": "" }, "include": [ - { - "ConsistencyConfig": { - "Session": { - "DESIRED_CONSISTENCY": "Session", - "ACCOUNT_CONSISTENCY": "Session", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }" - }, - "Strong": { - "DESIRED_CONSISTENCY": "Strong", - "ACCOUNT_CONSISTENCY": "Strong", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }" - } - }, - "AdditionalArgs": [ - "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"" - ], - "ProfileFlag": "-Pe2e", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", 
"Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast", "-Pdirect" ], - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session' }", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": [ "[\"Strong\", \"Session\"]", "[\"BoundedStaleness\"]", "[\"ConsistentPrefix\"]" ], - "ACCOUNT_CONSISTENCY": "Strong", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast" ], - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCY": "BoundedStaleness", - "ACCOUNT_CONSISTENCY": "Strong", - "AdditionalArgs": "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"", - "ProfileFlag": "-Pe2e", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Strong\", \"Session\"]", - "ACCOUNT_CONSISTENCY": "Strong", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pdirect" ], - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Plong" ], - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" 
} - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "ArmConfig": { - "MultiMaster_MultiRegion": { - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", - "PREFERRED_LOCATIONS": "[\"East US 2\"]" - } - }, - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pmulti-master", "-Pflaky-multi-master", "-Pcircuit-breaker-misc-direct", "-Pcircuit-breaker-misc-gateway", "-Pcircuit-breaker-read-all-read-many", "-Pfast", "-Pdirect" ], - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", @@ -121,22 +35,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master-circuit-breaker" ], - "AdditionalArgs": "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=true\"", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "ArmConfig": { - "SingleMaster_MultiRegion": { - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; enablePartitionMerge = $true}" - } - }, - "ProfileFlag": "-Pmulti-region", - "PROTOCOLS": "[\"Tcp\"]", - "PREFERRED_LOCATIONS": null, + "AdditionalArgs": "-DargLine=-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=true", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From a7d4b75fbad52e6d917bd5a3663900f21e8d9192 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 01:52:17 -0400 Subject: [PATCH 26/51] Test multi-region + circuit-breaker job. 
--- .../src/main/java/com/azure/cosmos/CosmosClientBuilder.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index d08a6992aef1..40adada5f6fe 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1179,6 +1179,8 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { StopWatch stopwatch = new StopWatch(); stopwatch.start(); + logger.info("Circuit breaker default opt-in result : {}", Configs.shouldOptInDefaultCircuitBreakerConfig()); + if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { System.setProperty( "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", @@ -1225,6 +1227,8 @@ public CosmosClient buildClient() { StopWatch stopwatch = new StopWatch(); stopwatch.start(); + logger.info("Circuit breaker default opt-in result : {}", Configs.shouldOptInDefaultCircuitBreakerConfig()); + if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { System.setProperty( "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", From 6f46b19716149896fcd09a8d42cb7ac0656e018f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 02:12:06 -0400 Subject: [PATCH 27/51] Test multi-region + circuit-breaker job. 
--- sdk/cosmos/live-platform-matrix.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index b85c1fd590af..824b69b69cce 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -35,7 +35,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master-circuit-breaker" ], - "AdditionalArgs": "-DargLine=-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=true", + "AdditionalArgs": "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"true\"\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 6ef8ad9ce638de69d6845f0c76343a2b40161b71 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 02:13:35 -0400 Subject: [PATCH 28/51] Test multi-region + circuit-breaker job. --- sdk/cosmos/kafka-cosmos-matrix.json | 26 ++--- sdk/cosmos/kafka-testcontainer-matrix.json | 26 ++--- sdk/cosmos/tests.yml | 116 ++++++++++----------- 3 files changed, 84 insertions(+), 84 deletions(-) diff --git a/sdk/cosmos/kafka-cosmos-matrix.json b/sdk/cosmos/kafka-cosmos-matrix.json index 9cce7eada94d..c1f43e8c04e6 100644 --- a/sdk/cosmos/kafka-cosmos-matrix.json +++ b/sdk/cosmos/kafka-cosmos-matrix.json @@ -1,13 +1,13 @@ -{ - "matrix": { - "Cosmos": { - "Session_Integration": { - "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", - "ProfileFlag": "-Pkafka", - "Pool": "env:LINUXPOOL", - "OSVmImage": "env:LINUXVMIMAGE" - } - }, - "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] - } -} +//{ +// "matrix": { +// "Cosmos": { +// "Session_Integration": { +// "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", +// "ProfileFlag": "-Pkafka", +// "Pool": "env:LINUXPOOL", +// "OSVmImage": "env:LINUXVMIMAGE" +// } +// }, +// "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] 
+// } +//} diff --git a/sdk/cosmos/kafka-testcontainer-matrix.json b/sdk/cosmos/kafka-testcontainer-matrix.json index f596cb337b07..58190f7ff09a 100644 --- a/sdk/cosmos/kafka-testcontainer-matrix.json +++ b/sdk/cosmos/kafka-testcontainer-matrix.json @@ -1,13 +1,13 @@ -{ - "matrix": { - "Cosmos": { - "Session_Integration": { - "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", - "ProfileFlag": "-P build-kafka,kafka-integration", - "Pool": "env:LINUXPOOL", - "OSVmImage": "env:LINUXVMIMAGE" - } - }, - "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] - } -} +//{ +// "matrix": { +// "Cosmos": { +// "Session_Integration": { +// "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", +// "ProfileFlag": "-P build-kafka,kafka-integration", +// "Pool": "env:LINUXPOOL", +// "OSVmImage": "env:LINUXVMIMAGE" +// } +// }, +// "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] +// } +//} diff --git a/sdk/cosmos/tests.yml b/sdk/cosmos/tests.yml index e98292e25c7c..8c529f1ba9f4 100644 --- a/sdk/cosmos/tests.yml +++ b/sdk/cosmos/tests.yml @@ -40,62 +40,62 @@ extends: - name: AdditionalArgs value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' - - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml - parameters: - TestName: 'Spring_Data_Cosmos_Integration' - CloudConfig: - Public: - SubscriptionConfigurations: - - $(sub-config-azure-cloud-test-resources) - - $(sub-config-cosmos-azure-cloud-test-resources) - MatrixConfigs: - - Name: Cosmos_live_test_integration - Path: sdk/spring/pipeline/cosmos-integration-matrix.json - Selection: all - GenerateVMJobs: true - ServiceDirectory: spring - TestResourceDirectories: - - spring/spring-cloud-azure-integration-tests/test-resources/cosmos-spring - Artifacts: - - name: azure-spring-data-cosmos - groupId: 
com.azure - safeName: azurespringdatacosmos - TimeoutInMinutes: 90 - PreSteps: - - template: /eng/pipelines/templates/steps/install-reporting-tools.yml - TestGoals: 'verify' - TestOptions: '$(ProfileFlag) -DskipCompile=true -DskipTestCompile=true -DcreateSourcesJar=false' - AdditionalVariables: - - name: AdditionalArgs - value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' - - - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml - parameters: - TestName: 'Kafka_Cosmos_Integration' - CloudConfig: - Public: - SubscriptionConfigurations: - - $(sub-config-azure-cloud-test-resources) - - $(sub-config-cosmos-azure-cloud-test-resources) - MatrixConfigs: - - Name: Kafka_Cosmos_Integration_Test - Path: sdk/cosmos/kafka-cosmos-matrix.json - Selection: all - GenerateVMJobs: true - TestFromSource: false - ServiceDirectory: cosmos - TestResourceDirectories: - - cosmos/ - Artifacts: - - name: azure-cosmos-kafka-connect - groupId: com.azure.cosmos.kafka - safeName: azurecosmoskafkaconnect - TimeoutInMinutes: 120 - PreSteps: - - template: /eng/pipelines/templates/steps/install-reporting-tools.yml - TestGoals: 'clean verify' - TestOptions: '$(ProfileFlag) $(AdditionalArgs)' - AdditionalVariables: - - name: AdditionalArgs - value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' +# - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml +# parameters: +# TestName: 'Spring_Data_Cosmos_Integration' +# CloudConfig: +# Public: +# SubscriptionConfigurations: +# - $(sub-config-azure-cloud-test-resources) +# - $(sub-config-cosmos-azure-cloud-test-resources) +# MatrixConfigs: +# - Name: Cosmos_live_test_integration +# Path: sdk/spring/pipeline/cosmos-integration-matrix.json +# Selection: all +# GenerateVMJobs: true +# 
ServiceDirectory: spring +# TestResourceDirectories: +# - spring/spring-cloud-azure-integration-tests/test-resources/cosmos-spring +# Artifacts: +# - name: azure-spring-data-cosmos +# groupId: com.azure +# safeName: azurespringdatacosmos +# TimeoutInMinutes: 90 +# PreSteps: +# - template: /eng/pipelines/templates/steps/install-reporting-tools.yml +# TestGoals: 'verify' +# TestOptions: '$(ProfileFlag) -DskipCompile=true -DskipTestCompile=true -DcreateSourcesJar=false' +# AdditionalVariables: +# - name: AdditionalArgs +# value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' +# +# - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml +# parameters: +# TestName: 'Kafka_Cosmos_Integration' +# CloudConfig: +# Public: +# SubscriptionConfigurations: +# - $(sub-config-azure-cloud-test-resources) +# - $(sub-config-cosmos-azure-cloud-test-resources) +# MatrixConfigs: +# - Name: Kafka_Cosmos_Integration_Test +# Path: sdk/cosmos/kafka-cosmos-matrix.json +# Selection: all +# GenerateVMJobs: true +# TestFromSource: false +# ServiceDirectory: cosmos +# TestResourceDirectories: +# - cosmos/ +# Artifacts: +# - name: azure-cosmos-kafka-connect +# groupId: com.azure.cosmos.kafka +# safeName: azurecosmoskafkaconnect +# TimeoutInMinutes: 120 +# PreSteps: +# - template: /eng/pipelines/templates/steps/install-reporting-tools.yml +# TestGoals: 'clean verify' +# TestOptions: '$(ProfileFlag) $(AdditionalArgs)' +# AdditionalVariables: +# - name: AdditionalArgs +# value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' From 5bd178d8de9c5ec8e6b09feef85d2fc2fc39b933 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 02:31:49 -0400 Subject: [PATCH 29/51] Test multi-region + circuit-breaker job. 
--- sdk/cosmos/live-platform-matrix.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 824b69b69cce..1d219505bfca 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -35,7 +35,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master-circuit-breaker" ], - "AdditionalArgs": "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"true\"\"", + "AdditionalArgs": "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"TRUE\"\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From d30cba2154267f5ee43d84face63641e0df5cda3 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 02:35:11 -0400 Subject: [PATCH 30/51] Test multi-region + circuit-breaker job. --- .../main/java/com/azure/cosmos/implementation/Configs.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 9e07e1b57749..fe6f7ab02105 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -662,6 +662,10 @@ public static boolean shouldLogIncorrectlyMappedSessionToken() { } public static boolean shouldOptInDefaultCircuitBreakerConfig() { + + logger.info("Circuit breaker opt-in {}", System.getProperty( + PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN)); + String shouldOptInDefaultPartitionLevelCircuitBreakerConfig = System.getProperty( PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN, From f879593e37f2692fce90f79d704c4bc2375cdcac Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 02:55:38 -0400 Subject: [PATCH 31/51] Test multi-region 
+ circuit-breaker job. --- sdk/cosmos/live-platform-matrix.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 1d219505bfca..bd905413639a 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -35,7 +35,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master-circuit-breaker" ], - "AdditionalArgs": "-DargLine=\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"TRUE\"\"", + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"TRUE\"\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From a6322fe0acea1f23cdf5031c712566f4393bc90c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 02:57:16 -0400 Subject: [PATCH 32/51] Test multi-region + circuit-breaker job. --- .../main/java/com/azure/cosmos/implementation/Configs.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index fe6f7ab02105..34db7000dd25 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -663,7 +663,9 @@ public static boolean shouldLogIncorrectlyMappedSessionToken() { public static boolean shouldOptInDefaultCircuitBreakerConfig() { - logger.info("Circuit breaker opt-in {}", System.getProperty( + logger.info("Circuit breaker opt-in (sys property) {}", System.getProperty( + PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN)); + logger.info("Circuit breaker opt-in (env variable) {}", System.getenv().get( PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN)); String shouldOptInDefaultPartitionLevelCircuitBreakerConfig = From 
1a35bdd8a310c73dfd910749bb218282f6228cb0 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 10:12:21 -0400 Subject: [PATCH 33/51] Test multi-region + circuit-breaker job. --- .../main/java/com/azure/cosmos/implementation/Configs.java | 5 ----- sdk/cosmos/live-platform-matrix.json | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 34db7000dd25..b4e947570f3b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -663,11 +663,6 @@ public static boolean shouldLogIncorrectlyMappedSessionToken() { public static boolean shouldOptInDefaultCircuitBreakerConfig() { - logger.info("Circuit breaker opt-in (sys property) {}", System.getProperty( - PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN)); - logger.info("Circuit breaker opt-in (env variable) {}", System.getenv().get( - PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN)); - String shouldOptInDefaultPartitionLevelCircuitBreakerConfig = System.getProperty( PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN, diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index bd905413639a..aef1cc4d432d 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -34,8 +34,8 @@ } }, "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pmulti-master-circuit-breaker" ], - "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"TRUE\"\"", + "ProfileFlag": [ "-Pmulti-master" ], + "AdditionalArgs": ["\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"TRUE\"\"", "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"FALSE\"\""], "Agent": { "ubuntu": { "OSVmImage": 
"env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 911387f3ceaf0eacd1965ee6862edfd422d45e43 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 10:31:20 -0400 Subject: [PATCH 34/51] Test multi-region + circuit-breaker job. --- sdk/cosmos/live-platform-matrix.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index aef1cc4d432d..dda0de4256ec 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -26,6 +26,7 @@ "include": [ { "DESIRED_CONSISTENCIES": "[\"Session\"]", + "DESIRED_CONSISTENCIES": [ "[\"Strong\", \"Session\"]", "[\"BoundedStaleness\"]", "[\"ConsistentPrefix\"]" ], "ACCOUNT_CONSISTENCY": "Session", "ArmConfig": { "MultiMaster_MultiRegion": { @@ -35,7 +36,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], - "AdditionalArgs": ["\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"TRUE\"\"", "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=\"FALSE\"\""], + "AdditionalArgs": ["[\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\", \"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=FALSE\"]"], "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 62084dcfcd342a2f14e83c462d4fa3206dfa6072 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 10:34:05 -0400 Subject: [PATCH 35/51] Test multi-region + circuit-breaker job. 
--- sdk/cosmos/live-platform-matrix.json | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index dda0de4256ec..ea2de7f6c505 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -26,17 +26,32 @@ "include": [ { "DESIRED_CONSISTENCIES": "[\"Session\"]", - "DESIRED_CONSISTENCIES": [ "[\"Strong\", \"Session\"]", "[\"BoundedStaleness\"]", "[\"ConsistentPrefix\"]" ], "ACCOUNT_CONSISTENCY": "Session", "ArmConfig": { - "MultiMaster_MultiRegion": { + "MultiMaster_MultiRegion_CircuitBreaker_True": { "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", "PREFERRED_LOCATIONS": "[\"East US 2\"]" } }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], - "AdditionalArgs": ["[\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\", \"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=FALSE\"]"], + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\"", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "MultiMaster_MultiRegion_CircuitBreaker_False": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[\"East US 2\"]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pmulti-master" ], + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=FALSE\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 84a470c387956188d63af1a5349db502a3dcb095 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 13:37:05 -0400 Subject: 
[PATCH 36/51] Test multi-region + circuit-breaker job. --- sdk/cosmos/azure-cosmos-tests/pom.xml | 287 ++++++++---------- .../EndToEndTimeOutWithAvailabilityTest.java | 6 +- .../com/azure/cosmos/ExcludeRegionTests.java | 8 +- ...njectionWithAvailabilityStrategyTests.java | 22 +- ...EndpointManagerForCircuitBreakerTests.java | 93 ++++-- .../com/azure/cosmos/MaxRetryCountTests.java | 20 +- ...sionConsistencyWithRegionScopingTests.java | 10 +- ...ExcludedRegionWithFaultInjectionTests.java | 14 +- ...aultInjectionMetadataRequestRuleTests.java | 8 +- .../com/azure/cosmos/rx/TestSuiteBase.java | 4 +- .../IncrementalChangeFeedProcessorTest.java | 6 +- .../IncrementalChangeFeedProcessorTest.java | 2 +- .../multi-master-circuit-breaker-testng.xml | 35 --- sdk/cosmos/kafka-cosmos-matrix.json | 26 +- sdk/cosmos/kafka-testcontainer-matrix.json | 26 +- sdk/cosmos/live-platform-matrix.json | 72 ++++- sdk/cosmos/tests.yml | 116 +++---- 17 files changed, 407 insertions(+), 348 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml diff --git a/sdk/cosmos/azure-cosmos-tests/pom.xml b/sdk/cosmos/azure-cosmos-tests/pom.xml index 00639487e7b7..e936812c2a49 100644 --- a/sdk/cosmos/azure-cosmos-tests/pom.xml +++ b/sdk/cosmos/azure-cosmos-tests/pom.xml @@ -342,137 +342,11 @@ Licensed under the MIT License. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - multi-master + + fast - multi-master + simple @@ -482,7 +356,112 @@ Licensed under the MIT License. 
3.2.5 - src/test/resources/multi-master-testng.xml + src/test/resources/fast-testng.xml + + + + + + + + + split + + split + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/split-testng.xml + + + + + + + + + cfp-split + + cfp-split + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/cfp-split-testng.xml + + + + + + + + + query + + query + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/query-testng.xml + + + + + + + + + long + + long + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/long-testng.xml + + + + + + + + + direct + + direct + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/direct-testng.xml @@ -491,9 +470,9 @@ Licensed under the MIT License. - multi-master-circuit-breaker + multi-master - multi-master-circuit-breaker + multi-master @@ -503,7 +482,7 @@ Licensed under the MIT License. 3.2.5 - src/test/resources/multi-master-circuit-breaker-testng.xml + src/test/resources/multi-master-testng.xml @@ -594,27 +573,27 @@ Licensed under the MIT License. 
- - - - - - - - - - - - - - - - - - - - - + + + multi-region + + multi-region + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/multi-region-testng.xml + + + + + + examples diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java index 40ab53781d80..b39af38a0e61 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/EndToEndTimeOutWithAvailabilityTest.java @@ -69,7 +69,7 @@ public EndToEndTimeOutWithAvailabilityTest(CosmosClientBuilder clientBuilder) { random = new Random(); } - @BeforeClass(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = SETUP_TIMEOUT * 100) + @BeforeClass(groups = {"multi-master"}, timeOut = SETUP_TIMEOUT * 100) public void beforeClass() throws Exception { System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_WAIT_TIME_IN_MILLISECONDS", "1000"); System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS", "500"); @@ -93,7 +93,7 @@ public void beforeClass() throws Exception { } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT*100) + @Test(groups = {"multi-master"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT*100) public void testThresholdAvailabilityStrategy(OperationType operationType, FaultInjectionOperationType faultInjectionOperationType) throws InterruptedException { if (this.preferredRegionList.size() <= 1) { throw new SkipException("excludeRegionTest_SkipFirstPreferredRegion can only be tested for multi-master with multi-regions"); @@ -206,7 +206,7 @@ private List insertDocuments(int docu return documentInserted; } - @AfterClass(groups = {"multi-master", 
"multi-master-circuit-breaker"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) + @AfterClass(groups = {"multi-master"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) public void afterClass() { safeClose(this.clientWithPreferredRegions); System.clearProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java index 73bd5562253d..008d25bc7162 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/ExcludeRegionTests.java @@ -51,7 +51,7 @@ public ExcludeRegionTests(CosmosClientBuilder clientBuilder) { super(clientBuilder); } - @BeforeClass(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = SETUP_TIMEOUT) + @BeforeClass(groups = {"multi-master"}, timeOut = SETUP_TIMEOUT) public void beforeClass() { System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_WAIT_TIME_IN_MILLISECONDS", "1000"); System.setProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS", "500"); @@ -74,7 +74,7 @@ public void beforeClass() { } } - @AfterClass(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) + @AfterClass(groups = {"multi-master"}, timeOut = SHUTDOWN_TIMEOUT, alwaysRun = true) public void afterClass() { safeClose(this.clientWithPreferredRegions); System.clearProperty("COSMOS.DEFAULT_SESSION_TOKEN_MISMATCH_INITIAL_BACKOFF_TIME_IN_MILLISECONDS"); @@ -105,7 +105,7 @@ public static Object[][] faultInjectionArgProvider() { }; } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "operationTypeArgProvider", timeOut = TIMEOUT) + @Test(groups = {"multi-master"}, dataProvider = "operationTypeArgProvider", timeOut = TIMEOUT) public void 
excludeRegionTest_SkipFirstPreferredRegion(OperationType operationType) { if (this.preferredRegionList.size() <= 1) { @@ -131,7 +131,7 @@ public void excludeRegionTest_SkipFirstPreferredRegion(OperationType operationTy assertThat(cosmosDiagnostics.getContactedRegionNames()).containsAll(this.preferredRegionList.subList(1, 2)); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT) + @Test(groups = {"multi-master"}, dataProvider = "faultInjectionArgProvider", timeOut = TIMEOUT) public void excludeRegionTest_readSessionNotAvailable( OperationType operationType, FaultInjectionOperationType faultInjectionOperationType) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index 5b3367c527ea..466ed5826797 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -206,9 +206,8 @@ public String resolveTestNameSuffix(Object[] row) { return (String)row[0]; } - @BeforeClass(groups = { "multi-master", "multi-master-circuit-breaker" }) + @BeforeClass(groups = { "multi-master" }) public void beforeClass() { - CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) .key(TestConfigurations.MASTER_KEY) @@ -230,7 +229,7 @@ public void beforeClass() { Map writeRegionMap = this.getRegionMap(databaseAccount, true); - this.writeableRegions = new ArrayList<>(writeRegionMap.keySet()); + this.writeableRegions = new ArrayList<>(Arrays.asList("East US", "South Central US")); assertThat(this.writeableRegions).isNotNull(); assertThat(this.writeableRegions.size()).isGreaterThanOrEqualTo(2); @@ -324,7 +323,7 @@ public 
void beforeClass() { // When the container does not exist yet, you would see 401 for example for point reads etc. // So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(3000); + Thread.sleep(10_000); } catch (InterruptedException e) { throw new RuntimeException(e); } @@ -333,9 +332,8 @@ public void beforeClass() { safeClose(dummyClient); } } - @AfterClass(groups = { "multi-master", "multi-master-circuit-breaker" }) + @AfterClass(groups = { "multi-master" }) public void afterClass() { - CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) .key(TestConfigurations.MASTER_KEY) @@ -829,7 +827,7 @@ public Object[][] testConfigs_readAfterCreation() { }; } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_readAfterCreation") + @Test(groups = {"multi-master"}, dataProvider = "testConfigs_readAfterCreation") public void readAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -2247,7 +2245,7 @@ public Object[][] testConfigs_writeAfterCreation() { }; } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_writeAfterCreation") + @Test(groups = {"multi-master"}, dataProvider = "testConfigs_writeAfterCreation") public void writeAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -3406,7 +3404,7 @@ public Object[][] testConfigs_queryAfterCreation() { }; } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_queryAfterCreation") + @Test(groups = {"multi-master"}, dataProvider = "testConfigs_queryAfterCreation") public void queryAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -3978,7 +3976,7 @@ public Object[][] testConfigs_readManyAfterCreation() { }; } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_readManyAfterCreation") + @Test(groups = {"multi-master"}, dataProvider = 
"testConfigs_readManyAfterCreation") public void readManyAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -4756,7 +4754,7 @@ public Object[][] testConfigs_readAllAfterCreation() { }; } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "testConfigs_readAllAfterCreation") + @Test(groups = {"multi-master"}, dataProvider = "testConfigs_readAllAfterCreation") public void readAllAfterCreation( String testCaseId, Duration endToEndTimeout, @@ -5088,7 +5086,7 @@ private void execute( // When the container does not exist yet, you would see 401 for example for point reads etc. // So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(3000); + Thread.sleep(10_000); } catch (InterruptedException e) { throw new RuntimeException(e); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index e0e26af4b3aa..e2d1cc39c5f9 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -51,7 +51,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreakerTests { private GlobalEndpointManager globalEndpointManagerMock; - @BeforeClass(groups = {"multi-master-circuit-breaker"}) + @BeforeClass(groups = {"unit"}) public void beforeClass() { this.globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); @@ -76,12 +76,31 @@ public void beforeClass() { } @DataProvider(name = "partitionLevelCircuitBreakerConfigs") - public Object[] partitionLevelCircuitBreakerConfigs() { - return new Object[]{READ_OPERATION_TRUE, !READ_OPERATION_TRUE}; + public Object[][] 
partitionLevelCircuitBreakerConfigs() { + return new Object[][]{ + new Object[]{ + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}", + READ_OPERATION_TRUE + }, + new Object[]{ + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}", + !READ_OPERATION_TRUE + } + }; } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyStatus(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -131,10 +150,14 @@ public void recordHealthyStatus(boolean readOperationTrue) throws IllegalAccessE assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyToHealthyWithFailuresStatusTransition(boolean readOperationTrue) throws 
IllegalAccessException, NoSuchFieldException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -195,10 +218,14 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(boolean readOpera assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyWithFailuresToUnavailableStatusTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -264,10 +291,14 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(boolean readO 
assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordUnavailableToHealthyTentativeStatusTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -346,10 +377,14 @@ public void recordUnavailableToHealthyTentativeStatusTransition(boolean readOper assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToHealthyStatusTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + 
System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -435,10 +470,14 @@ public void recordHealthyTentativeToHealthyStatusTransition(boolean readOperatio assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToUnavailableTransition(boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -524,10 +563,13 @@ public void recordHealthyTentativeToUnavailableTransition(boolean readOperationT assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void allRegionsUnavailableHandling(boolean 
readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -594,10 +636,13 @@ public void allRegionsUnavailableHandling(boolean readOperationTrue) throws Ille = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); assertThat(partitionAndLocationSpecificUnavailabilityInfo).isNull(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(boolean readOperationTrue) throws NoSuchFieldException, IllegalAccessException { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchFieldException, IllegalAccessException { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); @@ -690,10 +735,14 @@ public void 
multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(bo assertThat(locationSpecificHealthContext2.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificHealthContext2.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } - @Test(groups = {"multi-master-circuit-breaker"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void allRegionsUnavailableHandlingWithMultiThreading(boolean readOperationTrue) { + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); int threadPoolSizeForExecutors = 4; @@ -824,6 +873,8 @@ public void allRegionsUnavailableHandlingWithMultiThreading(boolean readOperatio executorForEastUs.shutdown(); executorForCentralUs.shutdown(); executorForEastUs2.shutdown(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } private static void validateAllRegionsAreNotUnavailableAfterExceptionInLocation( diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java index bc00f0a39fe6..7cefbadb33b4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java @@ -181,7 +181,7 @@ public String resolveTestNameSuffix(Object[] row) { return (String)row[0]; } - @BeforeClass(groups = { "multi-master", "multi-master-circuit-breaker" }) + @BeforeClass(groups = { "multi-master" }) public void beforeClass() { CosmosClientBuilder clientBuilder = new CosmosClientBuilder() 
.endpoint(TestConfigurations.HOST) @@ -256,7 +256,7 @@ public void beforeClass() { safeClose(dummyClient); } } - @AfterClass(groups = { "multi-master", "multi-master-circuit-breaker" }) + @AfterClass(groups = { "multi-master" }) public void afterClass() { CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) @@ -1301,7 +1301,7 @@ public Object[][] testConfigs_readMaxRetryCount_serverInternalServerError() { }; } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_readSessionNotAvailable") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_readSessionNotAvailable") public void readMaxRetryCount_readSessionNotAvailable( String testCaseId, Duration endToEndTimeout, @@ -1422,7 +1422,7 @@ public void readMaxRetryCount_readSessionNotAvailable( } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_retryWith") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_retryWith") public void readMaxRetryCount_retryWith( String testCaseId, Duration endToEndTimeout, @@ -1499,7 +1499,7 @@ public void readMaxRetryCount_retryWith( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverGone") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverGone") public void readMaxRetryCount_serverGone( String testCaseId, Duration endToEndTimeout, @@ -1587,7 +1587,7 @@ public void readMaxRetryCount_serverGone( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_transitTimeout") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_transitTimeout") public void readMaxRetryCount_transitTimeout( String testCaseId, Duration endToEndTimeout, @@ -1676,7 +1676,7 @@ public void readMaxRetryCount_transitTimeout( 
defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverTimeout") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverTimeout") public void readMaxRetryCount_serverTimeout( String testCaseId, Duration endToEndTimeout, @@ -1766,7 +1766,7 @@ public void readMaxRetryCount_serverTimeout( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverServiceUnavailable") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverServiceUnavailable") public void readMaxRetryCount_serverServiceUnavailable( String testCaseId, Duration endToEndTimeout, @@ -1854,7 +1854,7 @@ public void readMaxRetryCount_serverServiceUnavailable( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverInternalServerError") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverInternalServerError") public void readMaxRetryCount_serverInternalServerError( String testCaseId, Duration endToEndTimeout, @@ -1933,7 +1933,7 @@ public void readMaxRetryCount_serverInternalServerError( defaultThrottlingRetryOptions); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readMaxRetryCount_serverRequestRateTooLarge") + @Test(groups = {"multi-master"}, dataProvider = "readMaxRetryCount_serverRequestRateTooLarge") public void readMaxRetryCount_serverRequestRateTooLarge( String testCaseId, Duration endToEndTimeout, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java index d74c0fb794e3..3488374002cb 100644 --- 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/SessionConsistencyWithRegionScopingTests.java @@ -54,7 +54,6 @@ import com.fasterxml.jackson.databind.JsonNode; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Factory; @@ -1426,7 +1425,7 @@ public Object[][] readYouWriteWithExplicitRegionSwitchingTestContext() { }; } - @BeforeClass(groups = {"multi-region", "multi-master", "multi-master-circuit-breaker"}) + @BeforeClass(groups = {"multi-region", "multi-master"}) public void beforeClass() { try (CosmosAsyncClient tempClient = getClientBuilder().buildAsyncClient()) { @@ -1445,9 +1444,6 @@ public void beforeClass() { } } - @AfterClass(groups = {"multi-region", "multi-master", "multi-master-circuit-breaker"}) - public void afterClass() {} - @Test(groups = {"multi-region"}, dataProvider = "readYouWriteWithNoExplicitRegionSwitchingTestContext", timeOut = 80 * TIMEOUT) public void readYouWriteWithNoExplicitRegionSwitching( Function> func, @@ -1518,7 +1514,7 @@ public void readYouWriteWithNoExplicitRegionSwitching( } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readYouWriteWithExplicitRegionSwitchingTestContext", timeOut = 80 * TIMEOUT) + @Test(groups = {"multi-master"}, dataProvider = "readYouWriteWithExplicitRegionSwitchingTestContext", timeOut = 80 * TIMEOUT) public void readYouWriteWithExplicitRegionSwitching( Function> func, String testId, @@ -1640,7 +1636,7 @@ public void readManyWithNoExplicitRegionSwitching( } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "readManyWithExplicitRegionSwitchingTestContext", timeOut = 10 * TIMEOUT) + @Test(groups = {"multi-master"}, dataProvider = 
"readManyWithExplicitRegionSwitchingTestContext", timeOut = 10 * TIMEOUT) public void readManyWithExplicitRegionSwitching( Function> func, String testId, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java index 3b76730f768e..1b42c08346f7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/ExcludedRegionWithFaultInjectionTests.java @@ -129,7 +129,7 @@ public ExcludedRegionWithFaultInjectionTests(CosmosClientBuilder cosmosClientBui // 13. bulkExecutionOptions: a CosmosBulkExecutionOptions instance configured to set on the data plane operation after mutation is done // 14. batchRequestOptions: a CosmosBatchRequestOptions instance configured to set on the data plane operation after mutation is done // 15. perRegionDuplicateCount: no. 
of times to duplicate a particular region in excludedRegions - @BeforeClass(groups = {"multi-master", "multi-master-circuit-breaker"}) + @BeforeClass(groups = {"multi-master"}) public void beforeClass() { this.cosmosAsyncClient = getClientBuilder().buildAsyncClient(); this.cosmosAsyncContainer = getSharedMultiPartitionCosmosContainerWithIdAsPartitionKey(this.cosmosAsyncClient); @@ -2257,31 +2257,31 @@ public Object[][] regionExclusionBulkTestConfigs() { return null; } - @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "regionExclusionReadAfterCreateTestConfigs") + @Test(groups = { "multi-master" }, dataProvider = "regionExclusionReadAfterCreateTestConfigs") public void regionExclusionMutationOnClient_readAfterCreate_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "regionExclusionQueryAfterCreateTestConfigs") + @Test(groups = { "multi-master" }, dataProvider = "regionExclusionQueryAfterCreateTestConfigs") public void regionExclusionMutationOnClient_queryAfterCreate_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "regionExclusionWriteAfterCreateTestConfigs") + @Test(groups = { "multi-master" }, dataProvider = "regionExclusionWriteAfterCreateTestConfigs") public void regionExclusionMutationOnClient_writeAfterCreate_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "regionExclusionBatchTestConfigs") + 
@Test(groups = {"multi-master"}, dataProvider = "regionExclusionBatchTestConfigs") public void regionExclusionMutationOnClient_batch_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, dataProvider = "regionExclusionBulkTestConfigs") + @Test(groups = {"multi-master"}, dataProvider = "regionExclusionBulkTestConfigs") public void regionExclusionMutationOnClient_bulk_test(String testTitle, MutationTestConfig mutationTestConfig) throws InterruptedException { logger.info("Test started with title : {}", testTitle); execute(mutationTestConfig); @@ -2392,7 +2392,7 @@ private void execute(MutationTestConfig mutationTestConfig) throws InterruptedEx } } - @AfterClass(groups = {"multi-master", "multi-master-circuit-breaker"}) + @AfterClass(groups = {"multi-master"}) public void afterClass() { safeCloseAsync(this.cosmosAsyncClient); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java index a264b54ee27e..f8e1364b8013 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java @@ -88,7 +88,7 @@ public static Object[][] partitionKeyRangesArgProvider() { }; } - @BeforeClass(groups = { "multi-region", "multi-master", "multi-master-circuit-breaker" }, timeOut = TIMEOUT) + @BeforeClass(groups = { "multi-region", "multi-master" }, timeOut = TIMEOUT) public void beforeClass() { this.client = getClientBuilder().buildAsyncClient(); AsyncDocumentClient asyncDocumentClient = 
BridgeInternal.getContextClient(this.client); @@ -215,7 +215,7 @@ public void faultInjectionServerErrorRuleTests_AddressRefresh_ConnectionDelay() } } - @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT) + @Test(groups = { "multi-master" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT) public void faultInjectionServerErrorRuleTests_AddressRefresh_ResponseDelay( FaultInjectionOperationType faultInjectionOperationType, OperationType operationType) throws JsonProcessingException { @@ -481,7 +481,7 @@ public void faultInjectionServerErrorRuleTests_AddressRefresh_TooManyRequest() t } } - @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, dataProvider = "partitionKeyRangesArgProvider", timeOut = 40 * TIMEOUT) + @Test(groups = { "multi-master" }, dataProvider = "partitionKeyRangesArgProvider", timeOut = 40 * TIMEOUT) public void faultInjectionServerErrorRuleTests_PartitionKeyRanges_DelayError( FaultInjectionServerErrorType faultInjectionServerErrorType, Duration delay, @@ -591,7 +591,7 @@ public void faultInjectionServerErrorRuleTests_PartitionKeyRanges_DelayError( } } - @Test(groups = { "multi-master", "multi-master-circuit-breaker" }, timeOut = 40 * TIMEOUT) + @Test(groups = { "multi-master" }, timeOut = 40 * TIMEOUT) public void faultInjectionServerErrorRuleTests_CollectionRead_ConnectionDelay() throws JsonProcessingException { // We need to create a new client because client may have marked region unavailable in other tests diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index f38a19995093..4d5d33056081 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -203,7 +203,7 @@ public 
CosmosAsyncDatabase getDatabase(String id) { } } - @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many", "multi-master-circuit-breaker"}, timeOut = SUITE_SETUP_TIMEOUT) + @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SETUP_TIMEOUT) public void beforeSuite() { logger.info("beforeSuite Started"); @@ -226,7 +226,7 @@ public static void parallelizeUnitTests(ITestContext context) { // context.getSuite().getXmlSuite().setThreadCount(Runtime.getRuntime().availableProcessors()); } - @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many", "multi-master-circuit-breaker"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) + @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) public void afterSuite() { logger.info("afterSuite Started"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java index 13394ae8ae52..e836852b7bfb 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/epkversion/IncrementalChangeFeedProcessorTest.java @@ -256,7 +256,7 @@ public void readFeedDocumentsStartFromCustomDate() throws InterruptedException { } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); @@ -370,7 +370,7 @@ public void readFeedDocumentsStartFromCustomDateForMultiWrite_test() throws Inte } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_WithCFPReadFromSatelliteRegion_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); @@ -505,7 +505,7 @@ public void readFeedDocumentsStartFromCustomDateForMultiWrite_WithCFPReadFromSat } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_WithCFPReadSwitchToSatelliteRegion_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java index 5c7c779b1998..0f0ab04840e1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java +++ 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/changefeed/pkversion/IncrementalChangeFeedProcessorTest.java @@ -257,7 +257,7 @@ public void readFeedDocumentsStartFromCustomDate() throws InterruptedException { } } - @Test(groups = {"multi-master", "multi-master-circuit-breaker"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) + @Test(groups = {"multi-master"}, timeOut = 50 * CHANGE_FEED_PROCESSOR_TIMEOUT) public void readFeedDocumentsStartFromCustomDateForMultiWrite_test() throws InterruptedException { CosmosClientBuilder clientBuilder = getClientBuilder(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml deleted file mode 100644 index 1cba368f3475..000000000000 --- a/sdk/cosmos/azure-cosmos-tests/src/test/resources/multi-master-circuit-breaker-testng.xml +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - diff --git a/sdk/cosmos/kafka-cosmos-matrix.json b/sdk/cosmos/kafka-cosmos-matrix.json index c1f43e8c04e6..9cce7eada94d 100644 --- a/sdk/cosmos/kafka-cosmos-matrix.json +++ b/sdk/cosmos/kafka-cosmos-matrix.json @@ -1,13 +1,13 @@ -//{ -// "matrix": { -// "Cosmos": { -// "Session_Integration": { -// "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", -// "ProfileFlag": "-Pkafka", -// "Pool": "env:LINUXPOOL", -// "OSVmImage": "env:LINUXVMIMAGE" -// } -// }, -// "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] -// } -//} +{ + "matrix": { + "Cosmos": { + "Session_Integration": { + "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", + "ProfileFlag": "-Pkafka", + "Pool": "env:LINUXPOOL", + "OSVmImage": "env:LINUXVMIMAGE" + } + }, + "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] + } +} diff --git a/sdk/cosmos/kafka-testcontainer-matrix.json b/sdk/cosmos/kafka-testcontainer-matrix.json index 
58190f7ff09a..f596cb337b07 100644 --- a/sdk/cosmos/kafka-testcontainer-matrix.json +++ b/sdk/cosmos/kafka-testcontainer-matrix.json @@ -1,13 +1,13 @@ -//{ -// "matrix": { -// "Cosmos": { -// "Session_Integration": { -// "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", -// "ProfileFlag": "-P build-kafka,kafka-integration", -// "Pool": "env:LINUXPOOL", -// "OSVmImage": "env:LINUXVMIMAGE" -// } -// }, -// "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] -// } -//} +{ + "matrix": { + "Cosmos": { + "Session_Integration": { + "ArmTemplateParameters": "@{ defaultConsistencyLevel = 'Session'; enableMultipleWriteLocations = $false }", + "ProfileFlag": "-P build-kafka,kafka-integration", + "Pool": "env:LINUXPOOL", + "OSVmImage": "env:LINUXVMIMAGE" + } + }, + "JavaTestVersion": ["1.8", "1.11", "1.17", "1.21"] + } +} diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index ea2de7f6c505..f4de8c45fdd5 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -12,7 +12,6 @@ "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", "-Pcircuit-breaker-read-all-read-many": "CircuitBreakerReadAllAndReadMany", "-Pmulti-region": "MultiRegion", - "-Pmulti-master-circuit-breaker": "MultiMasterCircuitBreaker", "-Plong": "Long", "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"": "TCP", "Session": "", @@ -24,6 +23,77 @@ "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; enablePartitionMerge = $true}": "" }, "include": [ + { + "ConsistencyConfig": { + "Session": { + "DESIRED_CONSISTENCY": "Session", + "ACCOUNT_CONSISTENCY": "Session", + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }" + }, + "Strong": { + "DESIRED_CONSISTENCY": "Strong", + "ACCOUNT_CONSISTENCY": "Strong", + "ArmTemplateParameters": "@{ 
enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }" + } + }, + "AdditionalArgs": [ + "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"" + ], + "ProfileFlag": "-Pe2e", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast", "-Pdirect" ], + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session' }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": [ "[\"Strong\", \"Session\"]", "[\"BoundedStaleness\"]", "[\"ConsistentPrefix\"]" ], + "ACCOUNT_CONSISTENCY": "Strong", + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast" ], + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCY": "BoundedStaleness", + "ACCOUNT_CONSISTENCY": "Strong", + "AdditionalArgs": "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"", + "ProfileFlag": "-Pe2e", + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Strong\", \"Session\"]", + "ACCOUNT_CONSISTENCY": "Strong", + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pdirect" ], + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "PROTOCOLS": "[\"Tcp\"]", 
+ "ProfileFlag": [ "-Plong" ], + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", diff --git a/sdk/cosmos/tests.yml b/sdk/cosmos/tests.yml index 8c529f1ba9f4..e98292e25c7c 100644 --- a/sdk/cosmos/tests.yml +++ b/sdk/cosmos/tests.yml @@ -40,62 +40,62 @@ extends: - name: AdditionalArgs value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' -# - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml -# parameters: -# TestName: 'Spring_Data_Cosmos_Integration' -# CloudConfig: -# Public: -# SubscriptionConfigurations: -# - $(sub-config-azure-cloud-test-resources) -# - $(sub-config-cosmos-azure-cloud-test-resources) -# MatrixConfigs: -# - Name: Cosmos_live_test_integration -# Path: sdk/spring/pipeline/cosmos-integration-matrix.json -# Selection: all -# GenerateVMJobs: true -# ServiceDirectory: spring -# TestResourceDirectories: -# - spring/spring-cloud-azure-integration-tests/test-resources/cosmos-spring -# Artifacts: -# - name: azure-spring-data-cosmos -# groupId: com.azure -# safeName: azurespringdatacosmos -# TimeoutInMinutes: 90 -# PreSteps: -# - template: /eng/pipelines/templates/steps/install-reporting-tools.yml -# TestGoals: 'verify' -# TestOptions: '$(ProfileFlag) -DskipCompile=true -DskipTestCompile=true -DcreateSourcesJar=false' -# AdditionalVariables: -# - name: AdditionalArgs -# value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' -# -# - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml -# parameters: -# TestName: 'Kafka_Cosmos_Integration' -# 
CloudConfig: -# Public: -# SubscriptionConfigurations: -# - $(sub-config-azure-cloud-test-resources) -# - $(sub-config-cosmos-azure-cloud-test-resources) -# MatrixConfigs: -# - Name: Kafka_Cosmos_Integration_Test -# Path: sdk/cosmos/kafka-cosmos-matrix.json -# Selection: all -# GenerateVMJobs: true -# TestFromSource: false -# ServiceDirectory: cosmos -# TestResourceDirectories: -# - cosmos/ -# Artifacts: -# - name: azure-cosmos-kafka-connect -# groupId: com.azure.cosmos.kafka -# safeName: azurecosmoskafkaconnect -# TimeoutInMinutes: 120 -# PreSteps: -# - template: /eng/pipelines/templates/steps/install-reporting-tools.yml -# TestGoals: 'clean verify' -# TestOptions: '$(ProfileFlag) $(AdditionalArgs)' -# AdditionalVariables: -# - name: AdditionalArgs -# value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' + - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml + parameters: + TestName: 'Spring_Data_Cosmos_Integration' + CloudConfig: + Public: + SubscriptionConfigurations: + - $(sub-config-azure-cloud-test-resources) + - $(sub-config-cosmos-azure-cloud-test-resources) + MatrixConfigs: + - Name: Cosmos_live_test_integration + Path: sdk/spring/pipeline/cosmos-integration-matrix.json + Selection: all + GenerateVMJobs: true + ServiceDirectory: spring + TestResourceDirectories: + - spring/spring-cloud-azure-integration-tests/test-resources/cosmos-spring + Artifacts: + - name: azure-spring-data-cosmos + groupId: com.azure + safeName: azurespringdatacosmos + TimeoutInMinutes: 90 + PreSteps: + - template: /eng/pipelines/templates/steps/install-reporting-tools.yml + TestGoals: 'verify' + TestOptions: '$(ProfileFlag) -DskipCompile=true -DskipTestCompile=true -DcreateSourcesJar=false' + AdditionalVariables: + - name: AdditionalArgs + value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) 
-DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' + + - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml + parameters: + TestName: 'Kafka_Cosmos_Integration' + CloudConfig: + Public: + SubscriptionConfigurations: + - $(sub-config-azure-cloud-test-resources) + - $(sub-config-cosmos-azure-cloud-test-resources) + MatrixConfigs: + - Name: Kafka_Cosmos_Integration_Test + Path: sdk/cosmos/kafka-cosmos-matrix.json + Selection: all + GenerateVMJobs: true + TestFromSource: false + ServiceDirectory: cosmos + TestResourceDirectories: + - cosmos/ + Artifacts: + - name: azure-cosmos-kafka-connect + groupId: com.azure.cosmos.kafka + safeName: azurecosmoskafkaconnect + TimeoutInMinutes: 120 + PreSteps: + - template: /eng/pipelines/templates/steps/install-reporting-tools.yml + TestGoals: 'clean verify' + TestOptions: '$(ProfileFlag) $(AdditionalArgs)' + AdditionalVariables: + - name: AdditionalArgs + value: '-DCOSMOS.CLIENT_TELEMETRY_ENDPOINT=$(cosmos-client-telemetry-endpoint) -DCOSMOS.CLIENT_TELEMETRY_COSMOS_ACCOUNT=$(cosmos-client-telemetry-cosmos-account)' From 7f17459711638120c41813b2416e9076086b3a85 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 13:41:37 -0400 Subject: [PATCH 37/51] Test multi-region + circuit-breaker job. 
--- sdk/cosmos/live-platform-matrix.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index f4de8c45fdd5..fc01f3698f29 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -125,6 +125,21 @@ "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "MultiMaster_MultiRegion": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[\"East US 2\"]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pflaky-multi-master", "-Pcircuit-breaker-misc-direct", "-Pcircuit-breaker-misc-gateway", "-Pcircuit-breaker-read-all-read-many", "-Pfast", "-Pdirect" ], + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } } ] } From 9c338d1055f67ad3e20d16cd20d2d75903a77327 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 13:45:25 -0400 Subject: [PATCH 38/51] Test multi-region + circuit-breaker job. 
--- .../main/java/com/azure/cosmos/CosmosClientBuilder.java | 4 ---- .../azure/cosmos/implementation/RxDocumentClientImpl.java | 8 -------- 2 files changed, 12 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 40adada5f6fe..d08a6992aef1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1179,8 +1179,6 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { StopWatch stopwatch = new StopWatch(); stopwatch.start(); - logger.info("Circuit breaker default opt-in result : {}", Configs.shouldOptInDefaultCircuitBreakerConfig()); - if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { System.setProperty( "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", @@ -1227,8 +1225,6 @@ public CosmosClient buildClient() { StopWatch stopwatch = new StopWatch(); stopwatch.start(); - logger.info("Circuit breaker default opt-in result : {}", Configs.shouldOptInDefaultCircuitBreakerConfig()); - if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { System.setProperty( "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 3e4765f814ec..3b33a60f1436 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -557,14 +557,6 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); -// System.setProperty( -// "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", -// 
"{\"isPartitionLevelCircuitBreakerEnabled\": true, " -// + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," -// + "\"consecutiveExceptionCountToleratedForReads\": 50," -// + "\"consecutiveExceptionCountToleratedForWrites\": 25," -// + "}"); - this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); this.globalPartitionEndpointManagerForCircuitBreaker.init(); From 12e01eb1786706628507af7b316eadaf5734edd2 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 13:46:39 -0400 Subject: [PATCH 39/51] Test multi-region + circuit-breaker job. --- .../cosmos/FaultInjectionWithAvailabilityStrategyTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index 466ed5826797..4a1901a07cf1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -229,7 +229,7 @@ public void beforeClass() { Map writeRegionMap = this.getRegionMap(databaseAccount, true); - this.writeableRegions = new ArrayList<>(Arrays.asList("East US", "South Central US")); + this.writeableRegions = new ArrayList<>(writeRegionMap.keySet()); assertThat(this.writeableRegions).isNotNull(); assertThat(this.writeableRegions.size()).isGreaterThanOrEqualTo(2); @@ -323,7 +323,7 @@ public void beforeClass() { // When the container does not exist yet, you would see 401 for example for point reads etc. 
// So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(10_000); + Thread.sleep(3000); } catch (InterruptedException e) { throw new RuntimeException(e); } @@ -5086,7 +5086,7 @@ private void execute( // When the container does not exist yet, you would see 401 for example for point reads etc. // So, adding this delay after container creation to minimize risk of hitting these errors try { - Thread.sleep(10_000); + Thread.sleep(3000); } catch (InterruptedException e) { throw new RuntimeException(e); } From 0b224f94ea6d45e7bd0a9611dff9a64e54d2135b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 21:04:50 -0400 Subject: [PATCH 40/51] Test multi-region + circuit-breaker job. --- .../FaultInjectionMetadataRequestRuleTests.java | 12 +++++++++++- .../implementation/RxDocumentClientImpl.java | 16 ++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java index f8e1364b8013..ca61aac2187d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java @@ -71,6 +71,7 @@ public static Object[][] operationTypeProvider() { { FaultInjectionOperationType.READ_ITEM, OperationType.Read }, { FaultInjectionOperationType.REPLACE_ITEM, OperationType.Replace }, { FaultInjectionOperationType.CREATE_ITEM, OperationType.Create }, + { FaultInjectionOperationType.UPSERT_ITEM, OperationType.Upsert }, { FaultInjectionOperationType.DELETE_ITEM, OperationType.Delete }, { FaultInjectionOperationType.QUERY_ITEM, OperationType.Query }, { FaultInjectionOperationType.PATCH_ITEM, 
OperationType.Patch } @@ -90,6 +91,15 @@ public static Object[][] partitionKeyRangesArgProvider() { @BeforeClass(groups = { "multi-region", "multi-master" }, timeOut = TIMEOUT) public void beforeClass() { + + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + this.client = getClientBuilder().buildAsyncClient(); AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); GlobalEndpointManager globalEndpointManager = asyncDocumentClient.getGlobalEndpointManager(); @@ -215,7 +225,7 @@ public void faultInjectionServerErrorRuleTests_AddressRefresh_ConnectionDelay() } } - @Test(groups = { "multi-master" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT) + @Test(groups = { "multi-master" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT, invocationCount = 3) public void faultInjectionServerErrorRuleTests_AddressRefresh_ResponseDelay( FaultInjectionOperationType faultInjectionOperationType, OperationType operationType) throws JsonProcessingException { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 3b33a60f1436..ad2e24289b8c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5993,6 +5993,14 @@ public void addPartitionLevelUnavailableRegionsForRequest( checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); checkNotNull(this.globalPartitionEndpointManagerForCircuitBreaker, 
"globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + // setting it here in case request.requestContext.resolvedPartitionKeyRange + // is not assigned in either GlobalAddressResolver / RxGatewayStoreModel (possible if there are Gateway timeouts) + // and circuit breaker also kicks in to mark a failure resolvedPartitionKeyRange (will result in NullPointerException and will + // help failover as well) + // also resolvedPartitionKeyRange will be overridden in GlobalAddressResolver / RxGatewayStoreModel irrespective + // so staleness is not an issue + request.requestContext.resolvedPartitionKeyRange = resolvedPartitionKeyRange; + List unavailableRegionsForPartition = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( request.getResourceId(), @@ -6039,6 +6047,14 @@ public void addPartitionLevelUnavailableRegionsForFeedRequest( checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); + // setting it here in case request.requestContext.resolvedPartitionKeyRange + // is not assigned in either GlobalAddressResolver / RxGatewayStoreModel (possible if there are Gateway timeouts) + // and circuit breaker also kicks in to mark a failure resolvedPartitionKeyRange (will result in NullPointerException and will + // help failover as well) + // also resolvedPartitionKeyRange will be overridden in GlobalAddressResolver / RxGatewayStoreModel irrespective + // so staleness is not an issue + request.requestContext.resolvedPartitionKeyRange = resolvedPartitionKeyRange; + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); From 4823309888aefb0dbb748a0d64a81088766b2765 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 15 Aug 2024 21:09:05 -0400 Subject: [PATCH 41/51] Test multi-region + circuit-breaker job. 
--- .../com/azure/cosmos/implementation/RxDocumentClientImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index ad2e24289b8c..c1e38014c8b0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5998,7 +5998,7 @@ public void addPartitionLevelUnavailableRegionsForRequest( // and circuit breaker also kicks in to mark a failure resolvedPartitionKeyRange (will result in NullPointerException and will // help failover as well) // also resolvedPartitionKeyRange will be overridden in GlobalAddressResolver / RxGatewayStoreModel irrespective - // so staleness is not an issue + // so staleness is not an issue (after doing a validation of parent-child relationship b/w initial and new partitionKeyRange) request.requestContext.resolvedPartitionKeyRange = resolvedPartitionKeyRange; List unavailableRegionsForPartition @@ -6052,7 +6052,7 @@ public void addPartitionLevelUnavailableRegionsForFeedRequest( // and circuit breaker also kicks in to mark a failure resolvedPartitionKeyRange (will result in NullPointerException and will // help failover as well) // also resolvedPartitionKeyRange will be overridden in GlobalAddressResolver / RxGatewayStoreModel irrespective - // so staleness is not an issue + // so staleness is not an issue (after doing a validation of parent-child relationship b/w initial and new partitionKeyRange) request.requestContext.resolvedPartitionKeyRange = resolvedPartitionKeyRange; if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { From 8bffa7802875afede258332e8ccf4ff215e90d63 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: 
Thu, 15 Aug 2024 21:16:22 -0400 Subject: [PATCH 42/51] Test multi-region + circuit-breaker job. --- .../FaultInjectionMetadataRequestRuleTests.java | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java index ca61aac2187d..d9875ac79f0f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/faultinjection/FaultInjectionMetadataRequestRuleTests.java @@ -91,15 +91,6 @@ public static Object[][] partitionKeyRangesArgProvider() { @BeforeClass(groups = { "multi-region", "multi-master" }, timeOut = TIMEOUT) public void beforeClass() { - - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); - this.client = getClientBuilder().buildAsyncClient(); AsyncDocumentClient asyncDocumentClient = BridgeInternal.getContextClient(this.client); GlobalEndpointManager globalEndpointManager = asyncDocumentClient.getGlobalEndpointManager(); @@ -225,7 +216,7 @@ public void faultInjectionServerErrorRuleTests_AddressRefresh_ConnectionDelay() } } - @Test(groups = { "multi-master" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT, invocationCount = 3) + @Test(groups = { "multi-master" }, dataProvider = "operationTypeProvider", timeOut = 4 * TIMEOUT) public void faultInjectionServerErrorRuleTests_AddressRefresh_ResponseDelay( FaultInjectionOperationType faultInjectionOperationType, OperationType operationType) throws 
JsonProcessingException { From a9311ce23f006ee32730a9a3a1f90302baffcc8a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 16 Aug 2024 11:54:32 -0400 Subject: [PATCH 43/51] Scrubbing off Java 21 emulator targeting Spring emulator test. --- .../templates/stages/cosmos-emulator-matrix.json | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/eng/pipelines/templates/stages/cosmos-emulator-matrix.json b/eng/pipelines/templates/stages/cosmos-emulator-matrix.json index 3e3808935e83..21a437cf6f0d 100644 --- a/eng/pipelines/templates/stages/cosmos-emulator-matrix.json +++ b/eng/pipelines/templates/stages/cosmos-emulator-matrix.json @@ -18,19 +18,6 @@ "JavaTestVersion": "1.17", "AdditionalArgs": "-DargLine=\"-DACCOUNT_HOST=https://localhost:8081/ -DCOSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY=true\"" }, - "Spring Emulator Only Integration Tests - Java 21": { - "ProfileFlag": "-Pintegration-test-emulator", - "JavaTestVersion": "1.21", - "ACCOUNT_HOST": "https://localhost:8081/", - "ACCOUNT_KEY": "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==", - "SECONDARY_ACCOUNT_KEY": "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==", - "NEW_ACCOUNT_HOST": "https://localhost:8081/", - "NEW_ACCOUNT_KEY": "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==", - "NEW_SECONDARY_ACCOUNT_KEY": "C2y6yDjf5/R+ob0N8A7Cgv30VRDJIWEHLM+4QDU5DE2nQ9nDuVTqobD4b8mGGyPMbIZnqyMsEcaGQy67XIw/Jw==", - "TestFromSource": true, - "AdditionalArgs": "-Dspring-e2e -DargLine=-DACCOUNT_HOST=https://localhost:8081/ -DCOSMOS.AZURE_COSMOS_DISABLE_NON_STREAMING_ORDER_BY=true", - "Language": "Spring" - }, "Spring Emulator Only Integration Tests - Java 17": { "ProfileFlag": "-Pintegration-test-emulator", "JavaTestVersion": "1.17", From a13261804a3fb0d6b9d10027cea304ac62318071 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 19 Aug 2024 09:43:47 -0400 Subject: [PATCH 
44/51] Refactoring. --- ...obalPartitionEndpointManagerForCircuitBreaker.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 1e2b6f68326c..90121a297663 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -4,7 +4,6 @@ package com.azure.cosmos.implementation.circuitBreaker; import com.azure.cosmos.implementation.Configs; -import com.azure.cosmos.implementation.CosmosSchedulers; import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; @@ -52,7 +51,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker implements AutoClos private final AtomicReference globalAddressResolverSnapshot; private final ConcurrentHashMap locationToRegion; private final AtomicBoolean isClosed = new AtomicBoolean(false); - private final Scheduler scheduler = Schedulers.newSingle("partition-availability-staleness-check"); + private final Scheduler partitionRecoveryScheduler = Schedulers.newSingle("partition-availability-staleness-check"); public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); @@ -69,7 +68,7 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo public void init() { if 
(this.consecutiveExceptionBasedCircuitBreaker.isPartitionLevelCircuitBreakerEnabled()) { - this.updateStaleLocationInfo().subscribeOn(scheduler).subscribe(); + this.updateStaleLocationInfo().subscribeOn(this.partitionRecoveryScheduler).subscribe(); } } @@ -203,7 +202,7 @@ private Flux updateStaleLocationInfo() { .delayElement(Duration.ofSeconds(Configs.getStalePartitionUnavailabilityRefreshIntervalInSeconds())) .repeat(() -> !this.isClosed.get()) .flatMap(ignore -> Flux.fromIterable(this.partitionKeyRangesWithPossibleUnavailableRegions.entrySet())) - .publishOn(this.scheduler) + .publishOn(this.partitionRecoveryScheduler) .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> { logger.debug("Background updateStaleLocationInfo kicking in..."); @@ -260,7 +259,7 @@ private Flux updateStaleLocationInfo() { return gatewayAddressCache .submitOpenConnectionTasks(partitionKeyRangeWrapper.getPartitionKeyRange(), partitionKeyRangeWrapper.getCollectionResourceId()) - .publishOn(this.scheduler) + .publishOn(this.partitionRecoveryScheduler) .timeout(Duration.ofSeconds(Configs.getConnectionEstablishmentTimeoutForPartitionRecoveryInSeconds())) .doOnComplete(() -> { @@ -353,7 +352,7 @@ public void setGlobalAddressResolver(GlobalAddressResolver globalAddressResolver @Override public void close() { this.isClosed.set(true); - this.scheduler.dispose(); + this.partitionRecoveryScheduler.dispose(); } private class PartitionLevelLocationUnavailabilityInfo { From 5c7d736c2d8dc9b88c4e9006d892b37900845c8b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 20 Aug 2024 09:51:39 -0400 Subject: [PATCH 45/51] Reacting to review comments. 
--- .../com/azure/cosmos/CosmosClientBuilder.java | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index d08a6992aef1..f0239dc54152 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1180,13 +1180,10 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { stopwatch.start(); if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); + + if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true}"); + } } this.resetSessionCapturingType(); @@ -1226,13 +1223,7 @@ public CosmosClient buildClient() { stopwatch.start(); if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true}"); } this.resetSessionCapturingType(); From 8cb5dbf8193078dddf862039d3bb84a39027a27c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 21 Aug 2024 10:51:12 -0400 Subject: [PATCH 46/51] Attempt 
at fixing live tests pipeline. --- sdk/cosmos/tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/tests.yml b/sdk/cosmos/tests.yml index e6b99fb796ea..6300d61dca6c 100644 --- a/sdk/cosmos/tests.yml +++ b/sdk/cosmos/tests.yml @@ -3,10 +3,10 @@ trigger: none extends: template: /eng/pipelines/templates/stages/1es-redirect.yml parameters: - UseFederatedAuth: false stages: - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml parameters: + UseFederatedAuth: false CloudConfig: Cosmos_Public: SubscriptionConfigurations: @@ -43,6 +43,7 @@ extends: - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml parameters: + UseFederatedAuth: false TestName: 'Spring_Data_Cosmos_Integration' CloudConfig: Public: @@ -72,6 +73,7 @@ extends: - template: /eng/pipelines/templates/stages/archetype-sdk-tests-isolated.yml parameters: + UseFederatedAuth: false TestName: 'Kafka_Cosmos_Integration' CloudConfig: Public: From 93b4d47a8e3713499ab3d04277c6f4636944cbbd Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 21 Aug 2024 11:23:10 -0400 Subject: [PATCH 47/51] Reacting to review comments. 
--- .../com/azure/cosmos/CosmosClientBuilder.java | 11 --- sdk/cosmos/live-platform-matrix.json | 89 +------------------ 2 files changed, 1 insertion(+), 99 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index f0239dc54152..1c9e58517fbb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1179,13 +1179,6 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { StopWatch stopwatch = new StopWatch(); stopwatch.start(); - if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { - - if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true}"); - } - } - this.resetSessionCapturingType(); validateConfig(); buildConnectionPolicy(); @@ -1222,10 +1215,6 @@ public CosmosClient buildClient() { StopWatch stopwatch = new StopWatch(); stopwatch.start(); - if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true}"); - } - this.resetSessionCapturingType(); validateConfig(); buildConnectionPolicy(); diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index fc01f3698f29..7234a1fa1603 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -23,77 +23,6 @@ "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; enablePartitionMerge = $true}": "" }, "include": [ - { - "ConsistencyConfig": { - "Session": { - "DESIRED_CONSISTENCY": "Session", - "ACCOUNT_CONSISTENCY": "Session", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; 
defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }" - }, - "Strong": { - "DESIRED_CONSISTENCY": "Strong", - "ACCOUNT_CONSISTENCY": "Strong", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }" - } - }, - "AdditionalArgs": [ - "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"" - ], - "ProfileFlag": "-Pe2e", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast", "-Pdirect" ], - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session' }", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": [ "[\"Strong\", \"Session\"]", "[\"BoundedStaleness\"]", "[\"ConsistentPrefix\"]" ], - "ACCOUNT_CONSISTENCY": "Strong", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast" ], - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCY": "BoundedStaleness", - "ACCOUNT_CONSISTENCY": "Strong", - "AdditionalArgs": "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"", - "ProfileFlag": "-Pe2e", - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Strong\", \"Session\"]", - "ACCOUNT_CONSISTENCY": "Strong", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pdirect" ], - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", - "Agent": { - 
"ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Plong" ], - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", @@ -105,7 +34,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], - "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\"", + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG={\"isPartitionLevelCircuitBreakerEnabled\": true}\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } @@ -121,22 +50,6 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], - "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=FALSE\"", - "Agent": { - "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } - } - }, - { - "DESIRED_CONSISTENCIES": "[\"Session\"]", - "ACCOUNT_CONSISTENCY": "Session", - "ArmConfig": { - "MultiMaster_MultiRegion": { - "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", - "PREFERRED_LOCATIONS": "[\"East US 2\"]" - } - }, - "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pflaky-multi-master", "-Pcircuit-breaker-misc-direct", "-Pcircuit-breaker-misc-gateway", "-Pcircuit-breaker-read-all-read-many", "-Pfast", "-Pdirect" ], "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 3735f9c18a68335f46f009dd100e4ddcd1305c2d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 21 Aug 2024 11:45:37 -0400 Subject: [PATCH 
48/51] Reacting to review comments. --- sdk/cosmos/live-platform-matrix.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 7234a1fa1603..f8cc40f56375 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -34,7 +34,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], - "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG={\"isPartitionLevelCircuitBreakerEnabled\": true}\"", + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG=\"{\"isPartitionLevelCircuitBreakerEnabled\": true}\"\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 036ebe62b60fa6a56543caa0c8f79c7bcb5e8d91 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 21 Aug 2024 12:18:18 -0400 Subject: [PATCH 49/51] Reacting to review comments. --- sdk/cosmos/live-platform-matrix.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index f8cc40f56375..7234a1fa1603 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -34,7 +34,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], - "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG=\"{\"isPartitionLevelCircuitBreakerEnabled\": true}\"\"", + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG={\"isPartitionLevelCircuitBreakerEnabled\": true}\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 82fb59d916eca107293b037ca7f830ed0219599d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 21 Aug 2024 12:56:47 -0400 Subject: [PATCH 50/51] Revert some live test pipeline changes. 
--- .../com/azure/cosmos/CosmosClientBuilder.java | 8 ++ sdk/cosmos/live-platform-matrix.json | 89 ++++++++++++++++++- 2 files changed, 96 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 1c9e58517fbb..f0fece03b0be 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1179,6 +1179,10 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { StopWatch stopwatch = new StopWatch(); stopwatch.start(); + if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true}"); + } + this.resetSessionCapturingType(); validateConfig(); buildConnectionPolicy(); @@ -1215,6 +1219,10 @@ public CosmosClient buildClient() { StopWatch stopwatch = new StopWatch(); stopwatch.start(); + if (Configs.shouldOptInDefaultCircuitBreakerConfig()) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true}"); + } + this.resetSessionCapturingType(); validateConfig(); buildConnectionPolicy(); diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index 7234a1fa1603..fc01f3698f29 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -23,6 +23,77 @@ "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true; enablePartitionMerge = $true}": "" }, "include": [ + { + "ConsistencyConfig": { + "Session": { + "DESIRED_CONSISTENCY": "Session", + "ACCOUNT_CONSISTENCY": "Session", + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }" + }, 
+ "Strong": { + "DESIRED_CONSISTENCY": "Strong", + "ACCOUNT_CONSISTENCY": "Strong", + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }" + } + }, + "AdditionalArgs": [ + "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"" + ], + "ProfileFlag": "-Pe2e", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast", "-Pdirect" ], + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session' }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": [ "[\"Strong\", \"Session\"]", "[\"BoundedStaleness\"]", "[\"ConsistentPrefix\"]" ], + "ACCOUNT_CONSISTENCY": "Strong", + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pfast" ], + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCY": "BoundedStaleness", + "ACCOUNT_CONSISTENCY": "Strong", + "AdditionalArgs": "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"", + "ProfileFlag": "-Pe2e", + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Strong\", \"Session\"]", + "ACCOUNT_CONSISTENCY": "Strong", + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pdirect" ], + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Strong' }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + 
}, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Plong" ], + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $false; defaultConsistencyLevel = 'Session'; enablePartitionMerge = $true }", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, { "DESIRED_CONSISTENCIES": "[\"Session\"]", "ACCOUNT_CONSISTENCY": "Session", @@ -34,7 +105,7 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], - "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG={\"isPartitionLevelCircuitBreakerEnabled\": true}\"", + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=TRUE\"", "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } @@ -50,6 +121,22 @@ }, "PROTOCOLS": "[\"Tcp\"]", "ProfileFlag": [ "-Pmulti-master" ], + "AdditionalArgs": "\"-DCOSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_DEFAULT_CONFIG_OPT_IN=FALSE\"", + "Agent": { + "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } + } + }, + { + "DESIRED_CONSISTENCIES": "[\"Session\"]", + "ACCOUNT_CONSISTENCY": "Session", + "ArmConfig": { + "MultiMaster_MultiRegion": { + "ArmTemplateParameters": "@{ enableMultipleWriteLocations = $true; defaultConsistencyLevel = 'Session'; enableMultipleRegions = $true }", + "PREFERRED_LOCATIONS": "[\"East US 2\"]" + } + }, + "PROTOCOLS": "[\"Tcp\"]", + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pflaky-multi-master", "-Pcircuit-breaker-misc-direct", "-Pcircuit-breaker-misc-gateway", "-Pcircuit-breaker-read-all-read-many", "-Pfast", "-Pdirect" ], "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From 74651546a661476f24104e8e11f6e5256ba793ed Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 22 Aug 2024 15:30:11 -0400 Subject: [PATCH 51/51] Updated CHANGELOG.md. 
--- sdk/cosmos/azure-cosmos/CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index bbdc914939e2..5c2b9237c9fa 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -7,6 +7,7 @@ #### Breaking Changes #### Bugs Fixed +* Fixed a direct buffer memory leak due to not explicitly stopping the partition recovery flow in the per-partition circuit breaker. - See [PR 41486](https://github.com/Azure/azure-sdk-for-java/pull/41486) #### Other Changes @@ -15,7 +16,7 @@ #### Bugs Fixed * Fixed an eager prefetch issue for order by queries to prevent unnecessary round trips. - See [PR 41348](https://github.com/Azure/azure-sdk-for-java/pull/41348) * Fixed an issue to not fail fast for metadata resource resolution when faults are injected for Gateway routed operations. - See [PR 41428](https://github.com/Azure/azure-sdk-for-java/pull/41428) -* Fixed an issue to adhere with exception tolerance thresholds for consecutive read and write failures with circuit breaker. - See [PR 41248](https://github.com/Azure/azure-sdk-for-java/pull/41428) +* Fixed an issue to adhere to exception tolerance thresholds for consecutive read and write failures with circuit breaker. - See [PR 41428](https://github.com/Azure/azure-sdk-for-java/pull/41428) * Fixed excessive retries bug when it has been identified that operations through a closed `CosmosClient` [or] `CosmosAsyncClient` are executed. - See [PR 41364](https://github.com/Azure/azure-sdk-for-java/pull/41364) #### Other Changes