From 140574fa90dc00a5750c58c6cf3c70594b0a2f9e Mon Sep 17 00:00:00 2001 From: Andrew Ross Date: Tue, 4 Oct 2022 17:13:07 -0700 Subject: [PATCH] Fix flaky DecommissionControllerTests.testTimesOut This test fails pretty reliably if I run it on repeat. I believe the problem is that the test assumes the function will take longer than 2ms, which is likely not a valid assumption in all cases. Fortunately, I can pass in a zero duration which is guaranteed to timeout even if the system clock does not advance at all. Also moved the assertions out of the callback into the main test method, otherwise the assertion error messages would get buried and the test report would just show a timeout error. Signed-off-by: Andrew Ross --- CHANGELOG.md | 1 + .../DecommissionControllerTests.java | 18 +++++++++++------- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d20a934588c4b..acb1bbbdd6a86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -111,6 +111,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - [Bug]: Fixed invalid location of JDK dependency for arm64 architecture([#4613](https://github.com/opensearch-project/OpenSearch/pull/4613)) - [Bug]: Alias filter lost after rollover ([#4499](https://github.com/opensearch-project/OpenSearch/pull/4499)) - Attempt to fix Github workflow for Gradle Check job ([#4679](https://github.com/opensearch-project/OpenSearch/pull/4679)) +- Fix flaky DecommissionControllerTests.testTimesOut ([4683](https://github.com/opensearch-project/OpenSearch/pull/4683)) ### Security diff --git a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java index 8b5343184dabd..5b2dce277189c 100644 --- a/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java +++ b/server/src/test/java/org/opensearch/cluster/decommission/DecommissionControllerTests.java @@ -8,6 +8,7 @@ package org.opensearch.cluster.decommission; +import org.hamcrest.MatcherAssert; import org.junit.After; import org.junit.Before; import org.opensearch.OpenSearchTimeoutException; @@ -45,6 +46,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -53,6 +55,7 @@ import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.sameInstance; import static org.opensearch.cluster.ClusterState.builder; import static org.opensearch.cluster.OpenSearchAllocationTestCase.createAllocationService; @@ -213,24 +216,25 @@ public void testTimesOut() throws InterruptedException { nodesToBeRemoved.add(clusterService.state().nodes().get("node13")); nodesToBeRemoved.add(clusterService.state().nodes().get("node14")); nodesToBeRemoved.add(clusterService.state().nodes().get("node15")); + final AtomicReference exceptionReference = new AtomicReference<>(); decommissionController.removeDecommissionedNodes( nodesToBeRemoved, "unit-test-timeout", - TimeValue.timeValueMillis(2), - new ActionListener() { + TimeValue.timeValueMillis(0), + new ActionListener<>() { @Override - public void onResponse(Void unused) { - fail("response shouldn't have been called"); - } + public void onResponse(Void unused) {} @Override public void onFailure(Exception e) { - assertThat(e, instanceOf(OpenSearchTimeoutException.class)); - assertThat(e.getMessage(), containsString("waiting for removal of decommissioned nodes")); + exceptionReference.set(e); countDownLatch.countDown(); } } ); + MatcherAssert.assertThat("Expected onFailure to be called", exceptionReference.get(), notNullValue()); + MatcherAssert.assertThat(exceptionReference.get(), instanceOf(OpenSearchTimeoutException.class)); + MatcherAssert.assertThat(exceptionReference.get().getMessage(), containsString("waiting for removal of decommissioned nodes")); assertTrue(countDownLatch.await(30, TimeUnit.SECONDS)); }