From bf20e2a424e1d99407e2ec7700447177c596e746 Mon Sep 17 00:00:00 2001 From: Zachary Pinto Date: Fri, 28 Jun 2024 12:42:24 -0700 Subject: [PATCH] Fix flaky updateInstance test. (#2825) The test case is flaky because it switches the resources from SEMI_AUTO to FULL_AUTO while the cluster is in MaintenanceMode (MM). When a resource is SEMI_AUTO, the MM rebalancer is not used because that would cause the preferenceList to potentially change and never recover to what it previously was. In the test case, we were switching the resources from SEMI_AUTO to FULL_AUTO, causing the MM rebalancer to be used. There is then a race condition between the controller computing a new IdealState, which drops the offline instances from the preferenceList and makes the IdealState invalid for SEMI_AUTO, and the test setting the resources back to SEMI_AUTO. If the controller wins, persisting the IdealState again with SEMI_AUTO will throw an exception. This change removes that logic and just tests that isEvacuateFinished is true, since all resources are SEMI_AUTO. We test isEvacuateFinished on FULL_AUTO resources in other places like TestZkHelixAdmin and TestInstanceOperation. 
--- .../rest/server/TestPerInstanceAccessor.java | 33 ++----------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java index 6ab727e85e..e3f0dcd6c5 100644 --- a/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java +++ b/helix-rest/src/test/java/org/apache/helix/rest/server/TestPerInstanceAccessor.java @@ -525,21 +525,6 @@ public void updateInstance() throws Exception { Assert.assertFalse((boolean) responseMap.get("successful")); // test isEvacuateFinished on instance with EVACUATE but has currentState - // Put the cluster in MM so no assignment is calculated - _gSetupTool.getClusterManagementTool() - .enableMaintenanceMode(CLUSTER_NAME, true, "Change resource to full-auto"); - - // Make the DBs FULL_AUTO and wait because EVACUATE is only supported for FULL_AUTO resources - Set resources = _resourcesMap.get(CLUSTER_NAME); - for (String resource : resources) { - IdealState idealState = - _gSetupTool.getClusterManagementTool().getResourceIdealState(CLUSTER_NAME, resource); - idealState.setRebalanceMode(IdealState.RebalanceMode.FULL_AUTO); - idealState.setDelayRebalanceEnabled(true); - idealState.setRebalanceDelay(360000); - _gSetupTool.getClusterManagementTool().setResourceIdealState(CLUSTER_NAME, resource, idealState); - } - new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=setInstanceOperation&instanceOperation=EVACUATE") .format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); instanceConfig = _configAccessor.getInstanceConfig(CLUSTER_NAME, INSTANCE_NAME); @@ -550,22 +535,8 @@ public void updateInstance() throws Exception { .format(CLUSTER_NAME, INSTANCE_NAME).post(this, entity); Map evacuateFinishedResult = OBJECT_MAPPER.readValue(response.readEntity(String.class), Map.class); Assert.assertEquals(response.getStatus(), 
Response.Status.OK.getStatusCode()); - // Returns false because the node still contains full-auto resources - Assert.assertFalse(evacuateFinishedResult.get("successful")); - - // Make all resources SEMI_AUTO again - for (String resource : resources) { - IdealState idealState = - _gSetupTool.getClusterManagementTool().getResourceIdealState(CLUSTER_NAME, resource); - idealState.setRebalanceMode(IdealState.RebalanceMode.SEMI_AUTO); - idealState.setDelayRebalanceEnabled(false); - idealState.setRebalanceDelay(0); - _gSetupTool.getClusterManagementTool().setResourceIdealState(CLUSTER_NAME, resource, idealState); - } - - // Exit MM - _gSetupTool.getClusterManagementTool() - .enableMaintenanceMode(CLUSTER_NAME, false, "Change resource to full-auto"); + // Returns true because the node only contains semi-auto resources + Assert.assertTrue(evacuateFinishedResult.get("successful")); // Because the resources are now all semi-auto, is EvacuateFinished should return true response = new JerseyUriRequestBuilder("clusters/{}/instances/{}?command=isEvacuateFinished")