diff --git a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTask.groovy b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTask.groovy index 84256d1500..825c9995c1 100644 --- a/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTask.groovy +++ b/orca-clouddriver/src/main/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTask.groovy @@ -23,13 +23,15 @@ import com.netflix.spinnaker.orca.clouddriver.pipeline.servergroup.support.Targe import com.netflix.spinnaker.orca.clouddriver.tasks.servergroup.ServerGroupCreator import com.netflix.spinnaker.orca.clouddriver.tasks.servergroup.WaitForRequiredInstancesDownTask import com.netflix.spinnaker.orca.clouddriver.utils.CloudProviderAware -import com.netflix.spinnaker.orca.clouddriver.utils.HealthHelper import org.springframework.beans.factory.annotation.Autowired import org.springframework.stereotype.Component import org.springframework.beans.factory.annotation.Value @Component class WaitForClusterDisableTask extends AbstractWaitForClusterWideClouddriverTask implements CloudProviderAware { + // to make the logic more readable, we map true and false to words + private static final boolean RUNNING = true + private static final boolean COMPLETED = false @Value('${tasks.disable-cluster-min-time-millis:90000}') private int MINIMUM_WAIT_TIME_MS @@ -68,33 +70,22 @@ class WaitForClusterDisableTask extends AbstractWaitForClusterWideClouddriverTas // null vs empty interestingHealthProviderNames do mean very different things to Spinnaker // a null value will result in Spinnaker waiting for discovery + platform, etc. whereas an empty will not wait for anything. if (interestingHealthProviderNames != null && interestingHealthProviderNames.isEmpty()) { - return false + return COMPLETED } if (!serverGroup.isPresent()) { - return false + return COMPLETED } - // Even if the server group is disabled, we want to make sure instances are down - // to prevent downstream stages (e.g. scaleDownCluster) from having to deal with disabled-but-instances-up server groups + // make sure that we wait for the disabled flag to be set def targetServerGroup = serverGroup.get() - if (targetServerGroup.isDisabled() || stage.context.desiredPercentage) { - return !waitForRequiredInstancesDownTask.hasSucceeded(stage, targetServerGroup as Map, targetServerGroup.getInstances(), interestingHealthProviderNames) + if (!targetServerGroup.isDisabled()) { + return RUNNING } - // TODO(lwander) investigate if the non-desiredPercentage/only-platform-health case code can be dropped in favor of waitForRequiredInstancesDownTask - // The operation can be considered complete if it was requested to only consider the platform health. - def platformHealthType = getPlatformHealthType(stage, targetServerGroup) - return !(platformHealthType && interestingHealthProviderNames == [platformHealthType]) - } - - private String getPlatformHealthType(StageExecution stage, TargetServerGroup targetServerGroup) { - def platformHealthType = targetServerGroup.instances.collect { instance -> - HealthHelper.findPlatformHealth(instance.health) - }?.find { - it.type - }?.type - - return platformHealthType ? platformHealthType : healthProviderNamesByPlatform[getCloudProvider(stage)] + // we want to make sure instances are down + // to prevent downstream stages (e.g. scaleDownCluster) from having to deal with disabled-but-instances-up server groups + // note that waitForRequiredInstancesDownTask knows how to deal with desiredPercentages, interestingHealthProviderNames, etc. + return !waitForRequiredInstancesDownTask.hasSucceeded(stage, targetServerGroup as Map, targetServerGroup.getInstances(), interestingHealthProviderNames) } } diff --git a/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTaskSpec.groovy b/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTaskSpec.groovy index b719b2efc4..211b87ccc6 100644 --- a/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTaskSpec.groovy +++ b/orca-clouddriver/src/test/groovy/com/netflix/spinnaker/orca/clouddriver/tasks/cluster/WaitForClusterDisableTaskSpec.groovy @@ -31,7 +31,7 @@ class WaitForClusterDisableTaskSpec extends Specification { @Subject def task = new WaitForClusterDisableTask([serverGroupCreator]) @Unroll - def "status=#status when oldSGDisabled=#oldSGDisabled, desiredPercentage=#desiredPct, interestingHealthProviderNames=#interestingHealthProviderNames"() { + def "status=#status when desiredPercentage=#desiredPct, interestingHealthProviderNames=#interestingHealthProviderNames"() { given: def stage = stage { context = [ @@ -53,7 +53,7 @@ class WaitForClusterDisableTaskSpec extends Specification { serverGroup("$clusterName-v051".toString(), "us-west-1", [:]), serverGroup("$clusterName-$newServerGroup".toString(), region, [:]), serverGroup("$clusterName-$oldServerGroup".toString(), region, [ - disabled: oldSGDisabled, + disabled: disabled, capacity: [desired: desired], instances: [ instance('i-1', platformHealthState, extraHealths), @@ -75,36 +75,37 @@ class WaitForClusterDisableTaskSpec extends Specification { result.getStatus() == status where: - dsgregion | minWaitTime | oldSGDisabled | desired | desiredPct | interestingHealthProviderNames | extraHealths | platformHealthState || status - "other" | 0 | false | 3 | null | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED // exercises if (!remainingDeployServerGroups)... - "other" | 90 | false | 3 | null | ['platformHealthType'] | [] | 'Unknown' || RUNNING // keeps running if duration < minWaitTime + dsgregion | minWaitTime | disabled | desired | desiredPct | interestingHealthProviderNames | extraHealths | platformHealthState || status + "other" | 0 | false | 3 | null | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED // exercises if (!remainingDeployServerGroups)... + "other" | 90 | false | 3 | null | ['platformHealthType'] | [] | 'Unknown' || RUNNING // keeps running if duration < minWaitTime + region | 0 | false | 3 | null | null | [] | 'Unknown' || RUNNING // keeps running if disabled is false // tests for isDisabled==true - region | 0 | true | 3 | null | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED - region | 0 | true | 3 | null | ['platformHealthType'] | [] | 'NotUnknown' || RUNNING // wait for instances down even if cluster is disabled - region | 0 | true | 3 | 100 | ['platformHealthType'] | [] | 'NotUnknown' || RUNNING // also wait for instances down with a desiredPct - region | 0 | true | 4 | 50 | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED - region | 0 | true | 3 | null | ['strangeType'] | [] | 'Unknown' || SUCCEEDED // intersection of interesting and provided healths is empty, so we're done - region | 0 | true | 3 | null | ['strangeType'] | health('strange', 'Down') | 'Unknown' || SUCCEEDED // also done if we provide it and are down... - region | 0 | true | 3 | null | ['strangeType'] | health('strange', 'Up') | 'Unknown' || RUNNING // ...but not if that extra health is up - - // tests for isDisabled==false, no desiredPct - region | 0 | false | 3 | null | [] | [] | 'Unknown' || SUCCEEDED // no health providers to check so short-circuits early - region | 0 | false | 3 | null | null | [] | 'Unknown' || RUNNING // exercises null vs empty behavior of interestingHealthProviderNames - region | 0 | false | 3 | null | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED // considered complete because only considers the platform health - region | 0 | false | 3 | null | ['platformHealthType'] | [] | 'Up' || SUCCEEDED // considered complete because only considers the platform health, despite platform health being Up - region | 0 | false | 3 | null | ['strangeType'] | [] | 'Unknown' || RUNNING // can't complete if we need to monitor an unknown health provider... - region | 0 | false | 3 | null | ['strangeType'] | health('strange', 'Down') | 'Unknown' || RUNNING // ...regardless of down status - - // tests for waitForRequiredInstancesDownTask.hasSucceeded - region | 0 | false | 3 | 100 | null | [] | 'Unknown' || SUCCEEDED // no other health providers than platform, and it looks down - region | 0 | false | 3 | 100 | null | [] | 'NotUnknown' || RUNNING // no other health providers than platform, and it looks NOT down - region | 0 | false | 4 | 100 | ['platformHealthType'] | [] | 'Unknown' || RUNNING // can't reach count(someAreDownAndNoneAreUp) >= targetDesiredSize - region | 0 | false | 4 | 50 | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED // all look down, and we want at least 2 down so we're done - region | 0 | false | 3 | 100 | ['strangeType'] | [] | 'Unknown' || SUCCEEDED // intersection of interesting and provided healths is empty, so we're done - region | 0 | false | 3 | 100 | ['strangeType'] | health('strange', 'Down') | 'Unknown' || SUCCEEDED // ...unless we have data for that health provider - region | 0 | false | 3 | 100 | ['strangeType'] | health('strange', 'Up') | 'Unknown' || RUNNING // ...unless we have data for that health provider - region | 0 | false | 3 | 100 | ['strangeType'] | health('strange', 'Starting') | 'Unknown' || SUCCEEDED // Starting is considered down + region | 0 | true | 3 | null | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED + region | 0 | true | 3 | null | ['platformHealthType'] | [] | 'NotUnknown' || RUNNING // wait for instances down even if cluster is disabled + region | 0 | true | 3 | 100 | ['platformHealthType'] | [] | 'NotUnknown' || RUNNING // also wait for instances down with a desiredPct + region | 0 | true | 4 | 50 | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED + region | 0 | true | 3 | null | ['strangeType'] | [] | 'Unknown' || SUCCEEDED // intersection of interesting and provided healths is empty, so we're done + region | 0 | true | 3 | null | ['strangeType'] | health('strange', 'Down') | 'Unknown' || SUCCEEDED // also done if we provide it and are down... + region | 0 | true | 3 | null | ['strangeType'] | health('strange', 'Up') | 'Unknown' || RUNNING // ...but not if that extra health is up + + // tests for no desiredPct + region | 0 | true | 3 | null | [] | [] | 'Unknown' || SUCCEEDED // no health providers to check so short-circuits early + region | 0 | true | 3 | null | null | [] | 'Unknown' || SUCCEEDED // exercises null vs empty behavior of interestingHealthProviderNames + region | 0 | true | 3 | null | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED // considered complete because only considers the platform health + region | 0 | true | 3 | null | ['platformHealthType'] | health('eureka', 'Up') | 'Unknown' || SUCCEEDED // up health is filtered out so we're done + region | 0 | true | 3 | null | ['strangeType'] | [] | 'Unknown' || SUCCEEDED // missing health is filtered out so we're done + region | 0 | true | 3 | null | ['strangeType'] | health('strange', 'Down') | 'Unknown' || SUCCEEDED // extra health is down so we're done + + // tests with desiredPct + region | 0 | true | 3 | 100 | null | [] | 'Unknown' || SUCCEEDED // no other health providers than platform + region | 0 | true | 3 | 100 | null | [] | 'NotUnknown' || RUNNING // no other health providers than platform + region | 0 | true | 4 | 100 | ['platformHealthType'] | [] | 'Unknown' || RUNNING // can't reach count(someAreDownAndNoneAreUp) >= targetDesiredSize + region | 0 | true | 4 | 50 | ['platformHealthType'] | [] | 'Unknown' || SUCCEEDED // all look down, and we want at least 2 down so we're done + region | 0 | true | 3 | 100 | ['strangeType'] | [] | 'Unknown' || SUCCEEDED // intersection of interesting and provided healths is empty, so we're done + region | 0 | true | 3 | 100 | ['strangeType'] | health('strange', 'Down') | 'Unknown' || SUCCEEDED // extra health is down so we're done + region | 0 | true | 3 | 100 | ['strangeType'] | health('strange', 'Up') | 'Unknown' || RUNNING // extra health is up so we're not done + region | 0 | true | 3 | 100 | ['strangeType'] | health('strange', 'Starting') | 'Unknown' || SUCCEEDED // Starting is considered down oldServerGroup = "v167" newServerGroup = "v168"