diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go
index 51de01e88ebf..142b7f918877 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_manager.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_manager.go
@@ -19,6 +19,7 @@ limitations under the License.
 package aws
 
 import (
+	"errors"
 	"fmt"
 	"io"
 	"math/rand"
@@ -183,14 +184,14 @@ func (m *AwsManager) GetAsgNodes(ref AwsRef) ([]AwsInstanceRef, error) {
 
 func (m *AwsManager) getAsgTemplate(asg *asg) (*asgTemplate, error) {
 	if len(asg.AvailabilityZones) < 1 {
-		return nil, fmt.Errorf("Unable to get first AvailabilityZone for %s", asg.Name)
+		return nil, fmt.Errorf("Unable to get first AvailabilityZone for ASG %q", asg.Name)
 	}
 
 	az := asg.AvailabilityZones[0]
 	region := az[0 : len(az)-1]
 
 	if len(asg.AvailabilityZones) > 1 {
-		klog.Warningf("Found multiple availability zones, using %s\n", az)
+		klog.Warningf("Found multiple availability zones for ASG %q; using %s\n", asg.Name, az)
 	}
 
 	instanceTypeName, err := m.buildInstanceType(asg)
@@ -198,12 +199,15 @@ func (m *AwsManager) getAsgTemplate(asg *asg) (*asgTemplate, error) {
 		return nil, err
 	}
 
-	return &asgTemplate{
-		InstanceType: InstanceTypes[instanceTypeName],
-		Region:       region,
-		Zone:         az,
-		Tags:         asg.Tags,
-	}, nil
+	if t, ok := InstanceTypes[instanceTypeName]; ok {
+		return &asgTemplate{
+			InstanceType: t,
+			Region:       region,
+			Zone:         az,
+			Tags:         asg.Tags,
+		}, nil
+	}
+	return nil, fmt.Errorf("ASG %q uses the unknown EC2 instance type %q", asg.Name, instanceTypeName)
 }
 
 func (m *AwsManager) buildInstanceType(asg *asg) (string, error) {
@@ -213,7 +217,7 @@ func (m *AwsManager) buildInstanceType(asg *asg) (string, error) {
 		return m.ec2Service.getInstanceTypeByLT(asg.LaunchTemplateName, asg.LaunchTemplateVersion)
 	}
 
-	return "", fmt.Errorf("Unable to get instance type from launch config or launch template")
+	return "", errors.New("Unable to get instance type from launch config or launch template")
 }
 
 func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*apiv1.Node, error) {
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
index fe3af85f78d1..6003a4846a56 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
@@ -228,6 +228,81 @@ func TestBuildInstanceType(t *testing.T) {
 	assert.Equal(t, instanceType, builtInstanceType)
 }
 
+func TestGetASGTemplate(t *testing.T) {
+	const (
+		knownInstanceType = "t3.micro"
+		region            = "us-east-1"
+		az                = region + "a"
+		ltName            = "launcher"
+		ltVersion         = "1"
+	)
+
+	tags := []*autoscaling.TagDescription{
+		{
+			Key:   aws.String("k8s.io/cluster-autoscaler/node-template/taint/dedicated"),
+			Value: aws.String("foo:NoSchedule"),
+		},
+	}
+
+	tests := []struct {
+		description       string
+		instanceType      string
+		availabilityZones []string
+		error             bool
+	}{
+		{"insufficient availability zones",
+			knownInstanceType, []string{}, true},
+		{"single availability zone",
+			knownInstanceType, []string{az}, false},
+		{"multiple availability zones",
+			knownInstanceType, []string{az, "us-west-1b"}, false},
+		{"unknown instance type",
+			"nonexistent.xlarge", []string{az}, true},
+	}
+
+	for _, test := range tests {
+		t.Run(test.description, func(t *testing.T) {
+			s := &EC2Mock{}
+			s.On("DescribeLaunchTemplateVersions", &ec2.DescribeLaunchTemplateVersionsInput{
+				LaunchTemplateName: aws.String(ltName),
+				Versions:           []*string{aws.String(ltVersion)},
+			}).Return(&ec2.DescribeLaunchTemplateVersionsOutput{
+				LaunchTemplateVersions: []*ec2.LaunchTemplateVersion{
+					{
+						LaunchTemplateData: &ec2.ResponseLaunchTemplateData{
+							InstanceType: aws.String(test.instanceType),
+						},
+					},
+				},
+			})
+
+			m, err := createAWSManagerInternal(nil, cloudprovider.NodeGroupDiscoveryOptions{}, nil, &ec2Wrapper{s})
+			assert.NoError(t, err)
+
+			asg := &asg{
+				AwsRef:                AwsRef{Name: "sample"},
+				AvailabilityZones:     test.availabilityZones,
+				LaunchTemplateName:    ltName,
+				LaunchTemplateVersion: ltVersion,
+				Tags:                  tags,
+			}
+
+			template, err := m.getAsgTemplate(asg)
+			if test.error {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+				if assert.NotNil(t, template) {
+					assert.Equal(t, test.instanceType, template.InstanceType.InstanceType)
+					assert.Equal(t, region, template.Region)
+					assert.Equal(t, test.availabilityZones[0], template.Zone)
+					assert.Equal(t, tags, template.Tags)
+				}
+			}
+		})
+	}
+}
+
 /* Disabled due to flakiness. See https://github.com/kubernetes/autoscaler/issues/608
 func TestFetchAutoAsgs(t *testing.T) {
 	min, max := 1, 10
diff --git a/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go b/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go
index e5911b892617..faf30984af29 100644
--- a/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go
+++ b/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go
@@ -255,6 +255,12 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     124928,
 		GPU:          0,
 	},
+	"f1.4xlarge": {
+		InstanceType: "f1.4xlarge",
+		VCPU:         16,
+		MemoryMb:     249856,
+		GPU:          0,
+	},
 	"g2": {
 		InstanceType: "g2",
 		VCPU:         32,
@@ -297,6 +303,12 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     249856,
 		GPU:          2,
 	},
+	"g3s.xlarge": {
+		InstanceType: "g3s.xlarge",
+		VCPU:         4,
+		MemoryMb:     31232,
+		GPU:          1,
+	},
 	"h1": {
 		InstanceType: "h1",
 		VCPU:         64,
@@ -567,6 +579,42 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     16384,
 		GPU:          0,
 	},
+	"m5a.12xlarge": {
+		InstanceType: "m5a.12xlarge",
+		VCPU:         48,
+		MemoryMb:     196608,
+		GPU:          0,
+	},
+	"m5a.24xlarge": {
+		InstanceType: "m5a.24xlarge",
+		VCPU:         96,
+		MemoryMb:     393216,
+		GPU:          0,
+	},
+	"m5a.2xlarge": {
+		InstanceType: "m5a.2xlarge",
+		VCPU:         8,
+		MemoryMb:     32768,
+		GPU:          0,
+	},
+	"m5a.4xlarge": {
+		InstanceType: "m5a.4xlarge",
+		VCPU:         16,
+		MemoryMb:     65536,
+		GPU:          0,
+	},
+	"m5a.large": {
+		InstanceType: "m5a.large",
+		VCPU:         2,
+		MemoryMb:     8192,
+		GPU:          0,
+	},
+	"m5a.xlarge": {
+		InstanceType: "m5a.xlarge",
+		VCPU:         4,
+		MemoryMb:     16384,
+		GPU:          0,
+	},
 	"m5d": {
 		InstanceType: "m5d",
 		VCPU:         96,
@@ -777,6 +825,42 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     32768,
 		GPU:          0,
 	},
+	"r5a.12xlarge": {
+		InstanceType: "r5a.12xlarge",
+		VCPU:         48,
+		MemoryMb:     393216,
+		GPU:          0,
+	},
+	"r5a.24xlarge": {
+		InstanceType: "r5a.24xlarge",
+		VCPU:         96,
+		MemoryMb:     786432,
+		GPU:          0,
+	},
+	"r5a.2xlarge": {
+		InstanceType: "r5a.2xlarge",
+		VCPU:         8,
+		MemoryMb:     65536,
+		GPU:          0,
+	},
+	"r5a.4xlarge": {
+		InstanceType: "r5a.4xlarge",
+		VCPU:         16,
+		MemoryMb:     131072,
+		GPU:          0,
+	},
+	"r5a.large": {
+		InstanceType: "r5a.large",
+		VCPU:         2,
+		MemoryMb:     16384,
+		GPU:          0,
+	},
+	"r5a.xlarge": {
+		InstanceType: "r5a.xlarge",
+		VCPU:         4,
+		MemoryMb:     32768,
+		GPU:          0,
+	},
 	"r5d": {
 		InstanceType: "r5d",
 		VCPU:         96,
@@ -909,6 +993,24 @@ var InstanceTypes = map[string]*instanceType{
 		MemoryMb:     16384,
 		GPU:          0,
 	},
+	"u-12tb1": {
+		InstanceType: "u-12tb1",
+		VCPU:         448,
+		MemoryMb:     12582912,
+		GPU:          0,
+	},
+	"u-6tb1": {
+		InstanceType: "u-6tb1",
+		VCPU:         448,
+		MemoryMb:     6291456,
+		GPU:          0,
+	},
+	"u-9tb1": {
+ InstanceType: "u-9tb1", + VCPU: 448, + MemoryMb: 9437184, + GPU: 0, + }, "x1": { InstanceType: "x1", VCPU: 128, diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index b5410d76135c..61efa16c003e 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -60,6 +60,10 @@ type AutoscalingOptions struct { EstimatorName string // ExpanderName sets the type of node group expander to be used in scale up ExpanderName string + // IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down + IgnoreDaemonSetsUtilization bool + // IgnoreMirrorPodsUtilization is whether CA will ignore Mirror pods when calculating resource utilization for scaling down + IgnoreMirrorPodsUtilization bool // MaxGracefulTerminationSec is maximum number of seconds scale down waits for pods to terminate before // removing the node from cloud provider. MaxGracefulTerminationSec int diff --git a/cluster-autoscaler/core/scale_down.go b/cluster-autoscaler/core/scale_down.go index 25653d4a285f..7374dc5a4e1b 100644 --- a/cluster-autoscaler/core/scale_down.go +++ b/cluster-autoscaler/core/scale_down.go @@ -399,7 +399,7 @@ func (sd *ScaleDown) UpdateUnneededNodes( klog.Errorf("Node info for %s not found", node.Name) continue } - utilInfo, err := simulator.CalculateUtilization(node, nodeInfo) + utilInfo, err := simulator.CalculateUtilization(node, nodeInfo, sd.context.IgnoreDaemonSetsUtilization, sd.context.IgnoreMirrorPodsUtilization) if err != nil { klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err) diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 7519ba98a8fd..7332761fd3ac 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -140,6 +140,11 @@ var ( expanderFlag = flag.String("expander", expander.RandomExpanderName, "Type of node group expander to be used in scale up. 
Available values: ["+strings.Join(expander.AvailableExpanders, ",")+"]") + ignoreDaemonSetsUtilization = flag.Bool("ignore-daemonsets-utilization", false, + "Should CA ignore DaemonSet pods when calculating resource utilization for scaling down") + ignoreMirrorPodsUtilization = flag.Bool("ignore-mirror-pods-utilization", false, + "Should CA ignore Mirror pods when calculating resource utilization for scaling down") + writeStatusConfigMapFlag = flag.Bool("write-status-configmap", true, "Should CA write status information to a configmap") maxInactivityTimeFlag = flag.Duration("max-inactivity", 10*time.Minute, "Maximum time from last recorded autoscaler activity before automatic restart") maxFailingTimeFlag = flag.Duration("max-failing-time", 15*time.Minute, "Maximum time from last recorded successful autoscaler run before automatic restart") @@ -179,6 +184,8 @@ func createAutoscalingOptions() config.AutoscalingOptions { OkTotalUnreadyCount: *okTotalUnreadyCount, EstimatorName: *estimatorFlag, ExpanderName: *expanderFlag, + IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization, + IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization, MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag, MaxGracefulTerminationSec: *maxGracefulTerminationFlag, MaxNodeProvisionTime: *maxNodeProvisionTime, @@ -314,6 +321,8 @@ func run(healthCheck *metrics.HealthCheck) { } func main() { + klog.InitFlags(nil) + leaderElection := defaultLeaderElectionConfiguration() leaderElection.LeaderElect = true diff --git a/cluster-autoscaler/simulator/cluster.go b/cluster-autoscaler/simulator/cluster.go index 3f2bff9c5b7a..326f55b8aea8 100644 --- a/cluster-autoscaler/simulator/cluster.go +++ b/cluster-autoscaler/simulator/cluster.go @@ -23,6 +23,7 @@ import ( "math/rand" "time" + "k8s.io/autoscaler/cluster-autoscaler/utils/drain" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" "k8s.io/autoscaler/cluster-autoscaler/utils/glogx" scheduler_util "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler" @@ -151,19 +152,19 @@ func FindEmptyNodesToRemove(candidates []*apiv1.Node, pods []*apiv1.Pod) []*apiv // CalculateUtilization calculates utilization of a node, defined as maximum of (cpu, memory) utilization. // Per resource utilization is the sum of requests for it divided by allocatable. It also returns the individual // cpu and memory utilization. 
-func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo) (utilInfo UtilizationInfo, err error) {
-	cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU)
+func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, skipDaemonSetPods, skipMirrorPods bool) (utilInfo UtilizationInfo, err error) {
+	cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU, skipDaemonSetPods, skipMirrorPods)
 	if err != nil {
 		return UtilizationInfo{}, err
 	}
-	mem, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceMemory)
+	mem, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceMemory, skipDaemonSetPods, skipMirrorPods)
 	if err != nil {
 		return UtilizationInfo{}, err
 	}
 	return UtilizationInfo{CpuUtil: cpu, MemUtil: mem, Utilization: math.Max(cpu, mem)}, nil
 }
 
-func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName) (float64, error) {
+func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName, skipDaemonSetPods, skipMirrorPods bool) (float64, error) {
 	nodeAllocatable, found := node.Status.Allocatable[resourceName]
 	if !found {
 		return 0, fmt.Errorf("Failed to get %v from %s", resourceName, node.Name)
@@ -173,6 +174,14 @@ func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.N
 	}
 	podsRequest := resource.MustParse("0")
 	for _, pod := range nodeInfo.Pods() {
+		// factor daemonset pods out of the utilization calculations
+		if skipDaemonSetPods && isDaemonSet(pod) {
+			continue
+		}
+		// factor mirror pods out of the utilization calculations
+		if skipMirrorPods && drain.IsMirrorPod(pod) {
+			continue
+		}
 		for _, container := range pod.Spec.Containers {
 			if resourceValue, found := container.Resources.Requests[resourceName]; found {
 				podsRequest.Add(resourceValue)
@@ -283,3 +292,12 @@ func shuffleNodes(nodes []*apiv1.Node) []*apiv1.Node {
 	}
 	return result
 }
+
+func isDaemonSet(pod *apiv1.Pod) bool {
+	for _, ownerReference := range pod.ObjectMeta.OwnerReferences {
+		if ownerReference.Kind == "DaemonSet" {
+			return true
+		}
+	}
+	return false
+}
diff --git a/cluster-autoscaler/simulator/cluster_test.go b/cluster-autoscaler/simulator/cluster_test.go
index 4b8310f8adf0..2897a94a0de3 100644
--- a/cluster-autoscaler/simulator/cluster_test.go
+++ b/cluster-autoscaler/simulator/cluster_test.go
@@ -38,14 +38,42 @@ func TestUtilization(t *testing.T) {
 	node := BuildTestNode("node1", 2000, 2000000)
 	SetNodeReadyState(node, true, time.Time{})
 
-	utilInfo, err := CalculateUtilization(node, nodeInfo)
+	utilInfo, err := CalculateUtilization(node, nodeInfo, false, false)
 	assert.NoError(t, err)
 	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
 
 	node2 := BuildTestNode("node1", 2000, -1)
 
-	_, err = CalculateUtilization(node2, nodeInfo)
+	_, err = CalculateUtilization(node2, nodeInfo, false, false)
 	assert.Error(t, err)
+
+	daemonSetPod3 := BuildTestPod("p3", 100, 200000)
+	daemonSetPod3.OwnerReferences = GenerateOwnerReferences("ds", "DaemonSet", "apps/v1", "")
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod, pod2, daemonSetPod3)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, true, false)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod2, daemonSetPod3)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, false, false)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
+
+	mirrorPod4 := BuildTestPod("p4", 100, 200000)
+	mirrorPod4.Annotations = map[string]string{
+		types.ConfigMirrorAnnotationKey: "",
+	}
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod, pod2, mirrorPod4)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, false, true)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
+
+	nodeInfo = schedulercache.NewNodeInfo(pod, pod2, mirrorPod4)
+	utilInfo, err = CalculateUtilization(node, nodeInfo, false, false)
+	assert.NoError(t, err)
+	assert.InEpsilon(t, 2.0/10, utilInfo.Utilization, 0.01)
 }
 
 func TestFindPlaceAllOk(t *testing.T) {
diff --git a/cluster-autoscaler/version.go b/cluster-autoscaler/version.go
index da7f657edd3b..c6508efd31d3 100644
--- a/cluster-autoscaler/version.go
+++ b/cluster-autoscaler/version.go
@@ -17,4 +17,4 @@ limitations under the License.
 package main
 
 // ClusterAutoscalerVersion contains version of CA.
-const ClusterAutoscalerVersion = "1.13.0-rc.1"
+const ClusterAutoscalerVersion = "1.13.0-rc.2"
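
For reference, a minimal standalone sketch of the scale-down behavior this patch introduces: when the new `--ignore-daemonsets-utilization` / `--ignore-mirror-pods-utilization` flags are set, DaemonSet-owned pods and mirror pods are dropped before pod requests are summed, so lightly used nodes whose load is mostly daemons look emptier to the scale-down logic. The helper names `isDaemonSetPod`, `isMirrorPod`, `sumCPURequests`, and the `main` harness below are illustrative, not the autoscaler's own API; only the two skip conditions mirror the logic added to `simulator/cluster.go`, and `isMirrorPod` approximates `drain.IsMirrorPod` via the kubelet's mirror-pod annotation.

```go
package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// isDaemonSetPod reports whether a pod is owned by a DaemonSet — the same
// ownership check as the isDaemonSet helper added in simulator/cluster.go.
func isDaemonSetPod(pod *apiv1.Pod) bool {
	for _, ref := range pod.OwnerReferences {
		if ref.Kind == "DaemonSet" {
			return true
		}
	}
	return false
}

// isMirrorPod approximates drain.IsMirrorPod: mirror (static) pods carry
// the kubelet's kubernetes.io/config.mirror annotation.
func isMirrorPod(pod *apiv1.Pod) bool {
	_, ok := pod.Annotations["kubernetes.io/config.mirror"]
	return ok
}

// sumCPURequests adds up the CPU requests of the pods that survive the two
// skip switches, the same shape as calculateUtilizationOfResource.
func sumCPURequests(pods []*apiv1.Pod, skipDaemonSetPods, skipMirrorPods bool) resource.Quantity {
	total := resource.MustParse("0")
	for _, pod := range pods {
		if skipDaemonSetPods && isDaemonSetPod(pod) {
			continue // DaemonSet pods no longer count toward utilization
		}
		if skipMirrorPods && isMirrorPod(pod) {
			continue // mirror pods no longer count either
		}
		for _, c := range pod.Spec.Containers {
			if v, ok := c.Resources.Requests[apiv1.ResourceCPU]; ok {
				total.Add(v)
			}
		}
	}
	return total
}

func main() {
	mkPod := func(cpu string) *apiv1.Pod {
		return &apiv1.Pod{
			Spec: apiv1.PodSpec{Containers: []apiv1.Container{{
				Resources: apiv1.ResourceRequirements{
					Requests: apiv1.ResourceList{apiv1.ResourceCPU: resource.MustParse(cpu)},
				},
			}}},
		}
	}

	app := mkPod("300m")
	ds := mkPod("100m")
	ds.OwnerReferences = []metav1.OwnerReference{{Kind: "DaemonSet", Name: "ds"}}
	pods := []*apiv1.Pod{app, ds}

	counted := sumCPURequests(pods, false, false)
	skipped := sumCPURequests(pods, true, false)
	fmt.Println(counted.String()) // "400m": everything counted (default behavior)
	fmt.Println(skipped.String()) // "300m": DaemonSet pod ignored, node looks emptier
}
```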
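The `klog.InitFlags(nil)` call added to `main` registers klog's logging flags (such as `-v` and `-logtostderr`) on the default flag set so the subsequent `flag.Parse` picks them up. A minimal sketch of that pattern, independent of the autoscaler's other wiring:

```go
package main

import (
	"flag"

	"k8s.io/klog"
)

func main() {
	// Register klog's flags on the default flag set before parsing,
	// as cluster-autoscaler's main now does.
	klog.InitFlags(nil)
	flag.Parse()

	klog.V(1).Info("verbose logging enabled")
	klog.Flush()
}
```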