Commit
Merge pull request #1450 from losipiuk/lo/cherry-picks-ca-13
CA cherry picks to 1.13 and update version
k8s-ci-robot authored Nov 27, 2018
2 parents 7d42c40 + ab47574 commit 6ffe3e7
Showing 9 changed files with 257 additions and 17 deletions.
22 changes: 13 additions & 9 deletions cluster-autoscaler/cloudprovider/aws/aws_manager.go
@@ -19,6 +19,7 @@ limitations under the License.
package aws

import (
"errors"
"fmt"
"io"
"math/rand"
@@ -183,27 +184,30 @@ func (m *AwsManager) GetAsgNodes(ref AwsRef) ([]AwsInstanceRef, error) {

func (m *AwsManager) getAsgTemplate(asg *asg) (*asgTemplate, error) {
if len(asg.AvailabilityZones) < 1 {
return nil, fmt.Errorf("Unable to get first AvailabilityZone for %s", asg.Name)
return nil, fmt.Errorf("Unable to get first AvailabilityZone for ASG %q", asg.Name)
}

az := asg.AvailabilityZones[0]
region := az[0 : len(az)-1]

if len(asg.AvailabilityZones) > 1 {
klog.Warningf("Found multiple availability zones, using %s\n", az)
klog.Warningf("Found multiple availability zones for ASG %q; using %s\n", asg.Name, az)
}

instanceTypeName, err := m.buildInstanceType(asg)
if err != nil {
return nil, err
}

- return &asgTemplate{
- InstanceType: InstanceTypes[instanceTypeName],
- Region: region,
- Zone: az,
- Tags: asg.Tags,
- }, nil
+ if t, ok := InstanceTypes[instanceTypeName]; ok {
+ return &asgTemplate{
+ InstanceType: t,
+ Region: region,
+ Zone: az,
+ Tags: asg.Tags,
+ }, nil
+ }
+ return nil, fmt.Errorf("ASG %q uses the unknown EC2 instance type %q", asg.Name, instanceTypeName)
}

func (m *AwsManager) buildInstanceType(asg *asg) (string, error) {
@@ -213,7 +217,7 @@ func (m *AwsManager) buildInstanceType(asg *asg) (string, error) {
return m.ec2Service.getInstanceTypeByLT(asg.LaunchTemplateName, asg.LaunchTemplateVersion)
}

return "", fmt.Errorf("Unable to get instance type from launch config or launch template")
return "", errors.New("Unable to get instance type from launch config or launch template")
}

func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*apiv1.Node, error) {
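The getAsgTemplate change above replaces an unchecked map index with a comma-ok lookup, so an ASG whose launch configuration or launch template names an instance type missing from the static InstanceTypes table now yields an explicit error instead of a template carrying a nil InstanceType. A minimal standalone sketch of that pattern (the table contents, helper name, and printed output are illustrative, not the real code):

```go
package main

import "fmt"

// instanceType mirrors the shape of the entries in the static EC2 instance type table.
type instanceType struct {
	InstanceType string
	VCPU         int64
	MemoryMb     int64
}

// lookupInstanceType uses the comma-ok form of a map lookup: an unknown key
// produces an error rather than silently returning a nil pointer.
func lookupInstanceType(table map[string]*instanceType, name string) (*instanceType, error) {
	if t, ok := table[name]; ok {
		return t, nil
	}
	return nil, fmt.Errorf("unknown EC2 instance type %q", name)
}

func main() {
	table := map[string]*instanceType{
		"t3.micro": {InstanceType: "t3.micro", VCPU: 2, MemoryMb: 1024},
	}
	if t, err := lookupInstanceType(table, "t3.micro"); err == nil {
		fmt.Println(t.InstanceType, t.VCPU, t.MemoryMb) // t3.micro 2 1024
	}
	if _, err := lookupInstanceType(table, "nonexistent.xlarge"); err != nil {
		fmt.Println(err) // unknown EC2 instance type "nonexistent.xlarge"
	}
}
```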
75 changes: 75 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
@@ -228,6 +228,81 @@ func TestBuildInstanceType(t *testing.T) {
assert.Equal(t, instanceType, builtInstanceType)
}

func TestGetASGTemplate(t *testing.T) {
const (
knownInstanceType = "t3.micro"
region = "us-east-1"
az = region + "a"
ltName = "launcher"
ltVersion = "1"
)

tags := []*autoscaling.TagDescription{
{
Key: aws.String("k8s.io/cluster-autoscaler/node-template/taint/dedicated"),
Value: aws.String("foo:NoSchedule"),
},
}

tests := []struct {
description string
instanceType string
availabilityZones []string
error bool
}{
{"insufficient availability zones",
knownInstanceType, []string{}, true},
{"single availability zone",
knownInstanceType, []string{az}, false},
{"multiple availability zones",
knownInstanceType, []string{az, "us-west-1b"}, false},
{"unknown instance type",
"nonexistent.xlarge", []string{az}, true},
}

for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
s := &EC2Mock{}
s.On("DescribeLaunchTemplateVersions", &ec2.DescribeLaunchTemplateVersionsInput{
LaunchTemplateName: aws.String(ltName),
Versions: []*string{aws.String(ltVersion)},
}).Return(&ec2.DescribeLaunchTemplateVersionsOutput{
LaunchTemplateVersions: []*ec2.LaunchTemplateVersion{
{
LaunchTemplateData: &ec2.ResponseLaunchTemplateData{
InstanceType: aws.String(test.instanceType),
},
},
},
})

m, err := createAWSManagerInternal(nil, cloudprovider.NodeGroupDiscoveryOptions{}, nil, &ec2Wrapper{s})
assert.NoError(t, err)

asg := &asg{
AwsRef: AwsRef{Name: "sample"},
AvailabilityZones: test.availabilityZones,
LaunchTemplateName: ltName,
LaunchTemplateVersion: ltVersion,
Tags: tags,
}

template, err := m.getAsgTemplate(asg)
if test.error {
assert.Error(t, err)
} else {
assert.NoError(t, err)
if assert.NotNil(t, template) {
assert.Equal(t, test.instanceType, template.InstanceType.InstanceType)
assert.Equal(t, region, template.Region)
assert.Equal(t, test.availabilityZones[0], template.Zone)
assert.Equal(t, tags, template.Tags)
}
}
})
}
}

/* Disabled due to flakiness. See https://github.com/kubernetes/autoscaler/issues/608
func TestFetchAutoAsgs(t *testing.T) {
min, max := 1, 10
102 changes: 102 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go
@@ -255,6 +255,12 @@ var InstanceTypes = map[string]*instanceType{
MemoryMb: 124928,
GPU: 0,
},
"f1.4xlarge": {
InstanceType: "f1.4xlarge",
VCPU: 16,
MemoryMb: 249856,
GPU: 0,
},
"g2": {
InstanceType: "g2",
VCPU: 32,
@@ -297,6 +303,12 @@ var InstanceTypes = map[string]*instanceType{
MemoryMb: 249856,
GPU: 2,
},
"g3s.xlarge": {
InstanceType: "g3s.xlarge",
VCPU: 4,
MemoryMb: 31232,
GPU: 0,
},
"h1": {
InstanceType: "h1",
VCPU: 64,
@@ -567,6 +579,42 @@ var InstanceTypes = map[string]*instanceType{
MemoryMb: 16384,
GPU: 0,
},
"m5a.12xlarge": {
InstanceType: "m5a.12xlarge",
VCPU: 48,
MemoryMb: 196608,
GPU: 0,
},
"m5a.24xlarge": {
InstanceType: "m5a.24xlarge",
VCPU: 96,
MemoryMb: 393216,
GPU: 0,
},
"m5a.2xlarge": {
InstanceType: "m5a.2xlarge",
VCPU: 8,
MemoryMb: 32768,
GPU: 0,
},
"m5a.4xlarge": {
InstanceType: "m5a.4xlarge",
VCPU: 16,
MemoryMb: 65536,
GPU: 0,
},
"m5a.large": {
InstanceType: "m5a.large",
VCPU: 2,
MemoryMb: 8192,
GPU: 0,
},
"m5a.xlarge": {
InstanceType: "m5a.xlarge",
VCPU: 4,
MemoryMb: 16384,
GPU: 0,
},
"m5d": {
InstanceType: "m5d",
VCPU: 96,
@@ -777,6 +825,42 @@ var InstanceTypes = map[string]*instanceType{
MemoryMb: 32768,
GPU: 0,
},
"r5a.12xlarge": {
InstanceType: "r5a.12xlarge",
VCPU: 48,
MemoryMb: 393216,
GPU: 0,
},
"r5a.24xlarge": {
InstanceType: "r5a.24xlarge",
VCPU: 96,
MemoryMb: 786432,
GPU: 0,
},
"r5a.2xlarge": {
InstanceType: "r5a.2xlarge",
VCPU: 8,
MemoryMb: 65536,
GPU: 0,
},
"r5a.4xlarge": {
InstanceType: "r5a.4xlarge",
VCPU: 16,
MemoryMb: 131072,
GPU: 0,
},
"r5a.large": {
InstanceType: "r5a.large",
VCPU: 2,
MemoryMb: 16384,
GPU: 0,
},
"r5a.xlarge": {
InstanceType: "r5a.xlarge",
VCPU: 4,
MemoryMb: 32768,
GPU: 0,
},
"r5d": {
InstanceType: "r5d",
VCPU: 96,
@@ -909,6 +993,24 @@ var InstanceTypes = map[string]*instanceType{
MemoryMb: 16384,
GPU: 0,
},
"u-12tb1": {
InstanceType: "u-12tb1",
VCPU: 448,
MemoryMb: 12582912,
GPU: 0,
},
"u-6tb1": {
InstanceType: "u-6tb1",
VCPU: 448,
MemoryMb: 6291456,
GPU: 0,
},
"u-9tb1": {
InstanceType: "u-9tb1",
VCPU: 448,
MemoryMb: 9437184,
GPU: 0,
},
"x1": {
InstanceType: "x1",
VCPU: 128,
4 changes: 4 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
@@ -60,6 +60,10 @@ type AutoscalingOptions struct {
EstimatorName string
// ExpanderName sets the type of node group expander to be used in scale up
ExpanderName string
// IgnoreDaemonSetsUtilization is whether CA will ignore DaemonSet pods when calculating resource utilization for scaling down
IgnoreDaemonSetsUtilization bool
// IgnoreMirrorPodsUtilization is whether CA will ignore Mirror pods when calculating resource utilization for scaling down
IgnoreMirrorPodsUtilization bool
// MaxGracefulTerminationSec is maximum number of seconds scale down waits for pods to terminate before
// removing the node from cloud provider.
MaxGracefulTerminationSec int
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/scale_down.go
@@ -399,7 +399,7 @@ func (sd *ScaleDown) UpdateUnneededNodes(
klog.Errorf("Node info for %s not found", node.Name)
continue
}
- utilInfo, err := simulator.CalculateUtilization(node, nodeInfo)
+ utilInfo, err := simulator.CalculateUtilization(node, nodeInfo, sd.context.IgnoreDaemonSetsUtilization, sd.context.IgnoreMirrorPodsUtilization)

if err != nil {
klog.Warningf("Failed to calculate utilization for %s: %v", node.Name, err)
9 changes: 9 additions & 0 deletions cluster-autoscaler/main.go
@@ -140,6 +140,11 @@ var (
expanderFlag = flag.String("expander", expander.RandomExpanderName,
"Type of node group expander to be used in scale up. Available values: ["+strings.Join(expander.AvailableExpanders, ",")+"]")

ignoreDaemonSetsUtilization = flag.Bool("ignore-daemonsets-utilization", false,
"Should CA ignore DaemonSet pods when calculating resource utilization for scaling down")
ignoreMirrorPodsUtilization = flag.Bool("ignore-mirror-pods-utilization", false,
"Should CA ignore Mirror pods when calculating resource utilization for scaling down")

writeStatusConfigMapFlag = flag.Bool("write-status-configmap", true, "Should CA write status information to a configmap")
maxInactivityTimeFlag = flag.Duration("max-inactivity", 10*time.Minute, "Maximum time from last recorded autoscaler activity before automatic restart")
maxFailingTimeFlag = flag.Duration("max-failing-time", 15*time.Minute, "Maximum time from last recorded successful autoscaler run before automatic restart")
@@ -179,6 +184,8 @@ func createAutoscalingOptions() config.AutoscalingOptions {
OkTotalUnreadyCount: *okTotalUnreadyCount,
EstimatorName: *estimatorFlag,
ExpanderName: *expanderFlag,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
MaxNodeProvisionTime: *maxNodeProvisionTime,
@@ -314,6 +321,8 @@ func run(healthCheck *metrics.HealthCheck) {
}

func main() {
klog.InitFlags(nil)

leaderElection := defaultLeaderElectionConfiguration()
leaderElection.LeaderElect = true

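The two new flags above default to false and are copied into AutoscalingOptions in createAutoscalingOptions. A minimal standalone sketch of that wiring using only the standard library flag package (the flag names and help text match the diff; everything else is illustrative):

```go
package main

import (
	"flag"
	"fmt"
)

func main() {
	ignoreDaemonSets := flag.Bool("ignore-daemonsets-utilization", false,
		"Should CA ignore DaemonSet pods when calculating resource utilization for scaling down")
	ignoreMirrorPods := flag.Bool("ignore-mirror-pods-utilization", false,
		"Should CA ignore Mirror pods when calculating resource utilization for scaling down")
	flag.Parse()

	// In the real binary these values end up on config.AutoscalingOptions as
	// IgnoreDaemonSetsUtilization and IgnoreMirrorPodsUtilization.
	fmt.Println(*ignoreDaemonSets, *ignoreMirrorPods)
}
```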
26 changes: 22 additions & 4 deletions cluster-autoscaler/simulator/cluster.go
@@ -23,6 +23,7 @@ import (
"math/rand"
"time"

"k8s.io/autoscaler/cluster-autoscaler/utils/drain"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/glogx"
scheduler_util "k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
@@ -151,19 +152,19 @@ func FindEmptyNodesToRemove(candidates []*apiv1.Node, pods []*apiv1.Pod) []*apiv
// CalculateUtilization calculates utilization of a node, defined as maximum of (cpu, memory) utilization.
// Per resource utilization is the sum of requests for it divided by allocatable. It also returns the individual
// cpu and memory utilization.
- func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo) (utilInfo UtilizationInfo, err error) {
- cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU)
+ func CalculateUtilization(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, skipDaemonSetPods, skipMirrorPods bool) (utilInfo UtilizationInfo, err error) {
+ cpu, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceCPU, skipDaemonSetPods, skipMirrorPods)
if err != nil {
return UtilizationInfo{}, err
}
- mem, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceMemory)
+ mem, err := calculateUtilizationOfResource(node, nodeInfo, apiv1.ResourceMemory, skipDaemonSetPods, skipMirrorPods)
if err != nil {
return UtilizationInfo{}, err
}
return UtilizationInfo{CpuUtil: cpu, MemUtil: mem, Utilization: math.Max(cpu, mem)}, nil
}

- func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName) (float64, error) {
+ func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.NodeInfo, resourceName apiv1.ResourceName, skipDaemonSetPods, skipMirrorPods bool) (float64, error) {
nodeAllocatable, found := node.Status.Allocatable[resourceName]
if !found {
return 0, fmt.Errorf("Failed to get %v from %s", resourceName, node.Name)
@@ -173,6 +174,14 @@ func calculateUtilizationOfResource(node *apiv1.Node, nodeInfo *schedulercache.N
}
podsRequest := resource.MustParse("0")
for _, pod := range nodeInfo.Pods() {
// factor daemonset pods out of the utilization calculations
if skipDaemonSetPods && isDaemonSet(pod) {
continue
}
// factor mirror pods out of the utilization calculations
if skipMirrorPods && drain.IsMirrorPod(pod) {
continue
}
for _, container := range pod.Spec.Containers {
if resourceValue, found := container.Resources.Requests[resourceName]; found {
podsRequest.Add(resourceValue)
@@ -283,3 +292,12 @@ func shuffleNodes(nodes []*apiv1.Node) []*apiv1.Node {
}
return result
}

func isDaemonSet(pod *apiv1.Pod) bool {
for _, ownerReference := range pod.ObjectMeta.OwnerReferences {
if ownerReference.Kind == "DaemonSet" {
return true
}
}
return false
}
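As the doc comment on CalculateUtilization says, node utilization is the maximum of CPU and memory utilization, each computed as the sum of pod requests divided by the node's allocatable amount; the new parameters let DaemonSet and mirror pod requests be left out of that sum. A self-contained sketch of just that arithmetic (plain Go without the Kubernetes API types; all numbers are made up for illustration):

```go
package main

import "fmt"

// podRequest is a simplified stand-in for a pod's resource requests.
type podRequest struct {
	cpuMilli  int64 // requested CPU in millicores
	memMiB    int64 // requested memory in MiB
	daemonSet bool  // owned by a DaemonSet
	mirrorPod bool  // static/mirror pod
}

// utilization mirrors the shape of CalculateUtilization: per-resource utilization is
// the sum of requests divided by allocatable, and the node value is the maximum of the two.
func utilization(pods []podRequest, allocCPUMilli, allocMemMiB int64, skipDaemonSets, skipMirrorPods bool) float64 {
	var cpu, mem int64
	for _, p := range pods {
		if skipDaemonSets && p.daemonSet {
			continue
		}
		if skipMirrorPods && p.mirrorPod {
			continue
		}
		cpu += p.cpuMilli
		mem += p.memMiB
	}
	cpuUtil := float64(cpu) / float64(allocCPUMilli)
	memUtil := float64(mem) / float64(allocMemMiB)
	if cpuUtil > memUtil {
		return cpuUtil
	}
	return memUtil
}

func main() {
	pods := []podRequest{
		{cpuMilli: 500, memMiB: 512},                  // ordinary workload pod
		{cpuMilli: 100, memMiB: 128, daemonSet: true}, // e.g. a log shipper
		{cpuMilli: 250, memMiB: 256, mirrorPod: true}, // e.g. a static control-plane pod
	}
	// A node with 2 CPUs and 4 GiB allocatable.
	fmt.Println(utilization(pods, 2000, 4096, false, false)) // 0.425: every pod counts
	fmt.Println(utilization(pods, 2000, 4096, true, true))   // 0.25: DaemonSet and mirror pods excluded
}
```

With both skip flags enabled, a node mostly occupied by DaemonSet and static pods reports lower utilization and becomes a scale-down candidate sooner, which is the behaviour the new options are meant to enable.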