Move GPULabel and GPUTypes to cloud provider

kubernetes · Mar 25, 2019 · 9066688 · 9066688
1 parent d46dded
commit 9066688
Show file tree

Hide file tree

Showing 22 changed files with 405 additions and 150 deletions.
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,9 @@
 .idea/
 *.iml
 
+# VSCode project files
+**/.vscode
+
 # Emacs save files
 *~
 \#*\#

diff --git a/cluster-autoscaler/cloudprovider/alicloud/alicloud_cloud_provider.go b/cluster-autoscaler/cloudprovider/alicloud/alicloud_cloud_provider.go
@@ -20,19 +20,31 @@ import (
 	"fmt"
 	"strings"
 
+	"os"
+
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/config"
 	"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
 	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	"k8s.io/klog"
-	"os"
 )
 
 const (
 	// ProviderName  is the cloud provider name for alicloud
 	ProviderName = "alicloud"
+
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "aliyun.accelerator/nvidia_name"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 type aliCloudProvider struct {
@@ -90,6 +102,16 @@ func (ali *aliCloudProvider) Name() string {
 	return ProviderName
 }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (ali *aliCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (ali *aliCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 func (ali *aliCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	result := make([]cloudprovider.NodeGroup, 0, len(ali.asgs))
 	for _, asg := range ali.asgs {

diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
@@ -35,6 +35,17 @@ import (
 const (
 	// ProviderName is the cloud provider name for AWS
 	ProviderName = "aws"
+
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "k8s.amazonaws.com/accelerator"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 // awsCloudProvider implements CloudProvider interface.
@@ -63,6 +74,16 @@ func (aws *awsCloudProvider) Name() string {
 	return ProviderName
 }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (aws *awsCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (aws *awsCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 // NodeGroups returns all node groups configured for this cloud provider.
 func (aws *awsCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	asgs := aws.awsManager.getAsgs()

diff --git a/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go b/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go
@@ -32,6 +32,17 @@ import (
 const (
 	// ProviderName is the cloud provider name for Azure
 	ProviderName = "azure"
+
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "cloud.google.com/gke-accelerator"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 // AzureCloudProvider provides implementation of CloudProvider interface for Azure.
@@ -61,6 +72,16 @@ func (azure *AzureCloudProvider) Name() string {
 	return "azure"
 }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (azure *AzureCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (azure *AzureCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 // NodeGroups returns all node groups configured for this cloud provider.
 func (azure *AzureCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	asgs := azure.azureManager.getAsgs()

diff --git a/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go b/cluster-autoscaler/cloudprovider/baiducloud/baiducloud_cloud_provider.go
@@ -35,6 +35,17 @@ import (
 const (
 	// ProviderName is the cloud provider name for baiducloud
 	ProviderName = "baiducloud"
+
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "cloud.google.com/gke-accelerator"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 // baiducloudCloudProvider implements CloudProvider interface.
@@ -148,6 +159,16 @@ func (baiducloud *baiducloudCloudProvider) NodeGroups() []cloudprovider.NodeGrou
 	return result
 }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (baiducloud *baiducloudCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (baiducloud *baiducloudCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 // NodeGroupForNode returns the node group for the given node, nil if the node
 // should not be processed by cluster autoscaler, or non-nil error if such
 // occurred. Must be implemented.

diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go
@@ -56,6 +56,12 @@ type CloudProvider interface {
 	// GetResourceLimiter returns struct containing limits (max, min) for resources (cores, memory etc.).
 	GetResourceLimiter() (*ResourceLimiter, error)
 
+	// GPULabel returns the label added to nodes with GPU resource.
+	GPULabel() string
+
+	// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+	GetAvailableGPUTypes() map[string]struct{}
+
 	// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
 	Cleanup() error
 

diff --git a/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go b/cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
@@ -34,6 +34,17 @@ import (
 const (
 	// ProviderNameGCE is the name of GCE cloud provider.
 	ProviderNameGCE = "gce"
+
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "cloud.google.com/gke-accelerator"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 // GceCloudProvider implements CloudProvider interface.
@@ -59,6 +70,16 @@ func (gce *GceCloudProvider) Name() string {
 	return ProviderNameGCE
 }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (gce *GceCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (gce *GceCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 // NodeGroups returns all node groups configured for this cloud provider.
 func (gce *GceCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	migs := gce.gceManager.GetMigs()

diff --git a/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go b/cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
@@ -43,6 +43,17 @@ import (
 const (
 	// ProviderName is the cloud provider name for kubemark
 	ProviderName = "kubemark"
+
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "cloud.google.com/gke-accelerator"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 // KubemarkCloudProvider implements CloudProvider interface for kubemark
@@ -83,6 +94,16 @@ func (kubemark *KubemarkCloudProvider) Name() string {
 	return ProviderName
 }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (kubemark *KubemarkCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (kubemark *KubemarkCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 // NodeGroups returns all node groups configured for this cloud provider.
 func (kubemark *KubemarkCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	result := make([]cloudprovider.NodeGroup, 0, len(kubemark.nodeGroups))

diff --git a/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go b/cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go
@@ -32,6 +32,17 @@ import (
 const (
 	// ProviderName is the cloud provider name for kubemark
 	ProviderName = "kubemark"
+
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "cloud.google.com/gke-accelerator"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 // KubemarkCloudProvider implements CloudProvider interface.
@@ -46,6 +57,16 @@ func BuildKubemarkCloudProvider(kubemarkController interface{}, specs []string,
 // Name returns name of the cloud provider.
 func (kubemark *KubemarkCloudProvider) Name() string { return "" }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (kubemark *KubemarkCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (kubemark *KubemarkCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 // NodeGroups returns all node groups configured for this cloud provider.
 func (kubemark *KubemarkCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	return []cloudprovider.NodeGroup{}

diff --git a/cluster-autoscaler/cloudprovider/magnum/magnum_cloud_provider.go b/cluster-autoscaler/cloudprovider/magnum/magnum_cloud_provider.go
@@ -33,6 +33,16 @@ import (
 const (
 	// ProviderName is the cloud provider name for Magnum
 	ProviderName = "magnum"
+	// GPULabel is the label added to nodes with GPU resource.
+	GPULabel = "cloud.google.com/gke-accelerator"
+)
+
+var (
+	availableGPUTypes = map[string]struct{}{
+		"nvidia-tesla-k80":  {},
+		"nvidia-tesla-p100": {},
+		"nvidia-tesla-v100": {},
+	}
 )
 
 // magnumCloudProvider implements CloudProvider interface from cluster-autoscaler/cloudprovider module.
@@ -56,6 +66,16 @@ func (os *magnumCloudProvider) Name() string {
 	return ProviderName
 }
 
+// GPULabel returns the label added to nodes with GPU resource.
+func (os *magnumCloudProvider) GPULabel() string {
+	return GPULabel
+}
+
+// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
+func (os *magnumCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	return availableGPUTypes
+}
+
 // NodeGroups returns all node groups managed by this cloud provider.
 func (os *magnumCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
 	groups := make([]cloudprovider.NodeGroup, len(os.nodeGroups))

diff --git a/cluster-autoscaler/cloudprovider/mocks/CloudProvider.go b/cluster-autoscaler/cloudprovider/mocks/CloudProvider.go
@@ -41,6 +41,36 @@ func (_m *CloudProvider) Cleanup() error {
 	return r0
 }
 
+// GPULabel provides a mock function with given fields:
+func (_m *CloudProvider) GPULabel() string {
+	ret := _m.Called()
+
+	var r0 string
+	if rf, ok := ret.Get(0).(func() string); ok {
+		r0 = rf()
+	} else {
+		r0 = ret.Get(0).(string)
+	}
+
+	return r0
+}
+
+// GetAvailableGPUTypes provides a mock function with given fields:
+func (_m *CloudProvider) GetAvailableGPUTypes() map[string]struct{} {
+	ret := _m.Called()
+
+	var r0 map[string]struct{}
+	if rf, ok := ret.Get(0).(func() map[string]struct{}); ok {
+		r0 = rf()
+	} else {
+		if ret.Get(0) != nil {
+			r0 = ret.Get(0).(map[string]struct{})
+		}
+	}
+
+	return r0
+}
+
 // GetAvailableMachineTypes provides a mock function with given fields:
 func (_m *CloudProvider) GetAvailableMachineTypes() ([]string, error) {
 	ret := _m.Called()
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,6 +13,9 @@ @@
     .idea/
     *.iml
+    # VSCode project files
+    **/.vscode
     # Emacs save files
     *~
     \#*\#
@@ Expand Down @@