Skip to content

Commit

Permalink
Move GPULabel and GPUTypes to cloud provider
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffwan committed Mar 25, 2019
1 parent d46dded commit 9066688
Show file tree
Hide file tree
Showing 22 changed files with 405 additions and 150 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
.idea/
*.iml

# VSCode project files
**/.vscode

# Emacs save files
*~
\#*\#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,31 @@ import (
"fmt"
"strings"

"os"

apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/klog"
"os"
)

const (
// ProviderName is the cloud provider name for alicloud.
ProviderName = "alicloud"

// GPULabel is the label added to nodes with GPU resource.
GPULabel = "aliyun.accelerator/nvidia_name"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
// NOTE(review): confirm these entries match the GPU types alicloud actually offers.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

type aliCloudProvider struct {
Expand Down Expand Up @@ -90,6 +102,16 @@ func (ali *aliCloudProvider) Name() string {
return ProviderName
}

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (ali *aliCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (ali *aliCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

func (ali *aliCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
result := make([]cloudprovider.NodeGroup, 0, len(ali.asgs))
for _, asg := range ali.asgs {
Expand Down
21 changes: 21 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ import (
const (
// ProviderName is the cloud provider name for AWS.
ProviderName = "aws"

// GPULabel is the label added to nodes with GPU resource.
GPULabel = "k8s.amazonaws.com/accelerator"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
// NOTE(review): confirm these entries match the GPU types AWS actually offers.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

// awsCloudProvider implements CloudProvider interface.
Expand Down Expand Up @@ -63,6 +74,16 @@ func (aws *awsCloudProvider) Name() string {
return ProviderName
}

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (aws *awsCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (aws *awsCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// NodeGroups returns all node groups configured for this cloud provider.
func (aws *awsCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
asgs := aws.awsManager.getAsgs()
Expand Down
21 changes: 21 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ import (
const (
// ProviderName is the cloud provider name for Azure.
ProviderName = "azure"

// GPULabel is the label added to nodes with GPU resource.
// NOTE(review): this is a cloud.google.com label name — confirm Azure
// nodes are actually labeled with it.
GPULabel = "cloud.google.com/gke-accelerator"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
// NOTE(review): confirm these entries match the GPU types Azure actually offers.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

// AzureCloudProvider provides implementation of CloudProvider interface for Azure.
Expand Down Expand Up @@ -61,6 +72,16 @@ func (azure *AzureCloudProvider) Name() string {
return "azure"
}

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (azure *AzureCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (azure *AzureCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// NodeGroups returns all node groups configured for this cloud provider.
func (azure *AzureCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
asgs := azure.azureManager.getAsgs()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ import (
const (
// ProviderName is the cloud provider name for baiducloud.
ProviderName = "baiducloud"

// GPULabel is the label added to nodes with GPU resource.
// NOTE(review): this is a cloud.google.com label name — confirm
// baiducloud nodes are actually labeled with it.
GPULabel = "cloud.google.com/gke-accelerator"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
// NOTE(review): confirm these entries match the GPU types baiducloud actually offers.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

// baiducloudCloudProvider implements CloudProvider interface.
Expand Down Expand Up @@ -148,6 +159,16 @@ func (baiducloud *baiducloudCloudProvider) NodeGroups() []cloudprovider.NodeGrou
return result
}

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (baiducloud *baiducloudCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (baiducloud *baiducloudCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// NodeGroupForNode returns the node group for the given node, nil if the node
// should not be processed by cluster autoscaler, or non-nil error if such
// occurred. Must be implemented.
Expand Down
6 changes: 6 additions & 0 deletions cluster-autoscaler/cloudprovider/cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ type CloudProvider interface {
// GetResourceLimiter returns struct containing limits (max, min) for resources (cores, memory etc.).
GetResourceLimiter() (*ResourceLimiter, error)

// GPULabel returns the label added to nodes with GPU resource.
GPULabel() string

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
GetAvailableGPUTypes() map[string]struct{}

// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
Cleanup() error

Expand Down
21 changes: 21 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/gce_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ import (
const (
// ProviderNameGCE is the name of GCE cloud provider.
ProviderNameGCE = "gce"

// GPULabel is the label added to nodes with GPU resource.
GPULabel = "cloud.google.com/gke-accelerator"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

// GceCloudProvider implements CloudProvider interface.
Expand All @@ -59,6 +70,16 @@ func (gce *GceCloudProvider) Name() string {
return ProviderNameGCE
}

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (gce *GceCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (gce *GceCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// NodeGroups returns all node groups configured for this cloud provider.
func (gce *GceCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
migs := gce.gceManager.GetMigs()
Expand Down
21 changes: 21 additions & 0 deletions cluster-autoscaler/cloudprovider/kubemark/kubemark_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ import (
const (
// ProviderName is the cloud provider name for kubemark.
ProviderName = "kubemark"

// GPULabel is the label added to nodes with GPU resource.
// NOTE(review): this is a cloud.google.com label name — confirm
// kubemark nodes are expected to carry it.
GPULabel = "cloud.google.com/gke-accelerator"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

// KubemarkCloudProvider implements CloudProvider interface for kubemark
Expand Down Expand Up @@ -83,6 +94,16 @@ func (kubemark *KubemarkCloudProvider) Name() string {
return ProviderName
}

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (kubemark *KubemarkCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (kubemark *KubemarkCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// NodeGroups returns all node groups configured for this cloud provider.
func (kubemark *KubemarkCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
result := make([]cloudprovider.NodeGroup, 0, len(kubemark.nodeGroups))
Expand Down
21 changes: 21 additions & 0 deletions cluster-autoscaler/cloudprovider/kubemark/kubemark_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ import (
const (
// ProviderName is the cloud provider name for kubemark.
ProviderName = "kubemark"

// GPULabel is the label added to nodes with GPU resource.
// NOTE(review): this is a cloud.google.com label name — confirm
// kubemark nodes are expected to carry it.
GPULabel = "cloud.google.com/gke-accelerator"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

// KubemarkCloudProvider implements CloudProvider interface.
Expand All @@ -46,6 +57,16 @@ func BuildKubemarkCloudProvider(kubemarkController interface{}, specs []string,
// Name returns name of the cloud provider.
// NOTE(review): this variant returns an empty string rather than
// ProviderName — presumably deliberate for this stub build; confirm.
func (kubemark *KubemarkCloudProvider) Name() string { return "" }

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (kubemark *KubemarkCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (kubemark *KubemarkCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// NodeGroups returns all node groups configured for this cloud provider.
func (kubemark *KubemarkCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
return []cloudprovider.NodeGroup{}
Expand Down
20 changes: 20 additions & 0 deletions cluster-autoscaler/cloudprovider/magnum/magnum_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ import (
const (
// ProviderName is the cloud provider name for Magnum.
ProviderName = "magnum"
// GPULabel is the label added to nodes with GPU resource.
// NOTE(review): this is a cloud.google.com label name — confirm
// Magnum nodes are actually labeled with it.
GPULabel = "cloud.google.com/gke-accelerator"
)

var (
// availableGPUTypes is the set of GPU type names handed out by
// GetAvailableGPUTypes. Only the keys matter; the empty-struct values
// carry no data.
// NOTE(review): confirm these entries match the GPU types Magnum actually offers.
availableGPUTypes = map[string]struct{}{
"nvidia-tesla-k80": {},
"nvidia-tesla-p100": {},
"nvidia-tesla-v100": {},
}
)

// magnumCloudProvider implements CloudProvider interface from cluster-autoscaler/cloudprovider module.
Expand All @@ -56,6 +66,16 @@ func (os *magnumCloudProvider) Name() string {
return ProviderName
}

// GPULabel returns the label added to nodes with GPU resource,
// i.e. the package-level GPULabel constant.
func (os *magnumCloudProvider) GPULabel() string {
return GPULabel
}

// GetAvailableGPUTypes returns all available GPU types the cloud provider supports.
// The result is the package-level availableGPUTypes map itself, not a copy,
// so callers should treat it as read-only.
func (os *magnumCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return availableGPUTypes
}

// NodeGroups returns all node groups managed by this cloud provider.
func (os *magnumCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
groups := make([]cloudprovider.NodeGroup, len(os.nodeGroups))
Expand Down
30 changes: 30 additions & 0 deletions cluster-autoscaler/cloudprovider/mocks/CloudProvider.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,36 @@ func (_m *CloudProvider) Cleanup() error {
return r0
}

// GPULabel provides a mock function taking no arguments.
// If the first configured return value is a func() string, that function is
// invoked and its result returned; otherwise the value itself is asserted to
// be a string and returned.
func (_m *CloudProvider) GPULabel() string {
	ret := _m.Called()

	if producer, ok := ret.Get(0).(func() string); ok {
		return producer()
	}
	return ret.Get(0).(string)
}

// GetAvailableGPUTypes provides a mock function taking no arguments.
// If the first configured return value is a func() map[string]struct{}, that
// function is invoked and its result returned. Otherwise a non-nil value is
// asserted to be a map[string]struct{}; a nil value yields a nil map.
func (_m *CloudProvider) GetAvailableGPUTypes() map[string]struct{} {
	ret := _m.Called()

	if producer, ok := ret.Get(0).(func() map[string]struct{}); ok {
		return producer()
	}
	if first := ret.Get(0); first != nil {
		return first.(map[string]struct{})
	}
	return nil
}

// GetAvailableMachineTypes provides a mock function with given fields:
func (_m *CloudProvider) GetAvailableMachineTypes() ([]string, error) {
ret := _m.Called()
Expand Down
Loading

0 comments on commit 9066688

Please sign in to comment.