Skip to content

Commit

Permalink
Support running controllers as standalone component
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffwan committed Nov 29, 2024
1 parent b7f78f3 commit 1c091a4
Show file tree
Hide file tree
Showing 13 changed files with 245 additions and 27 deletions.
6 changes: 1 addition & 5 deletions build/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ RUN go mod download
# Copy the go source
COPY cmd/ cmd/
COPY api/ api/
COPY pkg/controller/ pkg/controller/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/
COPY pkg/ pkg/

# Build
# the GOARCH has not a default value to allow the binary be built according to the host where the command
Expand Down
6 changes: 1 addition & 5 deletions build/container/Dockerfile.gateway
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ RUN go mod download
# Copy the go source
COPY cmd/ cmd/
COPY api/ api/
COPY pkg/plugins/ pkg/plugins/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/
COPY pkg/ pkg/

# Build
# the GOARCH has not a default value to allow the binary be built according to the host where the command
Expand Down
49 changes: 40 additions & 9 deletions cmd/controllers/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"os"
"time"

clientgoscheme "k8s.io/client-go/kubernetes/scheme"

autoscalingv1alpha1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
orchestrationv1alpha1 "github.com/aibrix/aibrix/api/orchestration/v1alpha1"
Expand All @@ -32,7 +34,6 @@ import (

"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
Expand Down Expand Up @@ -66,15 +67,32 @@ var (
// init registers API types on the package-level scheme at program start-up.
// NOTE(review): the comment inside says only the base Kubernetes scheme
// belongs here, yet the controller-specific API groups and the unversioned
// meta option types are also registered unconditionally in this view, which
// duplicates what RegisterSchemas does per enabled controller — confirm which
// registrations are meant to remain in init.
func init() {
// Only register the base kubernetes scheme here
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
utilruntime.Must(autoscalingv1alpha1.AddToScheme(scheme))
utilruntime.Must(modelv1alpha1.AddToScheme(scheme))
utilruntime.Must(orchestrationv1alpha1.AddToScheme(scheme))
utilruntime.Must(rayclusterv1.AddToScheme(scheme))

scheme.AddUnversionedTypes(metav1.SchemeGroupVersion, &metav1.UpdateOptions{}, &metav1.DeleteOptions{}, &metav1.CreateOptions{})
//+kubebuilder:scaffold:scheme
}

// RegisterSchemas adds to scheme the API types required by the controllers
// that are currently enabled, plus the unversioned meta option types.
//
// It consults features.IsControllerEnabled, which reports false for any
// controller that has not been recorded yet, so it must be called after
// features.InitControllers has processed the --controllers flag.
//
// The first registration failure is returned to the caller instead of
// panicking, so the caller's error handling decides how to exit.
func RegisterSchemas(scheme *runtime.Scheme) error {
	if features.IsControllerEnabled(features.PodAutoscalerController) {
		if err := autoscalingv1alpha1.AddToScheme(scheme); err != nil {
			return err
		}
	}

	if features.IsControllerEnabled(features.ModelAdapterController) {
		if err := modelv1alpha1.AddToScheme(scheme); err != nil {
			return err
		}
	}

	// Distributed inference needs both the orchestration API group and the
	// RayCluster types.
	if features.IsControllerEnabled(features.DistributedInferenceController) {
		if err := orchestrationv1alpha1.AddToScheme(scheme); err != nil {
			return err
		}
		if err := rayclusterv1.AddToScheme(scheme); err != nil {
			return err
		}
	}

	scheme.AddUnversionedTypes(metav1.SchemeGroupVersion, &metav1.UpdateOptions{}, &metav1.DeleteOptions{}, &metav1.CreateOptions{})
	//+kubebuilder:scaffold:scheme

	return nil
}

func main() {
var metricsAddr string
var enableLeaderElection bool
Expand Down Expand Up @@ -109,9 +127,6 @@ func main() {
"leader-election-id determines the name of the resource that leader election will use for holding the leader lock, Default is aibrix-controller-manager.")
flag.StringVar(&controllers, "controllers", "*", "Comma-separated list of controllers to enable or disable, default value is * which indicates all controllers should be started.")

// initialize the controllers
features.InitControllers(controllers)

// Initialize the klog
klog.InitFlags(flag.CommandLine)
defer klog.Flush()
Expand All @@ -120,6 +135,19 @@ func main() {
// TODO: we will switch to textlogger or zap later
ctrl.SetLogger(klogr.New()) // nolint:staticcheck

// initialize the controllers
if err := features.ValidateControllers(controllers); err != nil {
setupLog.Error(err, "unable to validate the controllers, please type the right controller names through --controllers")
os.Exit(1)
}

features.InitControllers(controllers)

if err := RegisterSchemas(scheme); err != nil {
setupLog.Error(err, "unable to register schemas")
os.Exit(1)
}

// if the enable-http2 flag is false (the default), http/2 should be disabled
// due to its vulnerabilities. More specifically, disabling http/2 will
// prevent from being vulnerable to the HTTP/2 Stream Cancellation and
Expand Down Expand Up @@ -191,7 +219,10 @@ func main() {
panic(err)
}

cache.NewCache(config, stopCh, nil)
if features.IsControllerEnabled(features.ModelAdapterController) {
// cache is enabled for model adapter scheduling.
cache.NewCache(config, stopCh, nil)
}

// Initialize controllers
controller.Initialize()
Expand Down
8 changes: 1 addition & 7 deletions cmd/plugins/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ import (
"syscall"
"time"

"github.com/aibrix/aibrix/pkg/features"

"google.golang.org/grpc"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
Expand Down Expand Up @@ -73,11 +71,7 @@ func main() {
panic(err)
}

if features.IsControllerEnabled(features.ModelAdapterController) {
// cache is being used in model adapter scheduler.
// we need to initialize the cache here.
cache.NewCache(config, stopCh, redisClient)
}
cache.NewCache(config, stopCh, redisClient)

// Connect to K8s cluster
k8sClient, err := kubernetes.NewForConfig(config)
Expand Down
8 changes: 8 additions & 0 deletions config/standalone/autoscaler-controller/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ images:
- name: controller
newName: aibrix/controller-manager
newTag: nightly

patches:
- path: patch.yaml
target:
group: apps
version: v1
kind: Deployment
name: controller-manager
16 changes: 16 additions & 0 deletions config/standalone/autoscaler-controller/patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
spec:
containers:
- name: manager
args:
- --leader-elect
- --leader-election-id=aibrix-pod-autoscaler-controller
- --health-probe-bind-address=:8081
- --metrics-bind-address=0
- --controllers=pod-autoscaler-controller
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ images:
- name: controller
newName: aibrix/controller-manager
newTag: nightly

patches:
- path: patch.yaml
target:
group: apps
version: v1
kind: Deployment
name: controller-manager
16 changes: 16 additions & 0 deletions config/standalone/distributed-inference-controller/patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
spec:
containers:
- name: manager
args:
- --leader-elect
- --leader-election-id=aibrix-distributed-inference-controller
- --health-probe-bind-address=:8081
- --metrics-bind-address=0
- --controllers=distributed-inference-controller
8 changes: 8 additions & 0 deletions config/standalone/model-adapter-controller/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ images:
- name: controller
newName: aibrix/controller-manager
newTag: nightly

patches:
- path: patch.yaml
target:
group: apps
version: v1
kind: Deployment
name: controller-manager
16 changes: 16 additions & 0 deletions config/standalone/model-adapter-controller/patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
namespace: system
spec:
template:
spec:
containers:
- name: manager
args:
- --leader-elect
- --leader-election-id=aibrix-model-adapter-controller
- --health-probe-bind-address=:8081
- --metrics-bind-address=0
- --controllers=model-adapter-controller
2 changes: 1 addition & 1 deletion docs/development/app/pa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ spec:
maxReplicas: 10
targetMetric: "gpu_cache_usage_perc"
targetValue: "50"
scalingStrategy: "KPA"
scalingStrategy: "KPA"
16 changes: 16 additions & 0 deletions docs/source/getting_started/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,22 @@ Nightly Version
kubectl create -k config/default
Install Individual AIBrix Components
------------------------------------

.. code:: bash
# autoscaler controller
kubectl apply -k config/standalone/autoscaler-controller/
# distributed inference orchestration controller
kubectl apply -k config/standalone/distributed-inference-controller/
# model adapter controller
kubectl apply -k config/standalone/model-adapter-controller/
Install AIBrix on Kind Cluster
------------------------------

Expand Down
113 changes: 113 additions & 0 deletions pkg/features/features.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package features

import (
"fmt"
"strings"
)

// Controller names recognised by this package. These are the values accepted
// in the comma-separated list handled by ValidateControllers and
// InitControllers.
const (
// PodAutoscalerController is the name of the pod autoscaler controller.
PodAutoscalerController = "pod-autoscaler-controller"
// DistributedInferenceController is the name of the distributed inference controller.
DistributedInferenceController = "distributed-inference-controller"
// ModelAdapterController is the name of the model adapter controller.
ModelAdapterController = "model-adapter-controller"
// ModelRouteController is the name of the model route controller.
ModelRouteController = "model-route-controller"
)

var (
// EnabledControllers is the map of controllers to enable or disable,
// keyed by controller name (true means enabled). InitControllers fills it
// from a comma-separated list whose entries are interpreted as follows:
// '*' means "all enabled by default controllers"
// 'foo' means "enable 'foo'"
// '-foo' means "disable 'foo'"
// first item for a particular name wins
// NOTE(review): confirm that InitControllers actually honours the
// "first item wins" ordering stated above.
// Names absent from the map are reported as disabled by IsControllerEnabled.
EnabledControllers = make(map[string]bool)

// ValidControllers lists every controller name that ValidateControllers
// accepts.
ValidControllers = []string{
PodAutoscalerController, DistributedInferenceController, ModelAdapterController, ModelRouteController,
}
)

// ValidateControllers reports an error for the first entry of the
// comma-separated controllerList that does not name a known controller.
//
// Whitespace around each entry is ignored, the wildcard "*" is always
// accepted, and a single leading "-" (the disable marker) is removed before
// the remaining name is checked. It returns nil when every entry is valid.
func ValidateControllers(controllerList string) error {
	for _, entry := range strings.Split(controllerList, ",") {
		name := strings.TrimSpace(entry)
		if name == "*" {
			// The wildcard needs no lookup.
			continue
		}

		// TrimPrefix drops at most one "-", so "--foo" is checked as "-foo".
		name = strings.TrimPrefix(name, "-")
		if isValidController(name) {
			continue
		}
		return fmt.Errorf("invalid controller specified: %s", name)
	}
	return nil
}

// isValidController reports whether name exactly matches one of the entries
// in ValidControllers.
func isValidController(name string) bool {
	for i := range ValidControllers {
		if ValidControllers[i] == name {
			return true
		}
	}
	return false
}

// InitControllers populates EnabledControllers from a comma-separated list.
//
// Each entry is trimmed of surrounding whitespace and interpreted as:
//   - "*"    enable every known controller (via EnableAllControllers)
//   - "foo"  enable controller foo
//   - "-foo" disable controller foo
//
// Explicit entries always take precedence over the wildcard regardless of
// their position, and when a name appears more than once the first entry
// wins. "-foo,*" therefore leaves foo disabled, and "foo,-foo" leaves it
// enabled. Entries that are empty after trimming are ignored.
//
// The map is not cleared first, so state from earlier calls is kept unless
// this call overrides it.
func InitControllers(controllerList string) {
	explicit := make(map[string]bool)
	wildcard := false

	for _, entry := range strings.Split(controllerList, ",") {
		name := strings.TrimSpace(entry)
		if name == "*" {
			wildcard = true
			continue
		}

		enabled := !strings.HasPrefix(name, "-")
		name = strings.TrimPrefix(name, "-")
		if name == "" {
			continue
		}

		// Keep only the first explicit choice for each name.
		if _, seen := explicit[name]; !seen {
			explicit[name] = enabled
		}
	}

	// Apply the wildcard first so explicit choices can override it below.
	if wildcard {
		EnableAllControllers()
	}
	for name, enabled := range explicit {
		EnabledControllers[name] = enabled
	}
}

// IsControllerEnabled reports whether the named controller is enabled.
// A name that was never recorded in EnabledControllers is treated as
// disabled, which is the safe default.
func IsControllerEnabled(name string) bool {
	// Indexing a map with a missing key yields the zero value, false.
	return EnabledControllers[name]
}

// EnableAllControllers is used to enable all known controllers.
func EnableAllControllers() {
// This should be updated to reflect all available controllers.
EnabledControllers[PodAutoscalerController] = true
EnabledControllers[ModelAdapterController] = true
EnabledControllers[DistributedInferenceController] = true
EnabledControllers[ModelRouteController] = true
}

0 comments on commit 1c091a4

Please sign in to comment.