Skip to content

Commit

Permalink
Support installing individual controllers from giant controller-manag…
Browse files Browse the repository at this point in the history
…er (#442)

* standalone-pod-autoscaler

Co-authored-by: nwangfw <113055713+nwangfw@users.noreply.github.com>

* Move files to right overlays

* Refactor the crd folder structure for better reuse

* Support running controllers as standalone component

---------

Co-authored-by: Jiaxin Shan <seedjeffwan@gmail.com>
  • Loading branch information
nwangfw and Jeffwan authored Nov 29, 2024
1 parent c759a28 commit e15991b
Show file tree
Hide file tree
Showing 47 changed files with 437 additions and 99 deletions.
6 changes: 1 addition & 5 deletions build/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ RUN go mod download
# Copy the go source
COPY cmd/ cmd/
COPY api/ api/
COPY pkg/controller/ pkg/controller/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/
COPY pkg/ pkg/

# Build
# GOARCH has no default value, so that the binary is built according to the host where the command
Expand Down
6 changes: 1 addition & 5 deletions build/container/Dockerfile.gateway
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ RUN go mod download
# Copy the go source
COPY cmd/ cmd/
COPY api/ api/
COPY pkg/plugins/ pkg/plugins/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/
COPY pkg/ pkg/

# Build
# GOARCH has no default value, so that the binary is built according to the host where the command
Expand Down
61 changes: 50 additions & 11 deletions cmd/controllers/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ import (
"os"
"time"

clientgoscheme "k8s.io/client-go/kubernetes/scheme"

autoscalingv1alpha1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
orchestrationv1alpha1 "github.com/aibrix/aibrix/api/orchestration/v1alpha1"
"github.com/aibrix/aibrix/pkg/features"
rayclusterv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
Expand All @@ -41,9 +45,6 @@ import (
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

autoscalingv1alpha1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
orchestrationv1alpha1 "github.com/aibrix/aibrix/api/orchestration/v1alpha1"
"github.com/aibrix/aibrix/pkg/cache"
"github.com/aibrix/aibrix/pkg/controller"
//+kubebuilder:scaffold:imports
Expand All @@ -64,15 +65,32 @@ var (
)

func init() {
// Only register the base kubernetes scheme here
utilruntime.Must(clientgoscheme.AddToScheme(scheme))

utilruntime.Must(autoscalingv1alpha1.AddToScheme(scheme))
utilruntime.Must(modelv1alpha1.AddToScheme(scheme))
scheme.AddUnversionedTypes(metav1.SchemeGroupVersion, &metav1.UpdateOptions{}, &metav1.DeleteOptions{}, &metav1.CreateOptions{})
//+kubebuilder:scaffold:scheme
}

func RegisterSchemas(scheme *runtime.Scheme) error {

if features.IsControllerEnabled(features.PodAutoscalerController) {
utilruntime.Must(autoscalingv1alpha1.AddToScheme(scheme))
}

if features.IsControllerEnabled(features.ModelAdapterController) {
utilruntime.Must(modelv1alpha1.AddToScheme(scheme))
}

if features.IsControllerEnabled(features.DistributedInferenceController) {
utilruntime.Must(orchestrationv1alpha1.AddToScheme(scheme))
utilruntime.Must(rayclusterv1.AddToScheme(scheme))
}

scheme.AddUnversionedTypes(metav1.SchemeGroupVersion, &metav1.UpdateOptions{}, &metav1.DeleteOptions{}, &metav1.CreateOptions{})
utilruntime.Must(orchestrationv1alpha1.AddToScheme(scheme))
utilruntime.Must(rayclusterv1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme

return nil
}

func main() {
Expand All @@ -86,6 +104,7 @@ func main() {
var renewDeadLine time.Duration
var leaderElectionResourceLock string
var leaderElectionId string
var controllers string
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
Expand All @@ -106,6 +125,7 @@ func main() {
"leader-election-resource-lock determines which resource lock to use for leader election, defaults to \"leases\".")
flag.StringVar(&leaderElectionId, "leader-election-id", "aibrix-controller-manager",
"leader-election-id determines the name of the resource that leader election will use for holding the leader lock, Default is aibrix-controller-manager.")
flag.StringVar(&controllers, "controllers", "*", "Comma-separated list of controllers to enable or disable, default value is * which indicates all controllers should be started.")

// Initialize the klog
klog.InitFlags(flag.CommandLine)
Expand All @@ -115,6 +135,19 @@ func main() {
// TODO: we will switch to textlogger or zap later
ctrl.SetLogger(klogr.New()) // nolint:staticcheck

// initialize the controllers
if err := features.ValidateControllers(controllers); err != nil {
setupLog.Error(err, "unable to validate the controllers, please type the right controller names through --controllers")
os.Exit(1)
}

features.InitControllers(controllers)

if err := RegisterSchemas(scheme); err != nil {
setupLog.Error(err, "unable to register schemas")
os.Exit(1)
}

// if the enable-http2 flag is false (the default), http/2 should be disabled
// due to its vulnerabilities. More specifically, disabling http/2 will
// prevent from being vulnerable to the HTTP/2 Stream Cancellation and
Expand Down Expand Up @@ -186,7 +219,13 @@ func main() {
panic(err)
}

cache.NewCache(config, stopCh, nil)
if features.IsControllerEnabled(features.ModelAdapterController) {
// cache is enabled for model adapter scheduling.
cache.NewCache(config, stopCh, nil)
}

// Initialize controllers
controller.Initialize()

// Kind controller registration is encapsulated in pkg/controller/controller.go,
// so the registration flow here stays clean and the logic does not need to change in the future.
Expand Down
2 changes: 2 additions & 0 deletions config/crd/autoscaling/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
resources:
- autoscaling.aibrix.ai_podautoscalers.yaml
7 changes: 3 additions & 4 deletions config/crd/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
# since it depends on service name and namespace that are out of this kustomize package.
# It should be run by config/default
resources:
- bases/autoscaling.aibrix.ai_podautoscalers.yaml
- bases/model.aibrix.ai_modeladapters.yaml
- bases/orchestration.aibrix.ai_rayclusterreplicasets.yaml
- bases/orchestration.aibrix.ai_rayclusterfleets.yaml
- autoscaling
- model
- orchestration
#+kubebuilder:scaffold:crdkustomizeresource

patches:
Expand Down
2 changes: 2 additions & 0 deletions config/crd/model/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
resources:
- model.aibrix.ai_modeladapters.yaml
3 changes: 3 additions & 0 deletions config/crd/orchestration/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
resources:
- orchestration.aibrix.ai_rayclusterreplicasets.yaml
- orchestration.aibrix.ai_rayclusterfleets.yaml
1 change: 1 addition & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namePrefix: aibrix-
# someName: someValue

resources:
- namespace.yaml
- ../crd
- ../rbac
- ../manager
Expand Down
8 changes: 8 additions & 0 deletions config/default/namespace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: Namespace
metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: system
26 changes: 18 additions & 8 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
apiVersion: v1
kind: Namespace
metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: system
---
apiVersion: apps/v1
kind: Deployment
Expand Down Expand Up @@ -66,3 +58,21 @@ spec:
memory: 64Mi
serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
---
apiVersion: v1
kind: Service
metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: controller-manager-metrics-service
namespace: system
spec:
ports:
- name: http
port: 8080
protocol: TCP
targetPort: 8080
selector:
control-plane: controller-manager
3 changes: 3 additions & 0 deletions config/rbac/autoscaling/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
resources:
- autoscaling_podautoscaler_editor_role.yaml
- autoscaling_podautoscaler_viewer_role.yaml
8 changes: 8 additions & 0 deletions config/rbac/controller-manager/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The controller-manager resources are the common service account, roles, and role bindings that can be reused.

resources:
- leader_election_role.yaml
- leader_election_role_binding.yaml
- role.yaml
- role_binding.yaml
- service_account_controller_manager.yaml
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,3 @@ subjects:
- kind: ServiceAccount
name: controller-manager
namespace: system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gateway-plugin-role
subjects:
- kind: ServiceAccount
name: gateway-plugin
namespace: system
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,3 @@ metadata:
app.kubernetes.io/managed-by: kustomize
name: controller-manager
namespace: system
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin
namespace: system
5 changes: 5 additions & 0 deletions config/rbac/gateway/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
resources:
# Unlike the other files, which are generated, the gateway plugin role, role binding, and service account files are maintained manually.
- service_account_gateway.yaml
- role_gateway.yaml
- role_binding_gateway.yaml
15 changes: 15 additions & 0 deletions config/rbac/gateway/role_binding_gateway.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gateway-plugin-role
subjects:
- kind: ServiceAccount
name: gateway-plugin
namespace: system
File renamed without changes.
8 changes: 8 additions & 0 deletions config/rbac/gateway/service_account_gateway.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin
namespace: system
27 changes: 11 additions & 16 deletions config/rbac/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,18 @@ resources:
# if your manager will use a service account that exists at
# runtime. Be sure to update RoleBinding and ClusterRoleBinding
# subjects if changing service account names.
- service_account.yaml
- role.yaml
- role_gateway.yaml
- role_binding.yaml
- leader_election_role.yaml
- leader_election_role_binding.yaml
- metrics_service.yaml
- controller-manager
# For each CRD, "Editor" and "Viewer" roles are scaffolded by
# default, aiding admins in cluster management. Those roles are
# not used by the Project itself. You can comment the following lines
# if you do not want those helpers to be installed with your Project.
- orchestration_rayclusterfleet_editor_role.yaml
- orchestration_rayclusterfleet_viewer_role.yaml
- orchestration_rayclusterreplicaset_editor_role.yaml
- orchestration_rayclusterreplicaset_viewer_role.yaml
- model_modeladapter_editor_role.yaml
- model_modeladapter_viewer_role.yaml
- autoscaling_podautoscaler_editor_role.yaml
- autoscaling_podautoscaler_viewer_role.yaml

- orchestration/orchestration_rayclusterfleet_editor_role.yaml
- orchestration/orchestration_rayclusterfleet_viewer_role.yaml
- orchestration/orchestration_rayclusterreplicaset_editor_role.yaml
- orchestration/orchestration_rayclusterreplicaset_viewer_role.yaml
- model/model_modeladapter_editor_role.yaml
- model/model_modeladapter_viewer_role.yaml
- autoscaling/autoscaling_podautoscaler_editor_role.yaml
- autoscaling/autoscaling_podautoscaler_viewer_role.yaml
# other components
- gateway
17 changes: 0 additions & 17 deletions config/rbac/metrics_service.yaml

This file was deleted.

3 changes: 3 additions & 0 deletions config/rbac/model/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
resources:
- model_modeladapter_editor_role.yaml
- model_modeladapter_viewer_role.yaml
5 changes: 5 additions & 0 deletions config/rbac/orchestration/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
resources:
- orchestration_rayclusterfleet_editor_role.yaml
- orchestration_rayclusterfleet_viewer_role.yaml
- orchestration_rayclusterreplicaset_editor_role.yaml
- orchestration_rayclusterreplicaset_viewer_role.yaml
31 changes: 31 additions & 0 deletions config/standalone/autoscaler-controller/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
- ../../crd/autoscaling
- ../../rbac/autoscaling
- ../../rbac/controller-manager
- ../../manager

# Adds namespace to all resources.
namespace: aibrix-system

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
# "wordpress" becomes "alices-wordpress".
# Note that it should also match with the prefix (text before '-') of the namespace
# field above.
namePrefix: aibrix-autoscaling-

images:
- name: controller
newName: aibrix/controller-manager
newTag: nightly

patches:
- path: patch.yaml
target:
group: apps
version: v1
kind: Deployment
name: controller-manager
Loading

0 comments on commit e15991b

Please sign in to comment.