Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support installing individual controllers from giant controller-manager #442

Merged
merged 4 commits into from
Nov 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions build/container/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ RUN go mod download
# Copy the go source
COPY cmd/ cmd/
COPY api/ api/
COPY pkg/controller/ pkg/controller/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/
COPY pkg/ pkg/

# Build
# GOARCH has no default value, so that the binary is built according to the host where the command
Expand Down
6 changes: 1 addition & 5 deletions build/container/Dockerfile.gateway
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,7 @@ RUN go mod download
# Copy the go source
COPY cmd/ cmd/
COPY api/ api/
COPY pkg/plugins/ pkg/plugins/
COPY pkg/utils/ pkg/utils/
COPY pkg/cache/ pkg/cache/
COPY pkg/client/ pkg/client/
COPY pkg/metrics/ pkg/metrics/
COPY pkg/ pkg/

# Build
# GOARCH has no default value, so that the binary is built according to the host where the command
Expand Down
61 changes: 50 additions & 11 deletions cmd/controllers/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,17 @@ import (
"os"
"time"

clientgoscheme "k8s.io/client-go/kubernetes/scheme"

autoscalingv1alpha1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
orchestrationv1alpha1 "github.com/aibrix/aibrix/api/orchestration/v1alpha1"
"github.com/aibrix/aibrix/pkg/features"
rayclusterv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1"
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
_ "k8s.io/client-go/plugin/pkg/client/auth"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
Expand All @@ -41,9 +45,6 @@ import (
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

autoscalingv1alpha1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
orchestrationv1alpha1 "github.com/aibrix/aibrix/api/orchestration/v1alpha1"
"github.com/aibrix/aibrix/pkg/cache"
"github.com/aibrix/aibrix/pkg/controller"
//+kubebuilder:scaffold:imports
Expand All @@ -64,15 +65,32 @@ var (
)

func init() {
// Only register the base kubernetes scheme here
utilruntime.Must(clientgoscheme.AddToScheme(scheme))

utilruntime.Must(autoscalingv1alpha1.AddToScheme(scheme))
utilruntime.Must(modelv1alpha1.AddToScheme(scheme))
scheme.AddUnversionedTypes(metav1.SchemeGroupVersion, &metav1.UpdateOptions{}, &metav1.DeleteOptions{}, &metav1.CreateOptions{})
//+kubebuilder:scaffold:scheme
}

func RegisterSchemas(scheme *runtime.Scheme) error {

if features.IsControllerEnabled(features.PodAutoscalerController) {
utilruntime.Must(autoscalingv1alpha1.AddToScheme(scheme))
}

if features.IsControllerEnabled(features.ModelAdapterController) {
utilruntime.Must(modelv1alpha1.AddToScheme(scheme))
}

if features.IsControllerEnabled(features.DistributedInferenceController) {
utilruntime.Must(orchestrationv1alpha1.AddToScheme(scheme))
utilruntime.Must(rayclusterv1.AddToScheme(scheme))
}

scheme.AddUnversionedTypes(metav1.SchemeGroupVersion, &metav1.UpdateOptions{}, &metav1.DeleteOptions{}, &metav1.CreateOptions{})
utilruntime.Must(orchestrationv1alpha1.AddToScheme(scheme))
utilruntime.Must(rayclusterv1.AddToScheme(scheme))
//+kubebuilder:scaffold:scheme

return nil
}

func main() {
Expand All @@ -86,6 +104,7 @@ func main() {
var renewDeadLine time.Duration
var leaderElectionResourceLock string
var leaderElectionId string
var controllers string
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
Expand All @@ -106,6 +125,7 @@ func main() {
"leader-election-resource-lock determines which resource lock to use for leader election, defaults to \"leases\".")
flag.StringVar(&leaderElectionId, "leader-election-id", "aibrix-controller-manager",
"leader-election-id determines the name of the resource that leader election will use for holding the leader lock, Default is aibrix-controller-manager.")
flag.StringVar(&controllers, "controllers", "*", "Comma-separated list of controllers to enable or disable, default value is * which indicates all controllers should be started.")

// Initialize the klog
klog.InitFlags(flag.CommandLine)
Expand All @@ -115,6 +135,19 @@ func main() {
// TODO: we will switch to textlogger or zap later
ctrl.SetLogger(klogr.New()) // nolint:staticcheck

// initialize the controllers
if err := features.ValidateControllers(controllers); err != nil {
setupLog.Error(err, "unable to validate the controllers, please type the right controller names through --controllers")
os.Exit(1)
}

features.InitControllers(controllers)

if err := RegisterSchemas(scheme); err != nil {
setupLog.Error(err, "unable to register schemas")
os.Exit(1)
}

// if the enable-http2 flag is false (the default), http/2 should be disabled
// due to its vulnerabilities. More specifically, disabling http/2 will
// prevent from being vulnerable to the HTTP/2 Stream Cancellation and
Expand Down Expand Up @@ -186,7 +219,13 @@ func main() {
panic(err)
}

cache.NewCache(config, stopCh, nil)
if features.IsControllerEnabled(features.ModelAdapterController) {
// cache is enabled for model adapter scheduling.
cache.NewCache(config, stopCh, nil)
}

// Initialize controllers
controller.Initialize()

// Controller registration for each Kind is encapsulated in pkg/controller/controller.go,
// so the registration flow here stays clean and will not need to change in the future.
Expand Down
2 changes: 2 additions & 0 deletions config/crd/autoscaling/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
resources:
- autoscaling.aibrix.ai_podautoscalers.yaml
7 changes: 3 additions & 4 deletions config/crd/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
# since it depends on service name and namespace that are out of this kustomize package.
# It should be run by config/default
resources:
- bases/autoscaling.aibrix.ai_podautoscalers.yaml
- bases/model.aibrix.ai_modeladapters.yaml
- bases/orchestration.aibrix.ai_rayclusterreplicasets.yaml
- bases/orchestration.aibrix.ai_rayclusterfleets.yaml
- autoscaling
- model
- orchestration
#+kubebuilder:scaffold:crdkustomizeresource

patches:
Expand Down
2 changes: 2 additions & 0 deletions config/crd/model/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
resources:
- model.aibrix.ai_modeladapters.yaml
3 changes: 3 additions & 0 deletions config/crd/orchestration/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
resources:
- orchestration.aibrix.ai_rayclusterreplicasets.yaml
- orchestration.aibrix.ai_rayclusterfleets.yaml
1 change: 1 addition & 0 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namePrefix: aibrix-
# someName: someValue

resources:
- namespace.yaml
- ../crd
- ../rbac
- ../manager
Expand Down
8 changes: 8 additions & 0 deletions config/default/namespace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: Namespace
metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: system
26 changes: 18 additions & 8 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,3 @@
apiVersion: v1
kind: Namespace
metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: system
---
apiVersion: apps/v1
kind: Deployment
Expand Down Expand Up @@ -66,3 +58,21 @@ spec:
memory: 64Mi
serviceAccountName: controller-manager
terminationGracePeriodSeconds: 10
---
apiVersion: v1
kind: Service
metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: controller-manager-metrics-service
namespace: system
spec:
ports:
- name: http
port: 8080
protocol: TCP
targetPort: 8080
selector:
control-plane: controller-manager
3 changes: 3 additions & 0 deletions config/rbac/autoscaling/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
resources:
- autoscaling_podautoscaler_editor_role.yaml
- autoscaling_podautoscaler_viewer_role.yaml
8 changes: 8 additions & 0 deletions config/rbac/controller-manager/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# The controller-manager resources are the common service account, roles, and role bindings that can be reused.

resources:
- leader_election_role.yaml
- leader_election_role_binding.yaml
- role.yaml
- role_binding.yaml
- service_account_controller_manager.yaml
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,3 @@ subjects:
- kind: ServiceAccount
name: controller-manager
namespace: system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gateway-plugin-role
subjects:
- kind: ServiceAccount
name: gateway-plugin
namespace: system
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,3 @@ metadata:
app.kubernetes.io/managed-by: kustomize
name: controller-manager
namespace: system
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin
namespace: system
5 changes: 5 additions & 0 deletions config/rbac/gateway/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
resources:
# Unlike the other files, which are generated, the gateway plugin role, role binding, and service account files are maintained by hand.
- service_account_gateway.yaml
- role_gateway.yaml
- role_binding_gateway.yaml
15 changes: 15 additions & 0 deletions config/rbac/gateway/role_binding_gateway.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin-rolebinding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: gateway-plugin-role
subjects:
- kind: ServiceAccount
name: gateway-plugin
namespace: system
File renamed without changes.
8 changes: 8 additions & 0 deletions config/rbac/gateway/service_account_gateway.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app.kubernetes.io/name: aibrix
app.kubernetes.io/managed-by: kustomize
name: gateway-plugin
namespace: system
27 changes: 11 additions & 16 deletions config/rbac/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,18 @@ resources:
# if your manager will use a service account that exists at
# runtime. Be sure to update RoleBinding and ClusterRoleBinding
# subjects if changing service account names.
- service_account.yaml
- role.yaml
- role_gateway.yaml
- role_binding.yaml
- leader_election_role.yaml
- leader_election_role_binding.yaml
- metrics_service.yaml
- controller-manager
# For each CRD, "Editor" and "Viewer" roles are scaffolded by
# default, aiding admins in cluster management. Those roles are
# not used by the Project itself. You can comment the following lines
# if you do not want those helpers be installed with your Project.
- orchestration_rayclusterfleet_editor_role.yaml
- orchestration_rayclusterfleet_viewer_role.yaml
- orchestration_rayclusterreplicaset_editor_role.yaml
- orchestration_rayclusterreplicaset_viewer_role.yaml
- model_modeladapter_editor_role.yaml
- model_modeladapter_viewer_role.yaml
- autoscaling_podautoscaler_editor_role.yaml
- autoscaling_podautoscaler_viewer_role.yaml

- orchestration/orchestration_rayclusterfleet_editor_role.yaml
- orchestration/orchestration_rayclusterfleet_viewer_role.yaml
- orchestration/orchestration_rayclusterreplicaset_editor_role.yaml
- orchestration/orchestration_rayclusterreplicaset_viewer_role.yaml
- model/model_modeladapter_editor_role.yaml
- model/model_modeladapter_viewer_role.yaml
- autoscaling/autoscaling_podautoscaler_editor_role.yaml
- autoscaling/autoscaling_podautoscaler_viewer_role.yaml
# other components
- gateway
17 changes: 0 additions & 17 deletions config/rbac/metrics_service.yaml

This file was deleted.

3 changes: 3 additions & 0 deletions config/rbac/model/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
resources:
- model_modeladapter_editor_role.yaml
- model_modeladapter_viewer_role.yaml
5 changes: 5 additions & 0 deletions config/rbac/orchestration/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
resources:
- orchestration_rayclusterfleet_editor_role.yaml
- orchestration_rayclusterfleet_viewer_role.yaml
- orchestration_rayclusterreplicaset_editor_role.yaml
- orchestration_rayclusterreplicaset_viewer_role.yaml
31 changes: 31 additions & 0 deletions config/standalone/autoscaler-controller/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
- ../../crd/autoscaling
- ../../rbac/autoscaling
- ../../rbac/controller-manager
- ../../manager

# Adds namespace to all resources.
namespace: aibrix-system

# Value of this field is prepended to the
# names of all resources, e.g. a deployment named
# "wordpress" becomes "alices-wordpress".
# Note that it should also match with the prefix (text before '-') of the namespace
# field above.
namePrefix: aibrix-autoscaling-

images:
- name: controller
newName: aibrix/controller-manager
newTag: nightly

patches:
- path: patch.yaml
target:
group: apps
version: v1
kind: Deployment
name: controller-manager
Loading
Loading