vllm-project · Jeffwan · Aug 1, 2024 · Aug 1, 2024 · Aug 1, 2024
diff --git a/api/autoscaling/v1alpha1/podautoscaler_types.go b/api/autoscaling/v1alpha1/podautoscaler_types.go
@@ -56,6 +56,7 @@ type PodAutoscalerSpec struct {
 	MinReplicas *int32 `json:"minReplicas,omitempty"`
 
 	// MaxReplicas is the maximum number of replicas to which the target can be scaled up.
+	// It cannot be less than minReplicas
 	MaxReplicas int32 `json:"maxReplicas"`
 
 	TargetMetric string `json:"targetMetric"`
@@ -73,9 +74,12 @@ type PodAutoscalerSpec struct {
 type ScalingStrategyType string
 
 const (
-	// HPA represents the Kubernetes Horizontal Pod Autoscaler.
+	// HPA represents the Kubernetes native Horizontal Pod Autoscaler.
 	HPA ScalingStrategyType = "HPA"
 
+	// KPA represents the Knative Pod Autoscaler (KPA) scaling algorithm.
+	KPA ScalingStrategyType = "KPA"
+
 	// Custom represents any custom scaling mechanism.
 	Custom ScalingStrategyType = "Custom"
 )
@@ -94,13 +98,22 @@ type PodAutoscalerStatus struct {
 	// INSERT ADDITIONAL STATUS FIELD - define observed state of cluster
 	// Important: Run "make" to regenerate code after modifying this file
 
+	// LastScaleTime is the last time the PodAutoscaler scaled the number of pods,
+	// used by the autoscaler to control how often the number of pods is changed.
+	// +optional
+	LastScaleTime *metav1.Time `json:"lastScaleTime,omitempty"`
+
 	// DesiredScale represents the desired number of instances computed by the PodAutoscaler based on the current metrics.
 	// it's computed according to Scaling policy after observing service metrics
-	DesiredScale *int32 `json:"desiredScale,omitempty"`
+	DesiredScale int32 `json:"desiredScale,omitempty"`
 
 	// ActualScale represents the actual number of running instances of the scaled target.
 	// it may be different from DesiredScale
-	ActualScale *int32 `json:"actualScale,omitempty"`
+	ActualScale int32 `json:"actualScale,omitempty"`
+
+	// Conditions is the set of conditions required for this autoscaler to scale its target,
+	// and indicates whether or not those conditions are met.
+	Conditions []metav1.Condition `json:"conditions,omitempty"`
 }
 
 //+kubebuilder:object:root=true

diff --git a/api/autoscaling/v1alpha1/zz_generated.deepcopy.go b/api/autoscaling/v1alpha1/zz_generated.deepcopy.go
diff --git a/config/crd/bases/autoscaling.aibrix.ai_podautoscalers.yaml b/config/crd/bases/autoscaling.aibrix.ai_podautoscalers.yaml
@@ -42,8 +42,9 @@ spec:
             description: Spec defines the desired behavior of the PodAutoscaler.
             properties:
               maxReplicas:
-                description: MaxReplicas is the maximum number of replicas to which
-                  the target can be scaled up.
+                description: |-
+                  MaxReplicas is the maximum number of replicas to which the target can be scaled up.
+                  It cannot be less than minReplicas
                 format: int32
                 type: integer
               metricsSources:
@@ -139,12 +140,90 @@ spec:
                   it may be different from DesiredScale
                 format: int32
                 type: integer
+              conditions:
+                description: |-
+                  Conditions is the set of conditions required for this autoscaler to scale its target,
+                  and indicates whether or not those conditions are met.
+                items:
+                  description: "Condition contains details for one aspect of the current
+                    state of this API Resource.\n---\nThis struct is intended for
+                    direct use as an array at the field path .status.conditions.  For
+                    example,\n\n\n\ttype FooStatus struct{\n\t    // Represents the
+                    observations of a foo's current state.\n\t    // Known .status.conditions.type
+                    are: \"Available\", \"Progressing\", and \"Degraded\"\n\t    //
+                    +patchMergeKey=type\n\t    // +patchStrategy=merge\n\t    // +listType=map\n\t
+                    \   // +listMapKey=type\n\t    Conditions []metav1.Condition `json:\"conditions,omitempty\"
+                    patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t
+                    \   // other fields\n\t}"
+                  properties:
+                    lastTransitionTime:
+                      description: |-
+                        lastTransitionTime is the last time the condition transitioned from one status to another.
+                        This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                      format: date-time
+                      type: string
+                    message:
+                      description: |-
+                        message is a human readable message indicating details about the transition.
+                        This may be an empty string.
+                      maxLength: 32768
+                      type: string
+                    observedGeneration:
+                      description: |-
+                        observedGeneration represents the .metadata.generation that the condition was set based upon.
+                        For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                        with respect to the current state of the instance.
+                      format: int64
+                      minimum: 0
+                      type: integer
+                    reason:
+                      description: |-
+                        reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                        Producers of specific condition types may define expected values and meanings for this field,
+                        and whether the values are considered a guaranteed API.
+                        The value should be a CamelCase string.
+                        This field may not be empty.
+                      maxLength: 1024
+                      minLength: 1
+                      pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                      type: string
+                    status:
+                      description: status of the condition, one of True, False, Unknown.
+                      enum:
+                      - "True"
+                      - "False"
+                      - Unknown
+                      type: string
+                    type:
+                      description: |-
+                        type of condition in CamelCase or in foo.example.com/CamelCase.
+                        ---
+                        Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be
+                        useful (see .node.status.conditions), the ability to deconflict is important.
+                        The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt)
+                      maxLength: 316
+                      pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                      type: string
+                  required:
+                  - lastTransitionTime
+                  - message
+                  - reason
+                  - status
+                  - type
+                  type: object
+                type: array
               desiredScale:
                 description: |-
                   DesiredScale represents the desired number of instances computed by the PodAutoscaler based on the current metrics.
                   it's computed according to Scaling policy after observing service metrics
                 format: int32
                 type: integer
+              lastScaleTime:
+                description: |-
+                  LastScaleTime is the last time the PodAutoscaler scaled the number of pods,
+                  used by the autoscaler to control how often the number of pods is changed.
+                format: date-time
+                type: string
             type: object
         type: object
     served: true

diff --git a/docs/tutorial/podautoscaler/README.md b/docs/tutorial/podautoscaler/README.md
@@ -16,7 +16,7 @@ make manifests && make build && make install
 Verify the installation:
 
 ```shell
-kubectl get crds | grep aibrix
+kubectl get crds | grep podautoscalers
 ```
 
 The expected output is as follows:

diff --git a/pkg/controller/podautoscaler/hpa_resources.go b/pkg/controller/podautoscaler/hpa_resources.go
@@ -19,7 +19,7 @@ package podautoscaler
 import (
 	"context"
 	"fmt"
-	pa_v1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
+	pav1 "github.com/aibrix/aibrix/api/autoscaling/v1alpha1"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -34,15 +34,15 @@ import (
 )
 
 var (
-	controllerKind = pa_v1.GroupVersion.WithKind("PodAutoScaler") // Define the resource type for the controller
+	controllerKind = pav1.GroupVersion.WithKind("PodAutoscaler") // GroupVersionKind of the PodAutoscaler resource; Kind is case-sensitive and must match the API type name.
 )
 
-func getHPANameFromPa(pa *pa_v1.PodAutoscaler) string {
+func getHPANameFromPa(pa *pav1.PodAutoscaler) string {
 	return fmt.Sprintf("%s-hpa", pa.Name)
 }
 
 // MakeHPA creates an HPA resource from a PodAutoscaler resource.
-func MakeHPA(pa *pa_v1.PodAutoscaler, ctx context.Context) *autoscalingv2.HorizontalPodAutoscaler {
+func MakeHPA(pa *pav1.PodAutoscaler, ctx context.Context) *autoscalingv2.HorizontalPodAutoscaler {
 	minReplicas, maxReplicas := pa.Spec.MinReplicas, pa.Spec.MaxReplicas
 	if maxReplicas == 0 {
 		maxReplicas = math.MaxInt32 // Set default to no upper limit if not specified
@@ -76,7 +76,7 @@ func MakeHPA(pa *pa_v1.PodAutoscaler, ctx context.Context) *autoscalingv2.Horizo
 		klog.V(3).InfoS("Creating HPA", "metric", pa.Spec.TargetMetric, "target", targetValue)
 
 		switch strings.ToLower(pa.Spec.TargetMetric) {
-		case pa_v1.CPU:
+		case pav1.CPU:
 			utilValue := int32(math.Ceil(targetValue))
 			hpa.Spec.Metrics = []autoscalingv2.MetricSpec{{
 				Type: autoscalingv2.ResourceMetricSourceType,
@@ -89,7 +89,7 @@ func MakeHPA(pa *pa_v1.PodAutoscaler, ctx context.Context) *autoscalingv2.Horizo
 				},
 			}}
 
-		case pa_v1.Memory:
+		case pav1.Memory:
 			memory := resource.NewQuantity(int64(targetValue)*1024*1024, resource.BinarySI)
 			hpa.Spec.Metrics = []autoscalingv2.MetricSpec{{
 				Type: autoscalingv2.ResourceMetricSourceType,

diff --git a/pkg/controller/podautoscaler/podautoscaler_controller.go b/pkg/controller/podautoscaler/podautoscaler_controller.go
@@ -94,14 +94,15 @@ func (r *PodAutoscalerReconciler) Reconcile(ctx context.Context, req ctrl.Reques
 	if err := r.Get(ctx, req.NamespacedName, &pa); err != nil {
 		if errors.IsNotFound(err) {
 			// Object might have been deleted after reconcile request, ignore and return.
-			klog.V(3).InfoS("PodAutoscaler resource not found. Ignoring since object must have been deleted")
+			klog.InfoS("PodAutoscaler resource not found. Ignoring since object must have been deleted")
 			return ctrl.Result{}, nil
 		}
-		klog.V(3).ErrorS(err, "Failed to get PodAutoscaler")
+		klog.ErrorS(err, "Failed to get PodAutoscaler")
 		return ctrl.Result{}, err
 	}
 
 	// Generate a corresponding HorizontalPodAutoscaler
+	// TODO: use the PodAutoscaler's scaling strategy type (HPA/KPA/Custom) to decide whether an HPA object should be reconciled.
 	hpa := MakeHPA(&pa, ctx)
 	hpaName := types.NamespacedName{
 		Name:      hpa.Name,
@@ -112,26 +113,27 @@ func (r *PodAutoscalerReconciler) Reconcile(ctx context.Context, req ctrl.Reques
 	err := r.Get(ctx, hpaName, &existingHPA)
 	if err != nil && errors.IsNotFound(err) {
 		// HPA does not exist, create a new one.
-		klog.V(3).InfoS("Creating a new HPA", "HPA.Namespace", hpa.Namespace, "HPA.Name", hpa.Name)
+		klog.InfoS("Creating a new HPA", "HPA.Namespace", hpa.Namespace, "HPA.Name", hpa.Name)
 		if err = r.Create(ctx, hpa); err != nil {
-			klog.V(3).ErrorS(err, "Failed to create new HPA")
+			klog.ErrorS(err, "Failed to create new HPA")
 			return ctrl.Result{}, err
 		}
 	} else if err != nil {
 		// Error occurred while fetching the existing HPA, report the error and requeue.
-		klog.V(3).ErrorS(err, "Failed to get HPA")
+		klog.ErrorS(err, "Failed to get HPA")
 		return ctrl.Result{}, err
 	} else {
 		// Update the existing HPA if it already exists.
-		klog.V(3).InfoS("Updating existing HPA", "HPA.Namespace", existingHPA.Namespace, "HPA.Name", existingHPA.Name)
+		klog.InfoS("Updating existing HPA", "HPA.Namespace", existingHPA.Namespace, "HPA.Name", existingHPA.Name)
 
 		err = r.Update(ctx, hpa)
 		if err != nil {
-			klog.V(2).ErrorS(err, "Failed to update HPA")
+			klog.ErrorS(err, "Failed to update HPA")
 			return ctrl.Result{}, err
 		}
 	}
 
+	// TODO: add status update. Currently, actualScale and desiredScale are not yet synced from the HPA object.
 	// Return with no error and no requeue needed.
 	return ctrl.Result{}, nil
 }