open-telemetry · moh-osman3 · Mar 8, 2023 · Mar 8, 2023 · Mar 8, 2023 · Mar 8, 2023
@@ -0,0 +1,17 @@
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
+component: Autoscaler
+
+# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Support scaling on Pod custom metrics.
+
+# One or more tracking issues related to the change
+issues:
+  - 1560
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
@@ -291,20 +291,25 @@ type OpenTelemetryCollectorList struct {
 
 // AutoscalerSpec defines the OpenTelemetryCollector's pod autoscaling specification.
 type AutoscalerSpec struct {
-	// MinReplicas sets a lower bound to the autoscaling feature.  Set this if your are using autoscaling. It must be at least 1
+	// MinReplicas sets a lower bound to the autoscaling feature.  Set this if you are using autoscaling. It must be at least 1
 	// +optional
 	MinReplicas *int32 `json:"minReplicas,omitempty"`
 	// MaxReplicas sets an upper bound to the autoscaling feature. If MaxReplicas is set autoscaling is enabled.
 	// +optional
 	MaxReplicas *int32 `json:"maxReplicas,omitempty"`
 	// +optional
 	Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
+	// Metrics is the list of metrics to scale on. Each entry must use a supported metric source type and a positive target.
+	// +optional
+	Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
 	// TargetCPUUtilization sets the target average CPU used across all replicas.
 	// If average CPU exceeds this value, the HPA will scale up. Defaults to 90 percent.
 	// +optional
+	// Deprecated: configure a CPU resource metric in "OpenTelemetryCollector.Spec.Autoscaler.Metrics" instead.
 	TargetCPUUtilization *int32 `json:"targetCPUUtilization,omitempty"`
-	// +optional
 	// TargetMemoryUtilization sets the target average memory utilization across all replicas
+	// +optional
+	// Deprecated: configure a memory resource metric in "OpenTelemetryCollector.Spec.Autoscaler.Metrics" instead.
 	TargetMemoryUtilization *int32 `json:"targetMemoryUtilization,omitempty"`
 }
 

@@ -17,6 +17,7 @@ package v1alpha1
 import (
 	"fmt"
 
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/validation"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -219,13 +220,111 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {
 		if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetMemoryUtilization != nil && (*r.Spec.Autoscaler.TargetMemoryUtilization < int32(1) || *r.Spec.Autoscaler.TargetMemoryUtilization > int32(99)) {
 			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetMemoryUtilization should be greater than 0 and less than 100")
 		}
+
+		if r.Spec.Autoscaler != nil && len(r.Spec.Autoscaler.Metrics) != 0 {
+			err := validateMetricsArray(r.Spec.Autoscaler.Metrics)
+			if err != nil {
+				return err
+			}
+		}
 	}
 
 	if r.Spec.Ingress.Type == IngressTypeNginx && r.Spec.Mode == ModeSidecar {
-		return fmt.Errorf("the OptenTelemetry Spec Ingress configuiration is incorrect. Ingress can only be used in combination with the modes: %s, %s, %s",
+		return fmt.Errorf("the OpenTelemetry Spec Ingress configuiration is incorrect. Ingress can only be used in combination with the modes: %s, %s, %s",
 			ModeDeployment, ModeDaemonSet, ModeStatefulSet,
 		)
 	}
 
 	return nil
 }
+
+func validateMetricsArray(metrics []autoscalingv2.MetricSpec) error {
+	for _, metric := range metrics {
+		switch metric.Type {
+		case autoscalingv2.ObjectMetricSourceType:
+			if metric.Object.Target.Type == autoscalingv2.AverageValueMetricType {
+				if val, ok := metric.Object.Target.AverageValue.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
+				}
+			} else if metric.Object.Target.Type == autoscalingv2.ValueMetricType {
+				if val, ok := metric.Object.Target.Value.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
+				}
+			} else if metric.Pods.Target.Type == autoscalingv2.UtilizationMetricType {
+				if *metric.Object.Target.AverageUtilization < int32(1) || *metric.Object.Target.AverageUtilization > int32(99) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100")
+				}
+			} else {
+				return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, unrecognized pods target type")
+			}
+
+		case autoscalingv2.PodsMetricSourceType:
+			if metric.Pods.Target.Type == autoscalingv2.AverageValueMetricType {
+				if val, ok := metric.Pods.Target.AverageValue.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
+				}
+			} else if metric.Pods.Target.Type == autoscalingv2.ValueMetricType {
+				if val, ok := metric.Pods.Target.Value.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
+				}
+			} else if metric.Pods.Target.Type == autoscalingv2.UtilizationMetricType {
+				if *metric.Pods.Target.AverageUtilization < int32(1) || *metric.Pods.Target.AverageUtilization > int32(99) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100")
+				}
+			} else {
+				return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, unrecognized pods target type")
+			}
+		case autoscalingv2.ResourceMetricSourceType:
+			if metric.Resource.Target.Type == autoscalingv2.AverageValueMetricType {
+				if val, ok := metric.Resource.Target.AverageValue.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
+				}
+			} else if metric.Resource.Target.Type == autoscalingv2.ValueMetricType {
+				if val, ok := metric.Resource.Target.Value.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
+				}
+			} else if metric.Resource.Target.Type == autoscalingv2.UtilizationMetricType {
+				if *metric.Resource.Target.AverageUtilization < int32(1) || *metric.Resource.Target.AverageUtilization > int32(99) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100")
+				}
+			} else {
+				return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, unrecognized pods target type")
+			}
+		case autoscalingv2.ContainerResourceMetricSourceType:
+			if metric.ContainerResource.Target.Type == autoscalingv2.AverageValueMetricType {
+				if val, ok := metric.ContainerResource.Target.AverageValue.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
+				}
+			} else if metric.ContainerResource.Target.Type == autoscalingv2.ValueMetricType {
+				if val, ok := metric.ContainerResource.Target.Value.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
+				}
+			} else if metric.ContainerResource.Target.Type == autoscalingv2.UtilizationMetricType {
+				if *metric.ContainerResource.Target.AverageUtilization < int32(1) || *metric.ContainerResource.Target.AverageUtilization > int32(99) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100")
+				}
+			} else {
+				return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, unrecognized pods target type")
+			}
+		case autoscalingv2.ExternalMetricSourceType:
+			if metric.External.Target.Type == autoscalingv2.AverageValueMetricType {
+				if val, ok := metric.External.Target.AverageValue.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
+				}
+			} else if metric.External.Target.Type == autoscalingv2.ValueMetricType {
+				if val, ok := metric.External.Target.Value.AsInt64(); !ok || val < int64(1) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
+				}
+			} else if metric.External.Target.Type == autoscalingv2.UtilizationMetricType {
+				if *metric.External.Target.AverageUtilization < int32(1) || *metric.External.Target.AverageUtilization > int32(99) {
+					return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100")
+				}
+			} else {
+				return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, unrecognized pods target type")
+			}
+		default:
+			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, metric type unsupported")
+		}
+	}
+	return nil
+}
@@ -21,6 +21,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
 
@@ -420,6 +421,70 @@ func TestOTELColValidatingWebhook(t *testing.T) {
 			},
 			expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, minReplicas must not be greater than maxReplicas",
 		},
+		{
+			name: "invalid autoscaler metric type",
+			otelcol: OpenTelemetryCollector{
+				Spec: OpenTelemetryCollectorSpec{
+					MaxReplicas: &three,
+					Autoscaler: &AutoscalerSpec{
+						Metrics: []autoscalingv2.MetricSpec{
+							{
+								Type: "invalid",
+							},
+						},
+					},
+				},
+			},
+			expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, metric type unsupported",
+		},
+		{
+			name: "invalid resource cpu utilization",
+			otelcol: OpenTelemetryCollector{
+				Spec: OpenTelemetryCollectorSpec{
+					MaxReplicas: &three,
+					Autoscaler: &AutoscalerSpec{
+						Metrics: []autoscalingv2.MetricSpec{
+							{
+								Type: autoscalingv2.ResourceMetricSourceType,
+								Resource: &autoscalingv2.ResourceMetricSource{
+									Name: v1.ResourceCPU,
+									Target: autoscalingv2.MetricTarget{
+										Type:               autoscalingv2.UtilizationMetricType,
+										AverageUtilization: &zero,
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100",
+		},
+		{
+			name: "invalid pod metric value",
+			otelcol: OpenTelemetryCollector{
+				Spec: OpenTelemetryCollectorSpec{
+					MaxReplicas: &three,
+					Autoscaler: &AutoscalerSpec{
+						Metrics: []autoscalingv2.MetricSpec{
+							{
+								Type: autoscalingv2.PodsMetricSourceType,
+								Pods: &autoscalingv2.PodsMetricSource{
+									Metric: autoscalingv2.MetricIdentifier{
+										Name: "custom1",
+									},
+									Target: autoscalingv2.MetricTarget{
+										Type:         autoscalingv2.AverageValueMetricType,
+										AverageValue: resource.NewQuantity(int64(0), resource.DecimalSI),
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0",
+		},
 		{
 			name: "invalid deployment mode incompabible with ingress settings",
 			otelcol: OpenTelemetryCollector{

@@ -31,7 +31,7 @@ metadata:
     categories: Logging & Tracing
     certified: "false"
     containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
-    createdAt: "2023-03-29T08:21:43Z"
+    createdAt: "2023-04-12T20:34:35Z"
     description: Provides the OpenTelemetry components, including the Collector
     operators.operatorframework.io/builder: operator-sdk-v1.27.0
     operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
@@ -307,7 +307,7 @@ spec:
                 - --enable-leader-election
                 - --zap-log-level=info
                 - --zap-time-encoding=rfc3339nano
-                image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.74.0
+                image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.74.0 
                 livenessProbe:
                   httpGet:
                     path: /healthz
@@ -414,7 +414,7 @@ spec:
   maturity: alpha
   provider:
     name: OpenTelemetry Community
-  version: 0.74.0
+  version: 0.74.0 
   webhookdefinitions:
   - admissionReviewVersions:
     - v1

@@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
 kind: CustomResourceDefinition
 metadata:
   annotations:
-    controller-gen.kubebuilder.io/version: v0.11.3
+    controller-gen.kubebuilder.io/version: v0.11.3 
   creationTimestamp: null
   labels:
     app.kubernetes.io/name: opentelemetry-operator