Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HPA] Add support for Pod custom metrics #1561

17 changes: 17 additions & 0 deletions .chloggen/1560-add-custom-metrics-support.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern (e.g. operator, target allocator, github action)
component: Autoscaler

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Support scaling on Pod custom metrics.

# One or more tracking issues related to the change
issues:
- 1560

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
9 changes: 7 additions & 2 deletions apis/v1alpha1/opentelemetrycollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,20 +291,25 @@ type OpenTelemetryCollectorList struct {

// AutoscalerSpec defines the OpenTelemetryCollector's pod autoscaling specification.
//
// All fields are optional. Metrics is the preferred way to describe what to
// scale on; TargetCPUUtilization and TargetMemoryUtilization are deprecated in
// its favor.
type AutoscalerSpec struct {
// MinReplicas sets a lower bound to the autoscaling feature. Set this if you are using autoscaling. It must be at least 1.
// +optional
MinReplicas *int32 `json:"minReplicas,omitempty"`
// MaxReplicas sets an upper bound to the autoscaling feature. If MaxReplicas is set autoscaling is enabled.
// +optional
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
// Behavior optionally carries an autoscalingv2.HorizontalPodAutoscalerBehavior value.
// NOTE(review): presumably forwarded to the generated HorizontalPodAutoscaler — confirm against the reconciler.
// +optional
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
// Metrics is the array of metrics to scale on.
// +optional
Metrics []autoscalingv2.MetricSpec `json:"metrics,omitempty"`
// TargetCPUUtilization sets the target average CPU used across all replicas.
// If average CPU exceeds this value, the HPA will scale up. Defaults to 90 percent.
// +optional
//
// Deprecated: Configure "OpenTelemetryCollector.Spec.Autoscaler.Metrics" instead.
TargetCPUUtilization *int32 `json:"targetCPUUtilization,omitempty"`
// TargetMemoryUtilization sets the target average memory utilization across all replicas.
// +optional
//
// Deprecated: Configure "OpenTelemetryCollector.Spec.Autoscaler.Metrics" instead.
TargetMemoryUtilization *int32 `json:"targetMemoryUtilization,omitempty"`
}

Expand Down
101 changes: 100 additions & 1 deletion apis/v1alpha1/opentelemetrycollector_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package v1alpha1
import (
"fmt"

autoscalingv2 "k8s.io/api/autoscaling/v2"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/validation"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -219,13 +220,111 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {
if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetMemoryUtilization != nil && (*r.Spec.Autoscaler.TargetMemoryUtilization < int32(1) || *r.Spec.Autoscaler.TargetMemoryUtilization > int32(99)) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetMemoryUtilization should be greater than 0 and less than 100")
}

if r.Spec.Autoscaler != nil && len(r.Spec.Autoscaler.Metrics) != 0 {
err := validateMetricsArray(r.Spec.Autoscaler.Metrics)
if err != nil {
return err
}
}
}

if r.Spec.Ingress.Type == IngressTypeNginx && r.Spec.Mode == ModeSidecar {
return fmt.Errorf("the OptenTelemetry Spec Ingress configuiration is incorrect. Ingress can only be used in combination with the modes: %s, %s, %s",
return fmt.Errorf("the OpenTelemetry Spec Ingress configuiration is incorrect. Ingress can only be used in combination with the modes: %s, %s, %s",
ModeDeployment, ModeDaemonSet, ModeStatefulSet,
)
}

return nil
}

// validateMetricsArray checks every entry of an autoscaler metrics list and
// returns an error describing the first invalid entry, or nil when all entries
// pass.
//
// For each metric, the source that corresponds to metric.Type (Object, Pods,
// Resource, ContainerResource or External) must be set and its target must
// satisfy validateAutoscalerMetricTarget. Any other metric type is rejected.
func validateMetricsArray(metrics []autoscalingv2.MetricSpec) error {
	for _, metric := range metrics {
		// Pick the target that belongs to the declared metric type. The source
		// structs are pointers, so a spec that declares a type without filling
		// in the matching source leaves target nil and is rejected below
		// instead of causing a nil-pointer panic.
		var target *autoscalingv2.MetricTarget

		switch metric.Type {
		case autoscalingv2.ObjectMetricSourceType:
			if metric.Object != nil {
				target = &metric.Object.Target
			}
		case autoscalingv2.PodsMetricSourceType:
			if metric.Pods != nil {
				target = &metric.Pods.Target
			}
		case autoscalingv2.ResourceMetricSourceType:
			if metric.Resource != nil {
				target = &metric.Resource.Target
			}
		case autoscalingv2.ContainerResourceMetricSourceType:
			if metric.ContainerResource != nil {
				target = &metric.ContainerResource.Target
			}
		case autoscalingv2.ExternalMetricSourceType:
			if metric.External != nil {
				target = &metric.External.Target
			}
		default:
			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, metric type unsupported")
		}

		if target == nil {
			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, metric source is not set for metric type %s", metric.Type)
		}
		if err := validateAutoscalerMetricTarget(target); err != nil {
			return err
		}
	}
	return nil
}

// validateAutoscalerMetricTarget validates a single metric target regardless
// of which metric source it belongs to.
//
// AverageValue and Value targets must be set, representable as an int64 and
// at least 1. Utilization targets must be set and lie in the range 1..99.
// Any other target type is rejected.
func validateAutoscalerMetricTarget(target *autoscalingv2.MetricTarget) error {
	switch target.Type {
	case autoscalingv2.AverageValueMetricType:
		// An unset quantity is treated like a non-positive one.
		if target.AverageValue == nil {
			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
		}
		if val, ok := target.AverageValue.AsInt64(); !ok || val < int64(1) {
			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
		}
	case autoscalingv2.ValueMetricType:
		if target.Value == nil {
			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
		}
		if val, ok := target.Value.AsInt64(); !ok || val < int64(1) {
			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
		}
	case autoscalingv2.UtilizationMetricType:
		if target.AverageUtilization == nil || *target.AverageUtilization < int32(1) || *target.AverageUtilization > int32(99) {
			return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100")
		}
	default:
		// The wording mentions "pods" for every source type; it is kept
		// unchanged so callers matching on the message keep working.
		return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, unrecognized pods target type")
	}
	return nil
}
65 changes: 65 additions & 0 deletions apis/v1alpha1/opentelemetrycollector_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/stretchr/testify/assert"
autoscalingv2 "k8s.io/api/autoscaling/v2"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -420,6 +421,70 @@ func TestOTELColValidatingWebhook(t *testing.T) {
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, minReplicas must not be greater than maxReplicas",
},
{
name: "invalid autoscaler metric type",
otelcol: OpenTelemetryCollector{
Spec: OpenTelemetryCollectorSpec{
MaxReplicas: &three,
Autoscaler: &AutoscalerSpec{
Metrics: []autoscalingv2.MetricSpec{
{
Type: "invalid",
},
},
},
},
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, metric type unsupported",
},
{
name: "invalid resource cpu utilization",
otelcol: OpenTelemetryCollector{
Spec: OpenTelemetryCollectorSpec{
MaxReplicas: &three,
Autoscaler: &AutoscalerSpec{
Metrics: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.ResourceMetricSourceType,
Resource: &autoscalingv2.ResourceMetricSource{
Name: v1.ResourceCPU,
Target: autoscalingv2.MetricTarget{
Type: autoscalingv2.UtilizationMetricType,
AverageUtilization: &zero,
},
},
},
},
},
},
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, average utilization should be greater than 0 and less than 100",
},
{
name: "invalid pod metric value",
otelcol: OpenTelemetryCollector{
Spec: OpenTelemetryCollectorSpec{
MaxReplicas: &three,
Autoscaler: &AutoscalerSpec{
Metrics: []autoscalingv2.MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
Metric: autoscalingv2.MetricIdentifier{
Name: "custom1",
},
Target: autoscalingv2.MetricTarget{
Type: autoscalingv2.AverageValueMetricType,
AverageValue: resource.NewQuantity(int64(0), resource.DecimalSI),
},
},
},
},
},
},
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0",
},
{
name: "invalid deployment mode incompabible with ingress settings",
otelcol: OpenTelemetryCollector{
Expand Down
7 changes: 7 additions & 0 deletions apis/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ metadata:
categories: Logging & Tracing
certified: "false"
containerImage: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator
createdAt: "2023-03-29T08:21:43Z"
createdAt: "2023-04-12T20:34:35Z"
description: Provides the OpenTelemetry components, including the Collector
operators.operatorframework.io/builder: operator-sdk-v1.27.0
operators.operatorframework.io/project_layout: go.kubebuilder.io/v3
Expand Down Expand Up @@ -307,7 +307,7 @@ spec:
- --enable-leader-election
- --zap-log-level=info
- --zap-time-encoding=rfc3339nano
image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.74.0
image: ghcr.io/open-telemetry/opentelemetry-operator/opentelemetry-operator:0.74.0
livenessProbe:
httpGet:
path: /healthz
Expand Down Expand Up @@ -414,7 +414,7 @@ spec:
maturity: alpha
provider:
name: OpenTelemetry Community
version: 0.74.0
version: 0.74.0
webhookdefinitions:
- admissionReviewVersions:
- v1
Expand Down
2 changes: 1 addition & 1 deletion bundle/manifests/opentelemetry.io_instrumentations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.11.3
controller-gen.kubebuilder.io/version: v0.11.3
creationTimestamp: null
labels:
app.kubernetes.io/name: opentelemetry-operator
Expand Down
Loading