Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HPA] Add v1alpha1.MetricSpec to support for Pod custom metrics #1651

Merged
17 changes: 17 additions & 0 deletions .chloggen/1560-add-custom-metrics-support.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. operator, target allocator, github action)
component: Autoscaler

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Support scaling on Pod custom metrics.

# One or more tracking issues related to the change
issues:
- 1560

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
13 changes: 13 additions & 0 deletions apis/v1alpha1/opentelemetrycollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,11 @@ type AutoscalerSpec struct {
MaxReplicas *int32 `json:"maxReplicas,omitempty"`
// +optional
Behavior *autoscalingv2.HorizontalPodAutoscalerBehavior `json:"behavior,omitempty"`
// Metrics is meant to provide a customizable way to configure HPA metrics.
// currently the only supported custom metrics is type=Pod.
// Use TargetCPUUtilization or TargetMemoryUtilization instead if scaling on these common resource metrics.
// +optional
Metrics []MetricSpec `json:"metrics,omitempty"`
// TargetCPUUtilization sets the target average CPU used across all replicas.
// If average CPU exceeds this value, the HPA will scale up. Defaults to 90 percent.
// +optional
Expand Down Expand Up @@ -359,6 +364,14 @@ type Probe struct {
TerminationGracePeriodSeconds *int64 `json:"terminationGracePeriodSeconds,omitempty"`
}

// MetricSpec defines a subset of metrics to be defined for the HPA's metric array
// more metric type can be supported as needed.
// See https://pkg.go.dev/k8s.io/api/autoscaling/v2#MetricSpec for reference.
type MetricSpec struct {
Type autoscalingv2.MetricSourceType `json:"type"`
Pods *autoscalingv2.PodsMetricSource `json:"pods,omitempty"`
}

func init() {
SchemeBuilder.Register(&OpenTelemetryCollector{}, &OpenTelemetryCollectorList{})
}
66 changes: 50 additions & 16 deletions apis/v1alpha1/opentelemetrycollector_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package v1alpha1
import (
"fmt"

autoscalingv2 "k8s.io/api/autoscaling/v2"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -199,6 +200,12 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {
}
}

if r.Spec.Ingress.Type == IngressTypeNginx && r.Spec.Mode == ModeSidecar {
return fmt.Errorf("the OpenTelemetry Spec Ingress configuration is incorrect. Ingress can only be used in combination with the modes: %s, %s, %s",
ModeDeployment, ModeDaemonSet, ModeStatefulSet,
)
}

// validate autoscale with horizontal pod autoscaler
if maxReplicas != nil {
if *maxReplicas < int32(1) {
Expand All @@ -217,22 +224,8 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, minReplicas should be one or more")
}

if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.Behavior != nil {
if r.Spec.Autoscaler.Behavior.ScaleDown != nil && r.Spec.Autoscaler.Behavior.ScaleDown.StabilizationWindowSeconds != nil &&
*r.Spec.Autoscaler.Behavior.ScaleDown.StabilizationWindowSeconds < int32(1) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, scaleDown should be one or more")
}

if r.Spec.Autoscaler.Behavior.ScaleUp != nil && r.Spec.Autoscaler.Behavior.ScaleUp.StabilizationWindowSeconds != nil &&
*r.Spec.Autoscaler.Behavior.ScaleUp.StabilizationWindowSeconds < int32(1) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, scaleUp should be one or more")
}
}
if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetCPUUtilization != nil && (*r.Spec.Autoscaler.TargetCPUUtilization < int32(1) || *r.Spec.Autoscaler.TargetCPUUtilization > int32(99)) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetCPUUtilization should be greater than 0 and less than 100")
}
if r.Spec.Autoscaler != nil && r.Spec.Autoscaler.TargetMemoryUtilization != nil && (*r.Spec.Autoscaler.TargetMemoryUtilization < int32(1) || *r.Spec.Autoscaler.TargetMemoryUtilization > int32(99)) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetMemoryUtilization should be greater than 0 and less than 100")
if r.Spec.Autoscaler != nil {
return checkAutoscalerSpec(r.Spec.Autoscaler)
}
}

Expand Down Expand Up @@ -265,3 +258,44 @@ func (r *OpenTelemetryCollector) validateCRDSpec() error {

return nil
}

func checkAutoscalerSpec(autoscaler *AutoscalerSpec) error {
if autoscaler.Behavior != nil {
if autoscaler.Behavior.ScaleDown != nil && autoscaler.Behavior.ScaleDown.StabilizationWindowSeconds != nil &&
*autoscaler.Behavior.ScaleDown.StabilizationWindowSeconds < int32(1) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, scaleDown should be one or more")
}

if autoscaler.Behavior.ScaleUp != nil && autoscaler.Behavior.ScaleUp.StabilizationWindowSeconds != nil &&
*autoscaler.Behavior.ScaleUp.StabilizationWindowSeconds < int32(1) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, scaleUp should be one or more")
}
}
if autoscaler.TargetCPUUtilization != nil && (*autoscaler.TargetCPUUtilization < int32(1) || *autoscaler.TargetCPUUtilization > int32(99)) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetCPUUtilization should be greater than 0 and less than 100")
}
if autoscaler.TargetMemoryUtilization != nil && (*autoscaler.TargetMemoryUtilization < int32(1) || *autoscaler.TargetMemoryUtilization > int32(99)) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, targetMemoryUtilization should be greater than 0 and less than 100")
}

for _, metric := range autoscaler.Metrics {
if metric.Type != autoscalingv2.PodsMetricSourceType {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, metric type unsupported. Expected metric of source type Pod")
}

// pod metrics target only support value and averageValue.
if metric.Pods.Target.Type == autoscalingv2.AverageValueMetricType {
if val, ok := metric.Pods.Target.AverageValue.AsInt64(); !ok || val < int64(1) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0")
}
} else if metric.Pods.Target.Type == autoscalingv2.ValueMetricType {
if val, ok := metric.Pods.Target.Value.AsInt64(); !ok || val < int64(1) {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, value should be greater than 0")
}
} else {
return fmt.Errorf("the OpenTelemetry Spec autoscale configuration is incorrect, invalid pods target type")
}
}

return nil
}
67 changes: 67 additions & 0 deletions apis/v1alpha1/opentelemetrycollector_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/stretchr/testify/assert"
autoscalingv2 "k8s.io/api/autoscaling/v2"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -422,6 +423,72 @@ func TestOTELColValidatingWebhook(t *testing.T) {
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, minReplicas must not be greater than maxReplicas",
},
{
name: "invalid autoscaler metric type",
otelcol: OpenTelemetryCollector{
Spec: OpenTelemetryCollectorSpec{
MaxReplicas: &three,
Autoscaler: &AutoscalerSpec{
Metrics: []MetricSpec{
{
Type: autoscalingv2.ResourceMetricSourceType,
},
},
},
},
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, metric type unsupported. Expected metric of source type Pod",
},
{
name: "invalid pod metric average value",
otelcol: OpenTelemetryCollector{
Spec: OpenTelemetryCollectorSpec{
MaxReplicas: &three,
Autoscaler: &AutoscalerSpec{
Metrics: []MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
Metric: autoscalingv2.MetricIdentifier{
Name: "custom1",
},
Target: autoscalingv2.MetricTarget{
Type: autoscalingv2.AverageValueMetricType,
AverageValue: resource.NewQuantity(int64(0), resource.DecimalSI),
},
},
},
},
},
},
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, average value should be greater than 0",
},
{
name: "utilization target is not valid with pod metrics",
otelcol: OpenTelemetryCollector{
Spec: OpenTelemetryCollectorSpec{
MaxReplicas: &three,
Autoscaler: &AutoscalerSpec{
Metrics: []MetricSpec{
{
Type: autoscalingv2.PodsMetricSourceType,
Pods: &autoscalingv2.PodsMetricSource{
Metric: autoscalingv2.MetricIdentifier{
Name: "custom1",
},
Target: autoscalingv2.MetricTarget{
Type: autoscalingv2.UtilizationMetricType,
AverageUtilization: &one,
},
},
},
},
},
},
},
expectedErr: "the OpenTelemetry Spec autoscale configuration is incorrect, invalid pods target type",
},
{
name: "invalid deployment mode incompabible with ingress settings",
otelcol: OpenTelemetryCollector{
Expand Down
27 changes: 27 additions & 0 deletions apis/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

132 changes: 132 additions & 0 deletions bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,138 @@ spec:
feature. If MaxReplicas is set autoscaling is enabled.
format: int32
type: integer
metrics:
description: Metrics is meant to provide a customizable way to
configure HPA metrics. currently the only supported custom metrics
is type=Pod. Use TargetCPUUtilization or TargetMemoryUtilization
instead if scaling on these common resource metrics.
items:
description: MetricSpec defines a subset of metrics to be defined
for the HPA's metric array more metric type can be supported
as needed. See https://pkg.go.dev/k8s.io/api/autoscaling/v2#MetricSpec
for reference.
properties:
pods:
description: PodsMetricSource indicates how to scale on
a metric describing each pod in the current scale target
(for example, transactions-processed-per-second). The
values will be averaged together before being compared
to the target value.
properties:
metric:
description: metric identifies the target metric by
name and selector
properties:
name:
description: name is the name of the given metric
type: string
selector:
description: selector is the string-encoded form
of a standard kubernetes label selector for the
given metric When set, it is passed as an additional
parameter to the metrics server for more specific
metrics scoping. When unset, just the metricName
will be used to gather metrics.
properties:
matchExpressions:
description: matchExpressions is a list of label
selector requirements. The requirements are
ANDed.
items:
description: A label selector requirement
is a selector that contains values, a key,
and an operator that relates the key and
values.
properties:
key:
description: key is the label key that
the selector applies to.
type: string
operator:
description: operator represents a key's
relationship to a set of values. Valid
operators are In, NotIn, Exists and
DoesNotExist.
type: string
values:
description: values is an array of string
values. If the operator is In or NotIn,
the values array must be non-empty.
If the operator is Exists or DoesNotExist,
the values array must be empty. This
array is replaced during a strategic
merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchLabels:
additionalProperties:
type: string
description: matchLabels is a map of {key,value}
pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions,
whose key field is "key", the operator is
"In", and the values array contains only "value".
The requirements are ANDed.
type: object
type: object
x-kubernetes-map-type: atomic
required:
- name
type: object
target:
description: target specifies the target value for the
given metric
properties:
averageUtilization:
description: averageUtilization is the target value
of the average of the resource metric across all
relevant pods, represented as a percentage of
the requested value of the resource for the pods.
Currently only valid for Resource metric source
type
format: int32
type: integer
averageValue:
anyOf:
- type: integer
- type: string
description: averageValue is the target value of
the average of the metric across all relevant
pods (as a quantity)
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type:
description: type represents whether the metric
type is Utilization, Value, or AverageValue
type: string
value:
anyOf:
- type: integer
- type: string
description: value is the target value of the metric
(as a quantity).
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
required:
- type
type: object
required:
- metric
- target
type: object
type:
description: MetricSourceType indicates the type of metric.
type: string
required:
- type
type: object
type: array
minReplicas:
description: MinReplicas sets a lower bound to the autoscaling
feature. Set this if your are using autoscaling. It must be
Expand Down
Loading