diff --git a/.chloggen/prometheus-remote-write-exporter-retry-429.yaml b/.chloggen/prometheus-remote-write-exporter-retry-429.yaml new file mode 100644 index 000000000000..115675c38dcb --- /dev/null +++ b/.chloggen/prometheus-remote-write-exporter-retry-429.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: prometheusremotewriteexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add `exporter.prometheusremotewritexporter.RetryOn429` feature gate to retry on http status code 429 response. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [ 31032 ] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: The feature gate is initially disabled by default. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/exporter/prometheusremotewriteexporter/README.md b/exporter/prometheusremotewriteexporter/README.md index f48f3e9d00a8..33ae05e1604d 100644 --- a/exporter/prometheusremotewriteexporter/README.md +++ b/exporter/prometheusremotewriteexporter/README.md @@ -100,6 +100,13 @@ Several helper files are leveraged to provide additional capabilities automatica - [TLS and mTLS settings](https://github.com/open-telemetry/opentelemetry-collector/blob/main/config/configtls/README.md) - [Retry and timeout settings](https://github.com/open-telemetry/opentelemetry-collector/blob/main/exporter/exporterhelper/README.md), note that the exporter doesn't support `sending_queue` but provides `remote_write_queue`. +### Feature gates +This exporter has feature gate: `exporter.prometheusremotewritexporter.RetryOn429`. +When this feature gate is enable the prometheus remote write exporter will retry on 429 http status code with the provided retry configuration. +It currently doesn't support respecting the http header `Retry-After` if provided since the retry library used doesn't support this feature. + +To enable it run collector with enabled feature gate `exporter.prometheusremotewritexporter.RetryOn429`. This can be done by executing it with one additional parameter - `--feature-gates=telemetry.useOtelForInternalMetrics`. + ## Metric names and labels normalization OpenTelemetry metric names and attributes are normalized to be compliant with Prometheus naming rules. [Details on this normalization process are described in the Prometheus translator module](../../pkg/translator/prometheus/). diff --git a/exporter/prometheusremotewriteexporter/exporter.go b/exporter/prometheusremotewriteexporter/exporter.go index 633dc727f3de..15118976624e 100644 --- a/exporter/prometheusremotewriteexporter/exporter.go +++ b/exporter/prometheusremotewriteexporter/exporter.go @@ -66,6 +66,7 @@ type prwExporter struct { clientSettings *confighttp.ClientConfig settings component.TelemetrySettings retrySettings configretry.BackOffConfig + retryOnHTTP429 bool wal *prweWAL exporterSettings prometheusremotewrite.Settings telemetry prwTelemetry @@ -124,6 +125,7 @@ func newPRWExporter(cfg *Config, set exporter.CreateSettings) (*prwExporter, err clientSettings: &cfg.ClientConfig, settings: set.TelemetrySettings, retrySettings: cfg.BackOffConfig, + retryOnHTTP429: retryOn429FeatureGate.IsEnabled(), exporterSettings: prometheusremotewrite.Settings{ Namespace: cfg.Namespace, ExternalLabels: sanitizedLabels, @@ -329,6 +331,13 @@ func (prwe *prwExporter) execute(ctx context.Context, writeReq *prompb.WriteRequ if resp.StatusCode >= 500 && resp.StatusCode < 600 { return rerr } + + // 429 errors are recoverable and the exporter should retry if RetryOnHTTP429 enabled + // Reference: https://github.com/prometheus/prometheus/pull/12677 + if prwe.retryOnHTTP429 && resp.StatusCode == 429 { + return rerr + } + return backoff.Permanent(consumererror.NewPermanent(rerr)) } diff --git a/exporter/prometheusremotewriteexporter/exporter_test.go b/exporter/prometheusremotewriteexporter/exporter_test.go index 6021c012ed37..c1dbaafe758e 100644 --- a/exporter/prometheusremotewriteexporter/exporter_test.go +++ b/exporter/prometheusremotewriteexporter/exporter_test.go @@ -1051,6 +1051,7 @@ func TestRetries(t *testing.T) { serverErrorCount int // number of times server should return error expectedAttempts int httpStatus int + RetryOnHTTP429 bool assertError assert.ErrorAssertionFunc assertErrorType assert.ErrorAssertionFunc ctx context.Context @@ -1060,15 +1061,37 @@ func TestRetries(t *testing.T) { 3, 4, http.StatusInternalServerError, + false, + assert.NoError, + assert.NoError, + context.Background(), + }, + { + "test 429 should retry", + 3, + 4, + http.StatusTooManyRequests, + true, assert.NoError, assert.NoError, context.Background(), }, + { + "test 429 should not retry", + 4, + 1, + http.StatusTooManyRequests, + false, + assert.Error, + assertPermanentConsumerError, + context.Background(), + }, { "test 4xx should not retry", 4, 1, http.StatusBadRequest, + false, assert.Error, assertPermanentConsumerError, context.Background(), @@ -1078,6 +1101,7 @@ func TestRetries(t *testing.T) { 4, 0, http.StatusInternalServerError, + false, assert.Error, assertPermanentConsumerError, canceledContext(), @@ -1103,8 +1127,9 @@ func TestRetries(t *testing.T) { // Create the prwExporter exporter := &prwExporter{ - endpointURL: endpointURL, - client: http.DefaultClient, + endpointURL: endpointURL, + client: http.DefaultClient, + retryOnHTTP429: tt.RetryOnHTTP429, retrySettings: configretry.BackOffConfig{ Enabled: true, }, diff --git a/exporter/prometheusremotewriteexporter/factory.go b/exporter/prometheusremotewriteexporter/factory.go index d0a0f8555513..b84fa7ce15af 100644 --- a/exporter/prometheusremotewriteexporter/factory.go +++ b/exporter/prometheusremotewriteexporter/factory.go @@ -14,11 +14,19 @@ import ( "go.opentelemetry.io/collector/config/configretry" "go.opentelemetry.io/collector/exporter" "go.opentelemetry.io/collector/exporter/exporterhelper" + "go.opentelemetry.io/collector/featuregate" "github.com/open-telemetry/opentelemetry-collector-contrib/exporter/prometheusremotewriteexporter/internal/metadata" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry" ) +var retryOn429FeatureGate = featuregate.GlobalRegistry().MustRegister( + "exporter.prometheusremotewritexporter.RetryOn429", + featuregate.StageAlpha, + featuregate.WithRegisterFromVersion("v0.101.0"), + featuregate.WithRegisterDescription("When enabled, the Prometheus remote write exporter will retry 429 http status code. Requires exporter.prometheusremotewritexporter.metrics.RetryOn429 to be enabled."), +) + // NewFactory creates a new Prometheus Remote Write exporter. func NewFactory() exporter.Factory { return exporter.NewFactory( diff --git a/exporter/prometheusremotewriteexporter/go.mod b/exporter/prometheusremotewriteexporter/go.mod index 42e0991115d3..aad1129fae9d 100644 --- a/exporter/prometheusremotewriteexporter/go.mod +++ b/exporter/prometheusremotewriteexporter/go.mod @@ -22,6 +22,7 @@ require ( go.opentelemetry.io/collector/confmap v0.100.0 go.opentelemetry.io/collector/consumer v0.100.0 go.opentelemetry.io/collector/exporter v0.100.0 + go.opentelemetry.io/collector/featuregate v1.7.0 go.opentelemetry.io/collector/pdata v1.7.0 go.opentelemetry.io/otel v1.26.0 go.opentelemetry.io/otel/metric v1.26.0 @@ -67,7 +68,6 @@ require ( go.opentelemetry.io/collector/config/internal v0.100.0 // indirect go.opentelemetry.io/collector/extension v0.100.0 // indirect go.opentelemetry.io/collector/extension/auth v0.100.0 // indirect - go.opentelemetry.io/collector/featuregate v1.7.0 // indirect go.opentelemetry.io/collector/receiver v0.100.0 // indirect go.opentelemetry.io/collector/semconv v0.100.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.51.0 // indirect