From afdb362a8b46895ebd78d908b5f7007eeb3ff5a1 Mon Sep 17 00:00:00 2001 From: jvoravong Date: Tue, 6 Jun 2023 15:44:18 -0600 Subject: [PATCH 1/6] Add scrapeFailureLogLevel config to smartagent prometheus receivers to support logging scrape failures at different levels --- CHANGELOG.md | 2 ++ .../monitors/prometheusexporter/prometheus.go | 27 ++++++++++++++++--- .../testdata/httpd_metrics_config.yaml | 2 +- .../testdata/internal_metrics_config.yaml | 3 +-- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18872e18e4..01fe10cab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- (Splunk) `receiver/smartagent`: Add scrapeFailureLogLevel config to smartagent prometheus receivers to support logging scrape failures at different levels ([#3260](https://github.com/signalfx/splunk-otel-collector/pull/3260)) + ## v0.78.1 ### 🧰 Bug fixes 🧰 diff --git a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go index 8c5e94d046..a63862c7c0 100644 --- a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go +++ b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go @@ -45,6 +45,11 @@ type Config struct { // (the default). MetricPath string `yaml:"metricPath" default:"/metrics"` + // Control the log level to use if a scrape failure occurs when scraping + // a target. Modifying this configuration is useful for less stable + // targets. Only the debug, info, warn, and error log levels are supported. + ScrapeFailureLogLevel string `yaml:"scrapeFailureLogLevel" default:"error"` + // Send all the metrics that come out of the Prometheus exporter without // any filtering. This option has no effect when using the prometheus // exporter monitor directly since there is no built-in filtering, only @@ -52,6 +57,17 @@ type Config struct { SendAllMetrics bool `yaml:"sendAllMetrics"` } +func (c *Config) Validate() error { + var logLevels = []string{"debug", "info", "warn", "error"} + for _, v := range logLevels { + if v == c.ScrapeFailureLogLevel { + return nil + } + } + return fmt.Errorf("invalid prometheus scrape failure log level encountered - %s - valid choices include %s", + c.ScrapeFailureLogLevel, strings.Join(logLevels, ", ")) +} + func (c *Config) GetExtraMetrics() []string { // Maintain backwards compatibility with the config flag that existing // prior to the new filtering mechanism. @@ -74,9 +90,10 @@ type Monitor struct { // If true, IncludedMetrics is ignored and everything is sent. SendAll bool - monitorName string - logger logrus.FieldLogger - cancel func() + monitorName string + logger logrus.FieldLogger + loggerFailureLevel logrus.Level + cancel func() } type fetcher func() (io.ReadCloser, expfmt.Format, error) @@ -84,6 +101,7 @@ type fetcher func() (io.ReadCloser, expfmt.Format, error) // Configure the monitor and kick off volume metric syncing func (m *Monitor) Configure(conf *Config) error { m.logger = logrus.WithFields(logrus.Fields{"monitorType": m.monitorName, "monitorID": conf.MonitorID}) + m.loggerFailureLevel, _ = logrus.ParseLevel(conf.ScrapeFailureLogLevel) var bearerToken string @@ -136,7 +154,8 @@ func (m *Monitor) Configure(conf *Config) error { utils.RunOnInterval(ctx, func() { dps, err := fetchPrometheusMetrics(fetch) if err != nil { - m.logger.WithError(err).Error("Could not get prometheus metrics") + // The default log level is error, users can configure which level to use + m.logger.WithError(err).Log(m.loggerFailureLevel, "Could not get prometheus metrics") return } diff --git a/tests/receivers/smartagent/prometheus-exporter/testdata/httpd_metrics_config.yaml b/tests/receivers/smartagent/prometheus-exporter/testdata/httpd_metrics_config.yaml index e061bd3795..18c89bb48f 100644 --- a/tests/receivers/smartagent/prometheus-exporter/testdata/httpd_metrics_config.yaml +++ b/tests/receivers/smartagent/prometheus-exporter/testdata/httpd_metrics_config.yaml @@ -4,12 +4,12 @@ receivers: intervalSeconds: 1 host: "localhost" port: 8000 + scrapeFailureLogLevel: debug exporters: otlp: endpoint: "${OTLP_ENDPOINT}" tls: insecure: true - service: pipelines: metrics: diff --git a/tests/receivers/smartagent/prometheus-exporter/testdata/internal_metrics_config.yaml b/tests/receivers/smartagent/prometheus-exporter/testdata/internal_metrics_config.yaml index d1bc00c86f..77e97f4ac1 100644 --- a/tests/receivers/smartagent/prometheus-exporter/testdata/internal_metrics_config.yaml +++ b/tests/receivers/smartagent/prometheus-exporter/testdata/internal_metrics_config.yaml @@ -6,13 +6,12 @@ receivers: port: 8889 extraDimensions: foo: bar - + scrapeFailureLogLevel: error exporters: otlp: endpoint: "${OTLP_ENDPOINT}" tls: insecure: true - service: telemetry: metrics: From 13f8a828ca623170222a77abad2113bb59e3d2a0 Mon Sep 17 00:00:00 2001 From: jvoravong Date: Tue, 13 Jun 2023 18:25:50 -0600 Subject: [PATCH 2/6] pr changes --- .../monitors/prometheusexporter/prometheus.go | 11 ++-- .../prometheusexporter/prometheus_test.go | 60 +++++++++++++++++++ .../smartagentreceiver/config_test.go | 2 + 3 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go diff --git a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go index a63862c7c0..8804617907 100644 --- a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go +++ b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go @@ -58,14 +58,11 @@ type Config struct { } func (c *Config) Validate() error { - var logLevels = []string{"debug", "info", "warn", "error"} - for _, v := range logLevels { - if v == c.ScrapeFailureLogLevel { - return nil - } + if _, err := logrus.ParseLevel(c.ScrapeFailureLogLevel); err != nil { + return err + } else { + return nil } - return fmt.Errorf("invalid prometheus scrape failure log level encountered - %s - valid choices include %s", - c.ScrapeFailureLogLevel, strings.Join(logLevels, ", ")) } func (c *Config) GetExtraMetrics() []string { diff --git a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go new file mode 100644 index 0000000000..85fcf30c4c --- /dev/null +++ b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go @@ -0,0 +1,60 @@ +package prometheusexporter + +import ( + "errors" + "testing" +) + +func TestConfigValidate(t *testing.T) { + tests := []struct { + name string + scrapeFailureLevel string + expectedError error + }{ + { + name: "Valid log level: debug", + scrapeFailureLevel: "debug", + expectedError: nil, + }, + { + name: "Valid log level: info", + scrapeFailureLevel: "info", + expectedError: nil, + }, + { + name: "Valid log level: warn", + scrapeFailureLevel: "warn", + expectedError: nil, + }, + { + name: "Valid log level: error", + scrapeFailureLevel: "error", + expectedError: nil, + }, + { + name: "Invalid log level", + scrapeFailureLevel: "badValue", + expectedError: errors.New("not a valid logrus Level: \"badValue\""), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + config := &Config{ + ScrapeFailureLogLevel: test.scrapeFailureLevel, + } + + err := config.Validate() + + if test.expectedError != nil { + if err == nil { + t.Errorf("Expected error '%s', but got nil", test.expectedError.Error()) + } else if err.Error() != test.expectedError.Error() { + t.Errorf("Expected error '%s', but got '%s'", test.expectedError.Error(), err.Error()) + } + } else if err != nil { + t.Errorf("Expected no error, but got '%s'", err.Error()) + } + }) + } +} diff --git a/pkg/receiver/smartagentreceiver/config_test.go b/pkg/receiver/smartagentreceiver/config_test.go index c0d2fc05f3..013896ab15 100644 --- a/pkg/receiver/smartagentreceiver/config_test.go +++ b/pkg/receiver/smartagentreceiver/config_test.go @@ -135,6 +135,7 @@ func TestLoadConfig(t *testing.T) { Host: "localhost", Port: 5309, MetricPath: "/metrics", + ScrapeFailureLogLevel: "error", }, acceptsEndpoints: true, }, etcdCfg) @@ -331,6 +332,7 @@ func TestLoadConfigWithEndpoints(t *testing.T) { Host: "localhost", Port: 5555, MetricPath: "/metrics", + ScrapeFailureLogLevel: "error", }, acceptsEndpoints: true, }, etcdCfg) From 6d558b85d3461a80ae003845a4a2b8b0fe7a947a Mon Sep 17 00:00:00 2001 From: jvoravong Date: Tue, 13 Jun 2023 19:26:02 -0600 Subject: [PATCH 3/6] CHANGELOG.md fix --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index acabe2586b..ca47002a38 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ([#23229](https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/23229)) ### 💡 Enhancements 💡 + - (Splunk) `receiver/smartagent`: Add scrapeFailureLogLevel config to smartagent prometheus receivers to support logging scrape failures at different levels ([#3260](https://github.com/signalfx/splunk-otel-collector/pull/3260)) ## v0.78.1 From c31b42b896930f5baa7b4a9b441e51c131c1b2b0 Mon Sep 17 00:00:00 2001 From: jvoravong Date: Wed, 14 Jun 2023 10:19:35 -0600 Subject: [PATCH 4/6] fmt fix --- pkg/receiver/smartagentreceiver/config_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/receiver/smartagentreceiver/config_test.go b/pkg/receiver/smartagentreceiver/config_test.go index 013896ab15..537bfc9724 100644 --- a/pkg/receiver/smartagentreceiver/config_test.go +++ b/pkg/receiver/smartagentreceiver/config_test.go @@ -132,9 +132,9 @@ func TestLoadConfig(t *testing.T) { HTTPConfig: httpclient.HTTPConfig{ HTTPTimeout: timeutil.Duration(10 * time.Second), }, - Host: "localhost", - Port: 5309, - MetricPath: "/metrics", + Host: "localhost", + Port: 5309, + MetricPath: "/metrics", ScrapeFailureLogLevel: "error", }, acceptsEndpoints: true, @@ -329,9 +329,9 @@ func TestLoadConfigWithEndpoints(t *testing.T) { HTTPConfig: httpclient.HTTPConfig{ HTTPTimeout: timeutil.Duration(10 * time.Second), }, - Host: "localhost", - Port: 5555, - MetricPath: "/metrics", + Host: "localhost", + Port: 5555, + MetricPath: "/metrics", ScrapeFailureLogLevel: "error", }, acceptsEndpoints: true, From f75d5006c467f87ce738b16be57c94c1855efe02 Mon Sep 17 00:00:00 2001 From: jvoravong <47871238+jvoravong@users.noreply.github.com> Date: Fri, 16 Jun 2023 07:44:37 -0600 Subject: [PATCH 5/6] Update CHANGELOG.md Co-authored-by: Ryan Fitzpatrick <10867373+rmfitzpatrick@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca47002a38..88f841eefe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ ### 💡 Enhancements 💡 -- (Splunk) `receiver/smartagent`: Add scrapeFailureLogLevel config to smartagent prometheus receivers to support logging scrape failures at different levels ([#3260](https://github.com/signalfx/splunk-otel-collector/pull/3260)) +- (Splunk) `receiver/smartagent`: Add `scrapeFailureLogLevel` config field to `prometheus-exporter` and its sourcing monitors to determine the log level for reported scrape failures ([#3260](https://github.com/signalfx/splunk-otel-collector/pull/3260)) ## v0.78.1 From 505ae6a4a3bceb5423de75969eaacd85e338c5d4 Mon Sep 17 00:00:00 2001 From: jvoravong Date: Wed, 14 Jun 2023 15:42:44 -0600 Subject: [PATCH 6/6] patch --- .../monitors/prometheusexporter/prometheus.go | 22 +++---- .../prometheusexporter/prometheus_test.go | 60 ++++++++++--------- 2 files changed, 44 insertions(+), 38 deletions(-) diff --git a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go index 8804617907..53d0018800 100644 --- a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go +++ b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus.go @@ -47,8 +47,9 @@ type Config struct { // Control the log level to use if a scrape failure occurs when scraping // a target. Modifying this configuration is useful for less stable - // targets. Only the debug, info, warn, and error log levels are supported. - ScrapeFailureLogLevel string `yaml:"scrapeFailureLogLevel" default:"error"` + // targets. All logrus log levels are supported. + ScrapeFailureLogLevel string `yaml:"scrapeFailureLogLevel" default:"error"` + scrapeFailureLogrusLevel logrus.Level // Send all the metrics that come out of the Prometheus exporter without // any filtering. This option has no effect when using the prometheus @@ -58,11 +59,12 @@ type Config struct { } func (c *Config) Validate() error { - if _, err := logrus.ParseLevel(c.ScrapeFailureLogLevel); err != nil { + l, err := logrus.ParseLevel(c.ScrapeFailureLogLevel) + if err != nil { return err - } else { - return nil } + c.scrapeFailureLogrusLevel = l + return nil } func (c *Config) GetExtraMetrics() []string { @@ -87,10 +89,9 @@ type Monitor struct { // If true, IncludedMetrics is ignored and everything is sent. SendAll bool - monitorName string - logger logrus.FieldLogger - loggerFailureLevel logrus.Level - cancel func() + monitorName string + logger logrus.FieldLogger + cancel func() } type fetcher func() (io.ReadCloser, expfmt.Format, error) @@ -98,7 +99,6 @@ type fetcher func() (io.ReadCloser, expfmt.Format, error) // Configure the monitor and kick off volume metric syncing func (m *Monitor) Configure(conf *Config) error { m.logger = logrus.WithFields(logrus.Fields{"monitorType": m.monitorName, "monitorID": conf.MonitorID}) - m.loggerFailureLevel, _ = logrus.ParseLevel(conf.ScrapeFailureLogLevel) var bearerToken string @@ -152,7 +152,7 @@ func (m *Monitor) Configure(conf *Config) error { dps, err := fetchPrometheusMetrics(fetch) if err != nil { // The default log level is error, users can configure which level to use - m.logger.WithError(err).Log(m.loggerFailureLevel, "Could not get prometheus metrics") + m.logger.WithError(err).Log(conf.scrapeFailureLogrusLevel, "Could not get prometheus metrics") return } diff --git a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go index 85fcf30c4c..caff3d6d7c 100644 --- a/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go +++ b/internal/signalfx-agent/pkg/monitors/prometheusexporter/prometheus_test.go @@ -3,54 +3,60 @@ package prometheusexporter import ( "errors" "testing" + + "github.com/stretchr/testify/assert" ) +type testCase struct { + name string + scrapeFailureLogLevel string + expectedError error +} + func TestConfigValidate(t *testing.T) { - tests := []struct { - name string - scrapeFailureLevel string - expectedError error - }{ + testCases := []testCase{ { - name: "Valid log level: debug", - scrapeFailureLevel: "debug", - expectedError: nil, + name: "Valid log level: debug", + scrapeFailureLogLevel: "debug", + expectedError: nil, }, { - name: "Valid log level: info", - scrapeFailureLevel: "info", - expectedError: nil, + name: "Valid log level: info", + scrapeFailureLogLevel: "info", + expectedError: nil, }, { - name: "Valid log level: warn", - scrapeFailureLevel: "warn", - expectedError: nil, + name: "Valid log level: warn", + scrapeFailureLogLevel: "warn", + expectedError: nil, }, { - name: "Valid log level: error", - scrapeFailureLevel: "error", - expectedError: nil, + name: "Valid log level: error", + scrapeFailureLogLevel: "error", + expectedError: nil, }, { - name: "Invalid log level", - scrapeFailureLevel: "badValue", - expectedError: errors.New("not a valid logrus Level: \"badValue\""), + name: "Invalid log level", + scrapeFailureLogLevel: "badValue", + expectedError: errors.New("not a valid logrus Level: \"badValue\""), }, } - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { + for _, tc := range testCases { + tcc := tc + t.Run(tc.name, func(t *testing.T) { config := &Config{ - ScrapeFailureLogLevel: test.scrapeFailureLevel, + ScrapeFailureLogLevel: tcc.scrapeFailureLogLevel, } err := config.Validate() - if test.expectedError != nil { + if tcc.expectedError != nil { if err == nil { - t.Errorf("Expected error '%s', but got nil", test.expectedError.Error()) - } else if err.Error() != test.expectedError.Error() { - t.Errorf("Expected error '%s', but got '%s'", test.expectedError.Error(), err.Error()) + t.Errorf("Expected error '%s', but got nil", tcc.expectedError.Error()) + } else if err.Error() != tcc.expectedError.Error() { + t.Errorf("Expected error '%s', but got '%s'", tcc.expectedError.Error(), err.Error()) + assert.EqualValues(t, "smartagentvalid", config.MonitorConfigCore().MonitorID) } } else if err != nil { t.Errorf("Expected no error, but got '%s'", err.Error())