From 0599e8167019c13d433563dc76a0172cb53dd42d Mon Sep 17 00:00:00 2001 From: Bartek Nowotarski Date: Mon, 20 Sep 2021 13:55:08 +0200 Subject: [PATCH] services/horizon: Change `ProcessorsRunDuration` metric type from counter to summary (#3940) Add a new metric `ProcessorsRunDurationSummary`/`processor_run_duration_seconds` to replace the existing `ProcessorsRunDuration` (`processor_run_duration_seconds_total`). The old metric is now deprecated. `ProcessorsRunDuration` is a counter. While it allows estimating and comparing the actual duration of processors, it is impossible to calculate the average per-ledger run duration because the number of events is not counted. --- services/horizon/internal/ingest/fsm.go | 2 ++ services/horizon/internal/ingest/main.go | 12 ++++++++++++ services/horizon/internal/init.go | 1 + 3 files changed, 15 insertions(+) diff --git a/services/horizon/internal/ingest/fsm.go b/services/horizon/internal/ingest/fsm.go index 6c87f932dd..d610d7f5c7 100644 --- a/services/horizon/internal/ingest/fsm.go +++ b/services/horizon/internal/ingest/fsm.go @@ -523,6 +523,8 @@ func (r resumeState) addProcessorDurationsMetricFromMap(s *system, m map[string] processorName = strings.Replace(processorName, "*", "", -1) s.Metrics().ProcessorsRunDuration. With(prometheus.Labels{"name": processorName}).Add(value.Seconds()) + s.Metrics().ProcessorsRunDurationSummary. + With(prometheus.Labels{"name": processorName}).Observe(value.Seconds()) } } diff --git a/services/horizon/internal/ingest/main.go b/services/horizon/internal/ingest/main.go index d5c0827351..6d77ea2f26 100644 --- a/services/horizon/internal/ingest/main.go +++ b/services/horizon/internal/ingest/main.go @@ -129,8 +129,12 @@ type Metrics struct { LedgerStatsCounter *prometheus.CounterVec // ProcessorsRunDuration exposes processors run durations. + // Deprecated in favour of: ProcessorsRunDurationSummary. ProcessorsRunDuration *prometheus.CounterVec + // ProcessorsRunDurationSummary exposes processors run durations. 
+ ProcessorsRunDurationSummary *prometheus.SummaryVec + // CaptiveStellarCoreSynced exposes synced status of Captive Stellar-Core. // 1 if sync, 0 if not synced, -1 if unable to connect or HTTP server disabled. CaptiveStellarCoreSynced prometheus.GaugeFunc @@ -327,6 +331,14 @@ func (s *system) initMetrics() { []string{"name"}, ) + s.metrics.ProcessorsRunDurationSummary = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: "horizon", Subsystem: "ingest", Name: "processor_run_duration_seconds", + Help: "run durations of ingestion processors, sliding window = 10m", + }, + []string{"name"}, + ) + s.metrics.CaptiveStellarCoreSynced = prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: "horizon", Subsystem: "ingest", Name: "captive_stellar_core_synced", diff --git a/services/horizon/internal/init.go b/services/horizon/internal/init.go index 04755a0807..1e7c798a0c 100644 --- a/services/horizon/internal/init.go +++ b/services/horizon/internal/init.go @@ -263,6 +263,7 @@ func initIngestMetrics(app *App) { app.prometheusRegistry.MustRegister(app.ingester.Metrics().StateInvalidGauge) app.prometheusRegistry.MustRegister(app.ingester.Metrics().LedgerStatsCounter) app.prometheusRegistry.MustRegister(app.ingester.Metrics().ProcessorsRunDuration) + app.prometheusRegistry.MustRegister(app.ingester.Metrics().ProcessorsRunDurationSummary) app.prometheusRegistry.MustRegister(app.ingester.Metrics().CaptiveStellarCoreSynced) app.prometheusRegistry.MustRegister(app.ingester.Metrics().CaptiveCoreSupportedProtocolVersion) }