From 16ef4e86e04bfc15d243009d9deebf2ab5493c26 Mon Sep 17 00:00:00 2001 From: Huynh Minh Tri Date: Thu, 19 Dec 2024 11:03:41 +0700 Subject: [PATCH 1/9] feat: add prometheus metrics PR #17077 --- metrics/exp/exp.go | 2 + metrics/prometheus/collector.go | 115 +++++++++++++++++++++++++++++++ metrics/prometheus/prometheus.go | 68 ++++++++++++++++++ 3 files changed, 185 insertions(+) create mode 100644 metrics/prometheus/collector.go create mode 100644 metrics/prometheus/prometheus.go diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index e25026d6ef..85b72385f5 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -5,6 +5,7 @@ package exp import ( "expvar" "fmt" + "github.com/tomochain/tomochain/metrics/prometheus" "net/http" "sync" @@ -42,6 +43,7 @@ func Exp(r metrics.Registry) { // http.HandleFunc("/debug/vars", e.expHandler) // haven't found an elegant way, so just use a different endpoint http.Handle("/debug/metrics", h) + http.Handle("/debug/metrics/prometheus", prometheus.Handler(r)) } // ExpHandler will return an expvar powered metrics handler. diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go new file mode 100644 index 0000000000..1ee5316cca --- /dev/null +++ b/metrics/prometheus/collector.go @@ -0,0 +1,115 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package prometheus + +import ( + "bytes" + "fmt" + "strconv" + "strings" + + "github.com/tomochain/tomochain/metrics" +) + +var ( + typeGaugeTpl = "# TYPE %s gauge\n" + typeCounterTpl = "# TYPE %s counter\n" + typeSummaryTpl = "# TYPE %s summary\n" + keyValueTpl = "%s %v\n\n" + keyQuantileTagValueTpl = "%s {quantile=\"%s\"} %v\n\n" +) + +// collector is a collection of byte buffers that aggregate Prometheus reports +// for different metric types. +type collector struct { + buff *bytes.Buffer +} + +// newCollector createa a new Prometheus metric aggregator. 
+func newCollector() *collector { + return &collector{ + buff: &bytes.Buffer{}, + } +} + +func (c *collector) addCounter(name string, m metrics.Counter) { + c.writeGaugeCounter(name, m.Count()) +} + +func (c *collector) addGauge(name string, m metrics.Gauge) { + c.writeGaugeCounter(name, m.Value()) +} + +func (c *collector) addGaugeFloat64(name string, m metrics.GaugeFloat64) { + c.writeGaugeCounter(name, m.Value()) +} + +func (c *collector) addHistogram(name string, m metrics.Histogram) { + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) + c.writeSummaryCounter(name, m.Count()) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } +} + +func (c *collector) addMeter(name string, m metrics.Meter) { + c.writeGaugeCounter(name, m.Count()) +} + +func (c *collector) addTimer(name string, m metrics.Timer) { + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) + c.writeSummaryCounter(name, m.Count()) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } +} + +func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { + if len(m.Values()) <= 0 { + return + } + ps := m.Percentiles([]float64{50, 95, 99}) + val := m.Values() + c.writeSummaryCounter(name, len(val)) + c.writeSummaryPercentile(name, "0.50", ps[0]) + c.writeSummaryPercentile(name, "0.95", ps[1]) + c.writeSummaryPercentile(name, "0.99", ps[2]) +} + +func (c *collector) writeGaugeCounter(name string, value interface{}) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(typeGaugeTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyValueTpl, name, value)) +} + +func (c *collector) writeSummaryCounter(name string, value interface{}) { + name = mutateKey(name + "_count") + c.buff.WriteString(fmt.Sprintf(typeCounterTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyValueTpl, name, value)) +} + +func (c *collector) writeSummaryPercentile(name, p string, value interface{}) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyQuantileTagValueTpl, name, p, value)) +} + +func mutateKey(key string) string { + return strings.Replace(key, "/", "_", -1) +} diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go new file mode 100644 index 0000000000..1a6cb24c22 --- /dev/null +++ b/metrics/prometheus/prometheus.go @@ -0,0 +1,68 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Package prometheus exposes go-metrics into a Prometheus format. 
+package prometheus + +import ( + "fmt" + "net/http" + "sort" + + "github.com/tomochain/tomochain/log" + "github.com/tomochain/tomochain/metrics" +) + +// Handler returns an HTTP handler which dump metrics in Prometheus format. +func Handler(reg metrics.Registry) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Gather and pre-sort the metrics to avoid random listings + var names []string + reg.Each(func(name string, i interface{}) { + names = append(names, name) + }) + sort.Strings(names) + + // Aggregate all the metris into a Prometheus collector + c := newCollector() + + for _, name := range names { + i := reg.Get(name) + + switch m := i.(type) { + case metrics.Counter: + c.addCounter(name, m.Snapshot()) + case metrics.Gauge: + c.addGauge(name, m.Snapshot()) + case metrics.GaugeFloat64: + c.addGaugeFloat64(name, m.Snapshot()) + case metrics.Histogram: + c.addHistogram(name, m.Snapshot()) + case metrics.Meter: + c.addMeter(name, m.Snapshot()) + case metrics.Timer: + c.addTimer(name, m.Snapshot()) + case metrics.ResettingTimer: + c.addResettingTimer(name, m.Snapshot()) + default: + log.Warn("Unknown Prometheus metric type", "type", fmt.Sprintf("%T", i)) + } + } + w.Header().Add("Content-Type", "text/plain") + w.Header().Add("Content-Length", fmt.Sprint(c.buff.Len())) + w.Write(c.buff.Bytes()) + }) +} From a4adf0cfc6c024335a554b1d3435f1857778fa38 Mon Sep 17 00:00:00 2001 From: Huynh Minh Tri Date: Thu, 19 Dec 2024 14:00:54 +0700 Subject: [PATCH 2/9] metrics: add ResettingTimer support to expvar PR #16878 --- metrics/exp/exp.go | 16 ++++- metrics/resetting_timer.go | 2 +- metrics/resetting_timer_test.go | 110 ++++++++++++++++++++++++++++++++ node/api.go | 30 ++++++++- 4 files changed, 154 insertions(+), 4 deletions(-) diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 85b72385f5..77d96e4a3f 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -5,10 +5,11 @@ package exp import ( "expvar" "fmt" - "github.com/tomochain/tomochain/metrics/prometheus" "net/http" "sync" + "github.com/tomochain/tomochain/metrics/prometheus" + "github.com/tomochain/tomochain/metrics" ) @@ -136,6 +137,17 @@ func (exp *exp) publishTimer(name string, metric metrics.Timer) { exp.getFloat(name + ".mean-rate").Set(t.RateMean()) } +func (exp *exp) publishResettingTimer(name string, metric metrics.ResettingTimer) { + t := metric.Snapshot() + ps := t.Percentiles([]float64{50, 75, 95, 99}) + exp.getInt(name + ".count").Set(int64(len(t.Values()))) + exp.getFloat(name + ".mean").Set(t.Mean()) + exp.getInt(name + ".50-percentile").Set(ps[0]) + exp.getInt(name + ".75-percentile").Set(ps[1]) + exp.getInt(name + ".95-percentile").Set(ps[2]) + exp.getInt(name + ".99-percentile").Set(ps[3]) +} + func (exp *exp) syncToExpvar() { exp.registry.Each(func(name string, i interface{}) { switch i.(type) { @@ -151,6 +163,8 @@ func (exp *exp) syncToExpvar() { exp.publishMeter(name, i.(metrics.Meter)) case metrics.Timer: exp.publishTimer(name, i.(metrics.Timer)) + case metrics.ResettingTimer: + exp.publishResettingTimer(name, i.(metrics.ResettingTimer)) default: panic(fmt.Sprintf("unsupported type for '%s': %T", name, i)) } diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go index 57bcb31343..f33a9f8aa0 100644 --- a/metrics/resetting_timer.go +++ b/metrics/resetting_timer.go @@ -210,7 +210,7 @@ func (t *ResettingTimerSnapshot) calc(percentiles []float64) { // poor man's math.Round(x): // math.Floor(x + 0.5) indexOfPerc := int(math.Floor(((abs / 100.0) * float64(count)) + 
0.5)) - if pct >= 0 { + if pct >= 0 && indexOfPerc > 0 { indexOfPerc -= 1 // index offset=0 } thresholdBoundary = t.values[indexOfPerc] diff --git a/metrics/resetting_timer_test.go b/metrics/resetting_timer_test.go index 58fd47f352..77c49dc386 100644 --- a/metrics/resetting_timer_test.go +++ b/metrics/resetting_timer_test.go @@ -104,3 +104,113 @@ func TestResettingTimer(t *testing.T) { } } } + +func TestResettingTimerWithFivePercentiles(t *testing.T) { + tests := []struct { + values []int64 + start int + end int + wantP05 int64 + wantP20 int64 + wantP50 int64 + wantP95 int64 + wantP99 int64 + wantMean float64 + wantMin int64 + wantMax int64 + }{ + { + values: []int64{}, + start: 1, + end: 11, + wantP05: 1, wantP20: 2, wantP50: 5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + { + values: []int64{}, + start: 1, + end: 101, + wantP05: 5, wantP20: 20, wantP50: 50, wantP95: 95, wantP99: 99, + wantMin: 1, wantMax: 100, wantMean: 50.5, + }, + { + values: []int64{1}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 1, wantP95: 1, wantP99: 1, + wantMin: 1, wantMax: 1, wantMean: 1, + }, + { + values: []int64{0}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{1, 10}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 1, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + } + for ind, tt := range tests { + timer := NewResettingTimer() + + for i := tt.start; i < tt.end; i++ { + tt.values = append(tt.values, int64(i)) + } + + for _, v := range tt.values { + timer.Update(time.Duration(v)) + } + + snap := timer.Snapshot() + + ps := snap.Percentiles([]float64{5, 20, 50, 95, 99}) + + val := snap.Values() + + if len(val) > 0 { + if tt.wantMin != val[0] { + t.Fatalf("%d: min: got %d, want %d", ind, val[0], tt.wantMin) + } + + if tt.wantMax != val[len(val)-1] { + t.Fatalf("%d: max: got %d, want %d", ind, val[len(val)-1], tt.wantMax) + } + } + + if tt.wantMean != snap.Mean() { + t.Fatalf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + } + + if tt.wantP05 != ps[0] { + t.Fatalf("%d: p05: got %d, want %d", ind, ps[0], tt.wantP05) + } + + if tt.wantP20 != ps[1] { + t.Fatalf("%d: p20: got %d, want %d", ind, ps[1], tt.wantP20) + } + + if tt.wantP50 != ps[2] { + t.Fatalf("%d: p50: got %d, want %d", ind, ps[2], tt.wantP50) + } + + if tt.wantP95 != ps[3] { + t.Fatalf("%d: p95: got %d, want %d", ind, ps[3], tt.wantP95) + } + + if tt.wantP99 != ps[4] { + t.Fatalf("%d: p99: got %d, want %d", ind, ps[4], tt.wantP99) + } + } +} diff --git a/node/api.go b/node/api.go index 23edbe2b3f..b2a5e95967 100644 --- a/node/api.go +++ b/node/api.go @@ -337,7 +337,20 @@ func (api *PublicDebugAPI) Metrics(raw bool) (map[string]interface{}, error) { "95": metric.Percentile(0.95), }, } - + case metrics.ResettingTimer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{5, 20, 50, 80, 95}) + root[name] = map[string]interface{}{ + "Measurements": len(t.Values()), + "Mean": time.Duration(t.Mean()).String(), + "Percentiles": map[string]interface{}{ + "5": time.Duration(ps[0]).String(), + "20": time.Duration(ps[1]).String(), + "50": time.Duration(ps[2]).String(), + "80": time.Duration(ps[3]).String(), + "95": time.Duration(ps[4]).String(), + }, + } default: root[name] = "Unknown metric type" } @@ -372,7 
+385,20 @@ func (api *PublicDebugAPI) Metrics(raw bool) (map[string]interface{}, error) { "95": time.Duration(metric.Percentile(0.95)).String(), }, } - + case metrics.ResettingTimer: + t := metric.Snapshot() + ps := t.Percentiles([]float64{5, 20, 50, 80, 95}) + root[name] = map[string]interface{}{ + "Measurements": len(t.Values()), + "Mean": time.Duration(t.Mean()).String(), + "Percentiles": map[string]interface{}{ + "5": time.Duration(ps[0]).String(), + "20": time.Duration(ps[1]).String(), + "50": time.Duration(ps[2]).String(), + "80": time.Duration(ps[3]).String(), + "95": time.Duration(ps[4]).String(), + }, + } default: root[name] = "Unknown metric type" } From cb9af42b0430f447f1f067c02040fd9a8c22a45d Mon Sep 17 00:00:00 2001 From: Huynh Minh Tri Date: Thu, 19 Dec 2024 14:15:24 +0700 Subject: [PATCH 3/9] metrics: return an empty snapshot for NilResettingTimer PR #16930 --- metrics/resetting_timer.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go index f33a9f8aa0..e5327d3bd3 100644 --- a/metrics/resetting_timer.go +++ b/metrics/resetting_timer.go @@ -58,7 +58,11 @@ type NilResettingTimer struct { func (NilResettingTimer) Values() []int64 { return nil } // Snapshot is a no-op. -func (NilResettingTimer) Snapshot() ResettingTimer { return NilResettingTimer{} } +func (NilResettingTimer) Snapshot() ResettingTimer { + return &ResettingTimerSnapshot{ + values: []int64{}, + } +} // Time is a no-op. func (NilResettingTimer) Time(func()) {} From c9aec9df8731fc73746c0bfe0f0b6826e3bb8ba6 Mon Sep 17 00:00:00 2001 From: Huynh Minh Tri Date: Thu, 19 Dec 2024 14:20:24 +0700 Subject: [PATCH 4/9] metrics: remove repetitive calculations PR #16944 --- metrics/metrics.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index dbb2727ec0..98af9c4379 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -67,17 +67,20 @@ func CollectProcessMetrics(refresh time.Duration) { } // Iterate loading the different stats and updating the meters for i := 1; ; i++ { - runtime.ReadMemStats(memstats[i%2]) - memAllocs.Mark(int64(memstats[i%2].Mallocs - memstats[(i-1)%2].Mallocs)) - memFrees.Mark(int64(memstats[i%2].Frees - memstats[(i-1)%2].Frees)) - memInuse.Mark(int64(memstats[i%2].Alloc - memstats[(i-1)%2].Alloc)) - memPauses.Mark(int64(memstats[i%2].PauseTotalNs - memstats[(i-1)%2].PauseTotalNs)) + location1 := i % 2 + location2 := (i - 1) % 2 - if ReadDiskStats(diskstats[i%2]) == nil { - diskReads.Mark(diskstats[i%2].ReadCount - diskstats[(i-1)%2].ReadCount) - diskReadBytes.Mark(diskstats[i%2].ReadBytes - diskstats[(i-1)%2].ReadBytes) - diskWrites.Mark(diskstats[i%2].WriteCount - diskstats[(i-1)%2].WriteCount) - diskWriteBytes.Mark(diskstats[i%2].WriteBytes - diskstats[(i-1)%2].WriteBytes) + runtime.ReadMemStats(memstats[location1]) + memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) + memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) + memInuse.Mark(int64(memstats[location1].Alloc - memstats[location2].Alloc)) + memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) + + if ReadDiskStats(diskstats[location1]) == nil { + diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) + diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) + diskWrites.Mark(diskstats[location1].WriteCount - 
diskstats[location2].WriteCount) + diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) } time.Sleep(refresh) } From 9239f613dcd1004a8eb2f5411d11966476abfef4 Mon Sep 17 00:00:00 2001 From: Huynh Minh Tri Date: Mon, 23 Dec 2024 14:06:18 +0700 Subject: [PATCH 5/9] flags: enable pprof flag --- internal/debug/flags.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/debug/flags.go b/internal/debug/flags.go index 7e6539fb26..7ac781a0a7 100644 --- a/internal/debug/flags.go +++ b/internal/debug/flags.go @@ -91,9 +91,9 @@ var Flags = []cli.Flag{ //vmoduleFlag, //backtraceAtFlag, //debugFlag, - //pprofFlag, - //pprofAddrFlag, - //pprofPortFlag, + pprofFlag, + pprofAddrFlag, + pprofPortFlag, //memprofilerateFlag, //blockprofilerateFlag, //cpuprofileFlag, From 4290b28c6abb040c1dc79b0fd6b4ef1b7ef91a15 Mon Sep 17 00:00:00 2001 From: Huynh Minh Tri Date: Mon, 23 Dec 2024 15:16:52 +0700 Subject: [PATCH 6/9] metrics: add disk read and write bytes counter --- metrics/metrics.go | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index dbb2727ec0..ea93c889ee 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -57,27 +57,36 @@ func CollectProcessMetrics(refresh time.Duration) { memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) var diskReads, diskReadBytes, diskWrites, diskWriteBytes Meter + var diskReadBytesCounter, diskWriteBytesCounter Counter if err := ReadDiskStats(diskstats[0]); err == nil { diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) + diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) + diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) } else { log.Debug("Failed to read disk metrics", "err", err) } // Iterate loading the different stats and updating the meters for i := 1; ; i++ { - runtime.ReadMemStats(memstats[i%2]) - memAllocs.Mark(int64(memstats[i%2].Mallocs - memstats[(i-1)%2].Mallocs)) - memFrees.Mark(int64(memstats[i%2].Frees - memstats[(i-1)%2].Frees)) - memInuse.Mark(int64(memstats[i%2].Alloc - memstats[(i-1)%2].Alloc)) - memPauses.Mark(int64(memstats[i%2].PauseTotalNs - memstats[(i-1)%2].PauseTotalNs)) + location1 := i % 2 + location2 := (i - 1) % 2 - if ReadDiskStats(diskstats[i%2]) == nil { - diskReads.Mark(diskstats[i%2].ReadCount - diskstats[(i-1)%2].ReadCount) - diskReadBytes.Mark(diskstats[i%2].ReadBytes - diskstats[(i-1)%2].ReadBytes) - diskWrites.Mark(diskstats[i%2].WriteCount - diskstats[(i-1)%2].WriteCount) - diskWriteBytes.Mark(diskstats[i%2].WriteBytes - diskstats[(i-1)%2].WriteBytes) + runtime.ReadMemStats(memstats[location1]) + memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) + memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) + memInuse.Mark(int64(memstats[location1].Alloc - memstats[location2].Alloc)) + memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) + + if ReadDiskStats(diskstats[location1]) == nil { + diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) + diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) + 
diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount) + diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) + + diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) + diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) } time.Sleep(refresh) } From 82db379233b8eba8ce2f8587730dcdd4898b5cd6 Mon Sep 17 00:00:00 2001 From: Huynh Minh Tri Date: Wed, 25 Dec 2024 11:43:28 +0700 Subject: [PATCH 7/9] metrics,core: add flag metrics.expensive, add metrics for block processing --- cmd/tomo/main.go | 6 +++- cmd/utils/flags.go | 7 +++-- core/blockchain.go | 24 ++++++++++++++++ core/state/state_object.go | 24 ++++++++++++++++ core/state/statedb.go | 56 ++++++++++++++++++++++++++++++++------ metrics/metrics.go | 28 +++++++++++++++---- 6 files changed, 128 insertions(+), 17 deletions(-) diff --git a/cmd/tomo/main.go b/cmd/tomo/main.go index 0bd21952a7..d6d839fa84 100644 --- a/cmd/tomo/main.go +++ b/cmd/tomo/main.go @@ -117,7 +117,6 @@ var ( utils.RPCCORSDomainFlag, utils.RPCVirtualHostsFlag, utils.EthStatsURLFlag, - utils.MetricsEnabledFlag, //utils.FakePoWFlag, //utils.NoCompactionFlag, //utils.GpoBlocksFlag, @@ -149,6 +148,10 @@ var ( utils.WhisperMaxMessageSizeFlag, utils.WhisperMinPOWFlag, } + metricsFlags = []cli.Flag{ + utils.MetricsEnabledFlag, + utils.MetricsEnabledExpensiveFlag, + } ) func init() { @@ -182,6 +185,7 @@ func init() { app.Flags = append(app.Flags, consoleFlags...) app.Flags = append(app.Flags, debug.Flags...) app.Flags = append(app.Flags, whisperFlags...) + app.Flags = append(app.Flags, metricsFlags...) app.Before = func(ctx *cli.Context) error { runtime.GOMAXPROCS(runtime.NumCPU()) diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index b1f0f92763..8157381da9 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -43,7 +43,6 @@ import ( "github.com/tomochain/tomochain/eth/gasprice" "github.com/tomochain/tomochain/ethdb" "github.com/tomochain/tomochain/log" - "github.com/tomochain/tomochain/metrics" "github.com/tomochain/tomochain/node" "github.com/tomochain/tomochain/p2p" "github.com/tomochain/tomochain/p2p/discover" @@ -356,9 +355,13 @@ var ( Usage: "Reporting URL of a ethstats service (nodename:secret@host:port)", } MetricsEnabledFlag = cli.BoolFlag{ - Name: metrics.MetricsEnabledFlag, + Name: "metrics", Usage: "Enable metrics collection and reporting", } + MetricsEnabledExpensiveFlag = &cli.BoolFlag{ + Name: "metrics.expensive", + Usage: "Enable expensive metrics collection and reporting", + } FakePoWFlag = cli.BoolFlag{ Name: "fakepow", Usage: "Disables proof-of-work verification", diff --git a/core/blockchain.go b/core/blockchain.go index 4b17aa48d7..73b68dfa7b 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -55,6 +55,16 @@ import ( ) var ( + accountReadTimer = metrics.NewRegisteredTimer("chain/account/reads", nil) + accountHashTimer = metrics.NewRegisteredTimer("chain/account/hashes", nil) + accountUpdateTimer = metrics.NewRegisteredTimer("chain/account/updates", nil) + accountCommitTimer = metrics.NewRegisteredTimer("chain/account/commits", nil) + + storageReadTimer = metrics.NewRegisteredTimer("chain/storage/reads", nil) + storageHashTimer = metrics.NewRegisteredTimer("chain/storage/hashes", nil) + storageUpdateTimer = metrics.NewRegisteredTimer("chain/storage/updates", nil) + storageCommitTimer = metrics.NewRegisteredTimer("chain/storage/commits", nil) + blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", 
nil) CheckpointCh = make(chan int) ErrNoGenesis = errors.New("Genesis not found in chain") @@ -1651,6 +1661,20 @@ func (bc *BlockChain) insertChain(chain types.Blocks) (int, []interface{}, []*ty if err != nil { return i, events, coalescedLogs, err } + // Update the metrics subsystem with all the measurements + accountReadTimer.Update(statedb.AccountReads) + accountHashTimer.Update(statedb.AccountHashes) + accountUpdateTimer.Update(statedb.AccountUpdates) + accountCommitTimer.Update(statedb.AccountCommits) + + storageReadTimer.Update(statedb.StorageReads) + storageHashTimer.Update(statedb.StorageHashes) + storageUpdateTimer.Update(statedb.StorageUpdates) + storageCommitTimer.Update(statedb.StorageCommits) + + trieAccess := statedb.AccountReads + statedb.AccountHashes + statedb.AccountUpdates + statedb.AccountCommits + trieAccess += statedb.StorageReads + statedb.StorageHashes + statedb.StorageUpdates + statedb.StorageCommits + if bc.chainConfig.Posv != nil { c := bc.engine.(*posv.Posv) coinbase := c.Signer() diff --git a/core/state/state_object.go b/core/state/state_object.go index b03231e23b..a72a632b4c 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -21,9 +21,11 @@ import ( "fmt" "io" "math/big" + "time" "github.com/tomochain/tomochain/common" "github.com/tomochain/tomochain/crypto" + "github.com/tomochain/tomochain/metrics" "github.com/tomochain/tomochain/rlp" ) @@ -170,6 +172,10 @@ func (c *stateObject) getTrie(db Database) Trie { func (self *stateObject) GetCommittedState(db Database, key common.Hash) common.Hash { value := common.Hash{} + // Track the amount of time wasted on reading the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { self.db.StorageReads += time.Since(start) }(time.Now()) + } // Load from DB in case it is missing. enc, err := self.getTrie(db).TryGet(key[:]) if err != nil { @@ -232,6 +238,12 @@ func (self *stateObject) setState(key, value common.Hash) { // updateTrie writes cached storage modifications into the object's storage trie. 
func (self *stateObject) updateTrie(db Database) Trie { + // Track the amount of time wasted on updating the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { self.db.StorageUpdates += time.Since(start) }(time.Now()) + } + + // Update all the dirty slots in the trie tr := self.getTrie(db) for key, value := range self.dirtyStorage { delete(self.dirtyStorage, key) @@ -249,6 +261,12 @@ func (self *stateObject) updateTrie(db Database) Trie { // UpdateRoot sets the trie root to the current root hash of func (self *stateObject) updateRoot(db Database) { self.updateTrie(db) + + // Track the amount of time wasted on hashing the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { self.db.StorageHashes += time.Since(start) }(time.Now()) + } + self.data.Root = self.trie.Hash() } @@ -259,6 +277,12 @@ func (self *stateObject) CommitTrie(db Database) error { if self.dbErr != nil { return self.dbErr } + + // Track the amount of time wasted on committing the storage trie + if metrics.EnabledExpensive { + defer func(start time.Time) { self.db.StorageCommits += time.Since(start) }(time.Now()) + } + root, err := self.trie.Commit(nil) if err == nil { self.data.Root = root diff --git a/core/state/statedb.go b/core/state/statedb.go index 7a3357b3e8..da950efd4a 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -22,11 +22,13 @@ import ( "math/big" "sort" "sync" + "time" "github.com/tomochain/tomochain/common" "github.com/tomochain/tomochain/core/types" "github.com/tomochain/tomochain/crypto" "github.com/tomochain/tomochain/log" + "github.com/tomochain/tomochain/metrics" "github.com/tomochain/tomochain/rlp" "github.com/tomochain/tomochain/trie" ) @@ -80,6 +82,16 @@ type StateDB struct { validRevisions []revision nextRevisionId int + // Measurements gathered during execution for debugging purposes + AccountReads time.Duration + AccountHashes time.Duration + AccountUpdates time.Duration + AccountCommits time.Duration + StorageReads time.Duration + StorageHashes time.Duration + StorageUpdates time.Duration + StorageCommits time.Duration + lock sync.Mutex } @@ -359,7 +371,13 @@ func (self *StateDB) Suicide(addr common.Address) bool { // updateStateObject writes the given object to the trie. func (self *StateDB) updateStateObject(stateObject *stateObject) { + // Track the amount of time wasted on updating the account from the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { self.AccountUpdates += time.Since(start) }(time.Now()) + } + // Encode the account and update the account trie addr := stateObject.Address() + data, err := rlp.EncodeToBytes(stateObject) if err != nil { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) @@ -369,6 +387,11 @@ func (self *StateDB) updateStateObject(stateObject *stateObject) { // deleteStateObject removes the given object from the state trie. func (self *StateDB) deleteStateObject(stateObject *stateObject) { + // Track the amount of time wasted on deleting the account from the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { self.AccountUpdates += time.Since(start) }(time.Now()) + } + // Delete the account from the trie stateObject.deleted = true addr := stateObject.Address() self.setError(self.trie.TryDelete(addr[:])) @@ -383,19 +406,24 @@ func (self *StateDB) DeleteAddress(addr common.Address) { } // Retrieve a state object given my the address. Returns nil if not found. 
-func (self *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) { +func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) { // Prefer 'live' objects. - if obj := self.stateObjects[addr]; obj != nil { + if obj := s.stateObjects[addr]; obj != nil { if obj.deleted { return nil } return obj } + // Track the amount of time wasted on loading the object from the database + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) + } + // Load the object from the database. - enc, err := self.trie.TryGet(addr[:]) + enc, err := s.trie.TryGet(addr[:]) if len(enc) == 0 { - self.setError(err) + s.setError(err) return nil } var data Account @@ -404,8 +432,8 @@ func (self *StateDB) getStateObject(addr common.Address) (stateObject *stateObje return nil } // Insert into the live set. - obj := newObject(self, addr, data, self.MarkStateObjectDirty) - self.setStateObject(obj) + obj := newObject(s, addr, data, s.MarkStateObjectDirty) + s.setStateObject(obj) return obj } @@ -449,8 +477,8 @@ func (self *StateDB) createObject(addr common.Address) (newobj, prev *stateObjec // CreateAccount is called during the EVM CREATE operation. The situation might arise that // a contract does the following: // -// 1. sends funds to sha(account ++ (nonce + 1)) -// 2. tx_create(sha(account ++ nonce)) (note that this gets the address of 1) +// 1. sends funds to sha(account ++ (nonce + 1)) +// 2. tx_create(sha(account ++ nonce)) (note that this gets the address of 1) // // Carrying over the balance ensures that Ether doesn't disappear. func (self *StateDB) CreateAccount(addr common.Address) { @@ -569,6 +597,12 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { // goes into transaction receipts. func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { s.Finalise(deleteEmptyObjects) + + // Track the amount of time wasted on hashing the account trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now()) + } + return s.trie.Hash() } @@ -611,7 +645,7 @@ func (s *StateDB) clearJournalAndRefund() { func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) { defer s.clearJournalAndRefund() - // Commit objects to the trie. + // Commit objects to the trie, measuring the elapsed time for addr, stateObject := range s.stateObjects { _, isDirty := s.stateObjectsDirty[addr] switch { @@ -634,6 +668,10 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) } delete(s.stateObjectsDirty, addr) } + // Write the account trie changes, measuring the amount of wasted time + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) + } // Write trie changes. root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error { var account Account diff --git a/metrics/metrics.go b/metrics/metrics.go index ea93c889ee..1c2fa593f3 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -21,17 +21,35 @@ import ( // for less cluttered pprof profiles. var Enabled bool = false -// MetricsEnabledFlag is the CLI flag name to use to enable metrics collections. -const MetricsEnabledFlag = "metrics" +// EnabledExpensive is a soft-flag meant for external packages to check if costly +// metrics gathering is allowed or not. The goal is to separate standard metrics +// for health monitoring and debug metrics that might impact runtime performance. 
+var EnabledExpensive = false + +// enablerFlags is the CLI flag names to use to enable metrics collections. +var enablerFlags = []string{"metrics", "dashboard"} + +// expensiveEnablerFlags is the CLI flag names to use to enable metrics collections. +var expensiveEnablerFlags = []string{"metrics.expensive"} // Init enables or disables the metrics system. Since we need this to run before // any other code gets to create meters and timers, we'll actually do an ugly hack // and peek into the command line args for the metrics flag. func init() { for _, arg := range os.Args { - if flag := strings.TrimLeft(arg, "-"); flag == MetricsEnabledFlag { - log.Info("Enabling metrics collection") - Enabled = true + flag := strings.TrimLeft(arg, "-") + + for _, enabler := range enablerFlags { + if !Enabled && flag == enabler { + log.Info("Enabling metrics collection") + Enabled = true + } + } + for _, enabler := range expensiveEnablerFlags { + if !EnabledExpensive && flag == enabler { + log.Info("Enabling expensive metrics collection") + EnabledExpensive = true + } } } } From c4232e0aa61062f5314f80ab6ef1ad39cb087875 Mon Sep 17 00:00:00 2001 From: tristan98 Date: Mon, 30 Dec 2024 14:25:02 +0700 Subject: [PATCH 8/9] chore: add collector unit test, define TYPE only once --- metrics/prometheus/collector.go | 11 ++- metrics/prometheus/collector_test.go | 110 +++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 metrics/prometheus/collector_test.go diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index 1ee5316cca..59985c577e 100644 --- a/metrics/prometheus/collector.go +++ b/metrics/prometheus/collector.go @@ -30,7 +30,7 @@ var ( typeCounterTpl = "# TYPE %s counter\n" typeSummaryTpl = "# TYPE %s summary\n" keyValueTpl = "%s %v\n\n" - keyQuantileTagValueTpl = "%s {quantile=\"%s\"} %v\n\n" + keyQuantileTagValueTpl = "%s {quantile=\"%s\"} %v\n" ) // collector is a collection of byte buffers that aggregate Prometheus reports @@ -39,7 +39,7 @@ type collector struct { buff *bytes.Buffer } -// newCollector createa a new Prometheus metric aggregator. +// newCollector creates a new Prometheus metric aggregator. 
func newCollector() *collector { return &collector{ buff: &bytes.Buffer{}, @@ -62,9 +62,11 @@ func (c *collector) addHistogram(name string, m metrics.Histogram) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) for i := range pv { c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) } + c.buff.WriteRune('\n') } func (c *collector) addMeter(name string, m metrics.Meter) { @@ -75,9 +77,11 @@ func (c *collector) addTimer(name string, m metrics.Timer) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) for i := range pv { c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) } + c.buff.WriteRune('\n') } func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { @@ -87,9 +91,11 @@ func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { ps := m.Percentiles([]float64{50, 95, 99}) val := m.Values() c.writeSummaryCounter(name, len(val)) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) c.writeSummaryPercentile(name, "0.50", ps[0]) c.writeSummaryPercentile(name, "0.95", ps[1]) c.writeSummaryPercentile(name, "0.99", ps[2]) + c.buff.WriteRune('\n') } func (c *collector) writeGaugeCounter(name string, value interface{}) { @@ -106,7 +112,6 @@ func (c *collector) writeSummaryCounter(name string, value interface{}) { func (c *collector) writeSummaryPercentile(name, p string, value interface{}) { name = mutateKey(name) - c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, name)) c.buff.WriteString(fmt.Sprintf(keyQuantileTagValueTpl, name, p, value)) } diff --git a/metrics/prometheus/collector_test.go b/metrics/prometheus/collector_test.go new file mode 100644 index 0000000000..7a68bc29f9 --- /dev/null +++ b/metrics/prometheus/collector_test.go @@ -0,0 +1,110 @@ +package prometheus + +import ( + "os" + "testing" + "time" + + "github.com/tomochain/tomochain/metrics" +) + +func TestMain(m *testing.M) { + metrics.Enabled = true + os.Exit(m.Run()) +} + +func TestCollector(t *testing.T) { + c := newCollector() + + counter := metrics.NewCounter() + counter.Inc(12345) + c.addCounter("test/counter", counter) + + gauge := metrics.NewGauge() + gauge.Update(23456) + c.addGauge("test/gauge", gauge) + + gaugeFloat64 := metrics.NewGaugeFloat64() + gaugeFloat64.Update(34567.89) + c.addGaugeFloat64("test/gauge_float64", gaugeFloat64) + + histogram := metrics.NewHistogram(&metrics.NilSample{}) + c.addHistogram("test/histogram", histogram) + + meter := metrics.NewMeter() + defer meter.Stop() + meter.Mark(9999999) + c.addMeter("test/meter", meter) + + timer := metrics.NewTimer() + defer timer.Stop() + timer.Update(20 * time.Millisecond) + timer.Update(21 * time.Millisecond) + timer.Update(22 * time.Millisecond) + timer.Update(120 * time.Millisecond) + timer.Update(23 * time.Millisecond) + timer.Update(24 * time.Millisecond) + c.addTimer("test/timer", timer) + + resettingTimer := metrics.NewResettingTimer() + resettingTimer.Update(10 * time.Millisecond) + resettingTimer.Update(11 * time.Millisecond) + resettingTimer.Update(12 * time.Millisecond) + resettingTimer.Update(120 * time.Millisecond) + resettingTimer.Update(13 * time.Millisecond) + resettingTimer.Update(14 * time.Millisecond) + c.addResettingTimer("test/resetting_timer", resettingTimer.Snapshot()) + 
+ emptyResettingTimer := metrics.NewResettingTimer().Snapshot() + c.addResettingTimer("test/empty_resetting_timer", emptyResettingTimer) + + const expectedOutput = `# TYPE test_counter gauge +test_counter 12345 + +# TYPE test_gauge gauge +test_gauge 23456 + +# TYPE test_gauge_float64 gauge +test_gauge_float64 34567.89 + +# TYPE test_histogram_count counter +test_histogram_count 0 + +# TYPE test_histogram summary +test_histogram {quantile="0.5"} 0 +test_histogram {quantile="0.75"} 0 +test_histogram {quantile="0.95"} 0 +test_histogram {quantile="0.99"} 0 +test_histogram {quantile="0.999"} 0 +test_histogram {quantile="0.9999"} 0 + +# TYPE test_meter gauge +test_meter 9999999 + +# TYPE test_timer_count counter +test_timer_count 6 + +# TYPE test_timer summary +test_timer {quantile="0.5"} 2.25e+07 +test_timer {quantile="0.75"} 4.8e+07 +test_timer {quantile="0.95"} 1.2e+08 +test_timer {quantile="0.99"} 1.2e+08 +test_timer {quantile="0.999"} 1.2e+08 +test_timer {quantile="0.9999"} 1.2e+08 + +# TYPE test_resetting_timer_count counter +test_resetting_timer_count 6 + +# TYPE test_resetting_timer summary +test_resetting_timer {quantile="0.50"} 12000000 +test_resetting_timer {quantile="0.95"} 120000000 +test_resetting_timer {quantile="0.99"} 120000000 + +` + exp := c.buff.String() + if exp != expectedOutput { + t.Log("Expected Output:\n", expectedOutput) + t.Log("Actual Output:\n", exp) + t.Fatal("unexpected collector output") + } +} From 5e3a8b691ff2fe5441531761aed7b985ef1a94aa Mon Sep 17 00:00:00 2001 From: tristan98 Date: Wed, 1 Jan 2025 01:17:08 +0700 Subject: [PATCH 9/9] flags: configuring metrics HTTP server on separate endpoint PR #21290 --- cmd/tomo/main.go | 6 ++++++ cmd/tomo/usage.go | 6 ++++++ cmd/utils/flags.go | 28 ++++++++++++++++++++++++++++ internal/debug/flags.go | 32 ++++++++++++++++++++++---------- metrics/exp/exp.go | 15 +++++++++++++++ 5 files changed, 77 insertions(+), 10 deletions(-) diff --git a/cmd/tomo/main.go b/cmd/tomo/main.go index d6d839fa84..306479a1e8 100644 --- a/cmd/tomo/main.go +++ b/cmd/tomo/main.go @@ -151,6 +151,8 @@ var ( metricsFlags = []cli.Flag{ utils.MetricsEnabledFlag, utils.MetricsEnabledExpensiveFlag, + utils.MetricsHTTPFlag, + utils.MetricsPortFlag, } ) @@ -192,6 +194,10 @@ func init() { if err := debug.Setup(ctx); err != nil { return err } + + // Start metrics export if enabled + utils.SetupMetrics(ctx) + // Start system runtime metrics collection go metrics.CollectProcessMetrics(3 * time.Second) diff --git a/cmd/tomo/usage.go b/cmd/tomo/usage.go index f166d9aae9..2c69212dbf 100644 --- a/cmd/tomo/usage.go +++ b/cmd/tomo/usage.go @@ -209,6 +209,12 @@ var AppHelpFlagGroups = []flagGroup{ //utils.NoCompactionFlag, }, debug.Flags...), }, + + { + Name: "METRICS AND STATS", + Flags: metricsFlags, + }, + //{ // Name: "WHISPER (EXPERIMENTAL)", // Flags: whisperFlags, diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 8157381da9..3da997a80a 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -43,6 +43,8 @@ import ( "github.com/tomochain/tomochain/eth/gasprice" "github.com/tomochain/tomochain/ethdb" "github.com/tomochain/tomochain/log" + "github.com/tomochain/tomochain/metrics" + "github.com/tomochain/tomochain/metrics/exp" "github.com/tomochain/tomochain/node" "github.com/tomochain/tomochain/p2p" "github.com/tomochain/tomochain/p2p/discover" @@ -362,6 +364,20 @@ var ( Name: "metrics.expensive", Usage: "Enable expensive metrics collection and reporting", } + // MetricsHTTPFlag defines the endpoint for a stand-alone metrics HTTP endpoint. 
+ // Since the pprof service enables sensitive/vulnerable behavior, this allows a user + // to enable a public-OK metrics endpoint without having to worry about ALSO exposing + // other profiling behavior or information. + MetricsHTTPFlag = cli.StringFlag{ + Name: "metrics.addr", + Usage: "Enable stand-alone metrics HTTP server listening interface", + Value: "127.0.0.1", + } + MetricsPortFlag = cli.IntFlag{ + Name: "metrics.port", + Usage: "Metrics HTTP server listening port", + Value: 6060, + } FakePoWFlag = cli.BoolFlag{ Name: "fakepow", Usage: "Disables proof-of-work verification", @@ -1195,6 +1211,18 @@ func SetupNetwork(ctx *cli.Context) { params.TargetGasLimit = ctx.GlobalUint64(TargetGasLimitFlag.Name) } +func SetupMetrics(ctx *cli.Context) { + if metrics.Enabled { + log.Info("Enabling metrics collection") + + if ctx.GlobalIsSet(MetricsHTTPFlag.Name) { + address := fmt.Sprintf("%s:%d", ctx.GlobalString(MetricsHTTPFlag.Name), ctx.GlobalInt(MetricsPortFlag.Name)) + log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address) + exp.Setup(address) + } + } +} + // MakeChainDatabase open an LevelDB using the flags passed to the client and will hard crash if it fails. func MakeChainDatabase(ctx *cli.Context, stack *node.Node) ethdb.Database { var ( diff --git a/internal/debug/flags.go b/internal/debug/flags.go index 7ac781a0a7..57e639c2ab 100644 --- a/internal/debug/flags.go +++ b/internal/debug/flags.go @@ -137,21 +137,33 @@ func Setup(ctx *cli.Context) error { // pprof server if ctx.GlobalBool(pprofFlag.Name) { - // Hook go-metrics into expvar on any /debug/metrics request, load all vars - // from the registry into expvar, and execute regular expvar handler. - exp.Exp(metrics.DefaultRegistry) + listenHost := ctx.GlobalString(pprofAddrFlag.Name) + port := ctx.GlobalInt(pprofPortFlag.Name) + + address := fmt.Sprintf("%s:%d", listenHost, port) - address := fmt.Sprintf("%s:%d", ctx.GlobalString(pprofAddrFlag.Name), ctx.GlobalInt(pprofPortFlag.Name)) - go func() { - log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) - if err := http.ListenAndServe(address, nil); err != nil { - log.Error("Failure in running pprof server", "err", err) - } - }() + // This context value ("metrics.addr") represents the utils.MetricsHTTPFlag.Name. + // It cannot be imported because it will cause a cyclical dependency. + StartPProf(address, !ctx.GlobalIsSet("metrics.addr")) } return nil } +func StartPProf(address string, withMetrics bool) { + // Hook go-metrics into expvar on any /debug/metrics request, load all vars + // from the registry into expvar, and execute regular expvar handler. + if withMetrics { + exp.Exp(metrics.DefaultRegistry) + } + log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) + go func() { + if err := http.ListenAndServe(address, nil); err != nil { + log.Error("Failure in running pprof server", "err", err) + } + }() + +} + // Exit stops all running profiles, flushing their output to the // respective file. func Exit() { diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 77d96e4a3f..6abac584f6 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -8,6 +8,7 @@ import ( "net/http" "sync" + "github.com/tomochain/tomochain/log" "github.com/tomochain/tomochain/metrics/prometheus" "github.com/tomochain/tomochain/metrics" @@ -53,6 +54,20 @@ func ExpHandler(r metrics.Registry) http.Handler { return http.HandlerFunc(e.expHandler) } +// Setup starts a dedicated metrics server at the given address. 
+// This function enables metrics reporting separate from pprof. +func Setup(address string) { + m := http.NewServeMux() + m.Handle("/debug/metrics", ExpHandler(metrics.DefaultRegistry)) + m.Handle("/debug/metrics/prometheus", prometheus.Handler(metrics.DefaultRegistry)) + log.Info("Starting metrics server", "addr", fmt.Sprintf("http://%s/debug/metrics", address)) + go func() { + if err := http.ListenAndServe(address, m); err != nil { + log.Error("Failure in running metrics server", "err", err) + } + }() +} + func (exp *exp) getInt(name string) *expvar.Int { var v *expvar.Int exp.expvarLock.Lock()
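Taken together, the patches above expose the go-metrics registry in Prometheus text format at /debug/metrics/prometheus and add the --metrics, --metrics.expensive, --metrics.addr and --metrics.port flags. As a rough usage sketch (not part of the patch series itself), the handler introduced in metrics/prometheus can be mounted in a small standalone program. The sample metric name and the direct assignment to metrics.Enabled are illustrative assumptions; prometheus.Handler, metrics.DefaultRegistry, Timer.Update and the default 127.0.0.1:6060 address are taken from the patches, and GetOrRegisterTimer is assumed to follow the same GetOrRegister* convention as the meter/counter helpers used above.

package main

import (
	"log"
	"net/http"
	"time"

	"github.com/tomochain/tomochain/metrics"
	"github.com/tomochain/tomochain/metrics/prometheus"
)

func main() {
	// Illustrative only: the node normally flips this via the --metrics flag,
	// which the init() in metrics/metrics.go picks up from os.Args.
	metrics.Enabled = true

	// Register and feed a sample timer the same way the chain code does
	// (the metric name here is made up for the example).
	timer := metrics.GetOrRegisterTimer("example/block/inserts", metrics.DefaultRegistry)
	timer.Update(42 * time.Millisecond)

	// Serve the registry in Prometheus text format on the same route the
	// patch series wires into the node.
	http.Handle("/debug/metrics/prometheus", prometheus.Handler(metrics.DefaultRegistry))
	log.Fatal(http.ListenAndServe("127.0.0.1:6060", nil))
}

With the flags from the final patch (for example `tomo --metrics --metrics.addr 127.0.0.1 --metrics.port 6060`), a running node exposes the same data at http://127.0.0.1:6060/debug/metrics/prometheus, which a Prometheus server can scrape by pointing a job at that address with the metrics path set to /debug/metrics/prometheus.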