From 23bc924585b60dffb0ff65916935235f7576df54 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 01/74] internal/debug: start pprof server by new function StartPProf (#16532) --- internal/debug/flags.go | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/internal/debug/flags.go b/internal/debug/flags.go index de8d587f0852..1e9970b73ce5 100644 --- a/internal/debug/flags.go +++ b/internal/debug/flags.go @@ -312,17 +312,8 @@ func Setup(ctx *cli.Context) error { // pprof server if ctx.Bool(pprofFlag.Name) { - // Hook go-metrics into expvar on any /debug/metrics request, load all vars - // from the registry into expvar, and execute regular expvar handler. - exp.Exp(metrics.DefaultRegistry) - address := fmt.Sprintf("%s:%d", ctx.String(pprofAddrFlag.Name), ctx.Int(pprofPortFlag.Name)) - go func() { - log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) - if err := http.ListenAndServe(address, nil); err != nil { - log.Error("Failure in running pprof server", "err", err) - } - }() + StartPProf(address) } if len(logFile) > 0 || rotation { @@ -332,6 +323,18 @@ func Setup(ctx *cli.Context) error { return nil } +func StartPProf(address string) { + // Hook go-metrics into expvar on any /debug/metrics request, load all vars + // from the registry into expvar, and execute regular expvar handler. + exp.Exp(metrics.DefaultRegistry) + log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) + go func() { + if err := http.ListenAndServe(address, nil); err != nil { + log.Error("Failure in running pprof server", "err", err) + } + }() +} + // Exit stops all running profiles, flushing their output to the // respective file. func Exit() { From 92a7b64961940fb2a1090b65bdd7cabc60348f5f Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 02/74] metrics: export to InfluxDB (#16979) --- cmd/XDC/main.go | 19 +++++++-- cmd/utils/flags.go | 82 ++++++++++++++++++++++++++++++------ metrics/config.go | 12 ++++++ metrics/influxdb/influxdb.go | 12 +++--- 4 files changed, 104 insertions(+), 21 deletions(-) diff --git a/cmd/XDC/main.go b/cmd/XDC/main.go index 5ececb431df7..f5bdd24709d2 100644 --- a/cmd/XDC/main.go +++ b/cmd/XDC/main.go @@ -126,9 +126,6 @@ var ( utils.HTTPCORSDomainFlag, utils.HTTPVirtualHostsFlag, utils.EthStatsURLFlag, - utils.MetricsEnabledFlag, - utils.MetricsHTTPFlag, - utils.MetricsPortFlag, //utils.FakePoWFlag, //utils.NoCompactionFlag, //utils.GpoBlocksFlag, @@ -163,6 +160,18 @@ var ( utils.IPCPathFlag, utils.RPCGlobalTxFeeCap, } + + metricsFlags = []cli.Flag{ + utils.MetricsEnabledFlag, + utils.MetricsHTTPFlag, + utils.MetricsPortFlag, + utils.MetricsEnableInfluxDBFlag, + utils.MetricsInfluxDBEndpointFlag, + utils.MetricsInfluxDBDatabaseFlag, + utils.MetricsInfluxDBUsernameFlag, + utils.MetricsInfluxDBPasswordFlag, + utils.MetricsInfluxDBHostTagFlag, + } ) func init() { @@ -194,6 +203,7 @@ func init() { app.Flags = append(app.Flags, rpcFlags...) app.Flags = append(app.Flags, consoleFlags...) app.Flags = append(app.Flags, debug.Flags...) + app.Flags = append(app.Flags, metricsFlags...) 
flags.AutoEnvVars(app.Flags, "XDC") app.Before = func(ctx *cli.Context) error { @@ -204,6 +214,9 @@ func init() { } flags.CheckEnvVars(ctx, app.Flags, "XDC") + // Start system runtime metrics collection + go metrics.CollectProcessMetrics(3 * time.Second) + utils.SetupNetwork(ctx) return nil } diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 0efc867a273e..39a06e0e3b09 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -28,6 +28,7 @@ import ( godebug "runtime/debug" "strconv" "strings" + "time" "github.com/XinFinOrg/XDPoSChain/XDCx" "github.com/XinFinOrg/XDPoSChain/accounts" @@ -52,6 +53,7 @@ import ( "github.com/XinFinOrg/XDPoSChain/log" "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/metrics/exp" + "github.com/XinFinOrg/XDPoSChain/metrics/influxdb" "github.com/XinFinOrg/XDPoSChain/node" "github.com/XinFinOrg/XDPoSChain/p2p" "github.com/XinFinOrg/XDPoSChain/p2p/discover" @@ -661,6 +663,45 @@ var ( Value: metrics.DefaultConfig.Port, Category: flags.MetricsCategory, } + MetricsEnableInfluxDBFlag = &cli.BoolFlag{ + Name: "metrics-influxdb", + Usage: "Enable metrics export/push to an external InfluxDB database", + Category: flags.MetricsCategory, + } + MetricsInfluxDBEndpointFlag = &cli.StringFlag{ + Name: "metrics-influxdb.endpoint", + Usage: "InfluxDB API endpoint to report metrics to", + Value: metrics.DefaultConfig.InfluxDBEndpoint, + Category: flags.MetricsCategory, + } + MetricsInfluxDBDatabaseFlag = &cli.StringFlag{ + Name: "metrics-influxdb.database", + Usage: "InfluxDB database name to push reported metrics to", + Value: metrics.DefaultConfig.InfluxDBDatabase, + Category: flags.MetricsCategory, + } + MetricsInfluxDBUsernameFlag = &cli.StringFlag{ + Name: "metrics-influxdb.username", + Usage: "Username to authorize access to the database", + Value: metrics.DefaultConfig.InfluxDBUsername, + Category: flags.MetricsCategory, + } + MetricsInfluxDBPasswordFlag = &cli.StringFlag{ + Name: "metrics-influxdb.password", + Usage: "Password to authorize access to the database", + Value: metrics.DefaultConfig.InfluxDBPassword, + Category: flags.MetricsCategory, + } + // The `host` tag is part of every measurement sent to InfluxDB. Queries on tags are faster in InfluxDB. + // It is used so that we can group all nodes and average a measurement across all of them, but also so + // that we can select a specific node and inspect its measurements. 
+ // https://docs.influxdata.com/influxdb/v1.4/concepts/key_concepts/#tag-key + MetricsInfluxDBHostTagFlag = &cli.StringFlag{ + Name: "metrics-influxdb.host.tag", + Usage: "InfluxDB `host` tag attached to all measurements", + Value: metrics.DefaultConfig.InfluxDBTags, + Category: flags.MetricsCategory, + } // MISC settings RollbackFlag = &cli.StringFlag{ @@ -1468,6 +1509,35 @@ func SetupNetwork(ctx *cli.Context) { params.TargetGasLimit = ctx.Uint64(MinerGasLimitFlag.Name) } +func SetupMetrics(ctx *cli.Context) { + if metrics.Enabled { + log.Info("Enabling metrics collection") + var ( + enableExport = ctx.Bool(MetricsEnableInfluxDBFlag.Name) + endpoint = ctx.String(MetricsInfluxDBEndpointFlag.Name) + database = ctx.String(MetricsInfluxDBDatabaseFlag.Name) + username = ctx.String(MetricsInfluxDBUsernameFlag.Name) + password = ctx.String(MetricsInfluxDBPasswordFlag.Name) + hosttag = ctx.String(MetricsInfluxDBHostTagFlag.Name) + ) + + if enableExport { + log.Info("Enabling metrics export to InfluxDB") + go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "xdc.", map[string]string{ + "host": hosttag, + }) + } + + if ctx.IsSet(MetricsHTTPFlag.Name) { + address := fmt.Sprintf("%s:%d", ctx.String(MetricsHTTPFlag.Name), ctx.Int(MetricsPortFlag.Name)) + log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address) + exp.Setup(address) + } else if ctx.IsSet(MetricsPortFlag.Name) { + log.Warn(fmt.Sprintf("--%s specified without --%s, metrics server will not start.", MetricsPortFlag.Name, MetricsHTTPFlag.Name)) + } + } +} + // MakeChainDatabase open an LevelDB using the flags passed to the client and will hard crash if it fails. func MakeChainDatabase(ctx *cli.Context, stack *node.Node) ethdb.Database { var ( @@ -1595,15 +1665,3 @@ func RegisterFilterAPI(stack *node.Node, backend ethapi.Backend, ethcfg *ethconf }}) return filterSystem } - -func SetupMetrics(ctx *cli.Context) { - if metrics.Enabled { - log.Info("Enabling metrics collection") - - if ctx.IsSet(MetricsHTTPFlag.Name) { - address := fmt.Sprintf("%s:%d", ctx.String(MetricsHTTPFlag.Name), ctx.Int(MetricsPortFlag.Name)) - log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address) - exp.Setup(address) - } - } -} diff --git a/metrics/config.go b/metrics/config.go index 169c683a97b5..4570628c243b 100644 --- a/metrics/config.go +++ b/metrics/config.go @@ -22,6 +22,12 @@ type Config struct { EnabledExpensive bool `toml:",omitempty"` HTTP string `toml:",omitempty"` Port int `toml:",omitempty"` + EnableInfluxDB bool `toml:",omitempty"` + InfluxDBEndpoint string `toml:",omitempty"` + InfluxDBDatabase string `toml:",omitempty"` + InfluxDBUsername string `toml:",omitempty"` + InfluxDBPassword string `toml:",omitempty"` + InfluxDBTags string `toml:",omitempty"` } // DefaultConfig is the default config for metrics used in go-ethereum. 
@@ -30,4 +36,10 @@ var DefaultConfig = Config{ EnabledExpensive: false, HTTP: "127.0.0.1", Port: 6060, + EnableInfluxDB: false, + InfluxDBEndpoint: "http://localhost:8086", + InfluxDBDatabase: "xdc", + InfluxDBUsername: "test", + InfluxDBPassword: "test", + InfluxDBTags: "localhost", } diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 5c00c1a4c320..0567705a9b99 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -2,10 +2,10 @@ package influxdb import ( "fmt" - "log" uurl "net/url" "time" + "github.com/XinFinOrg/XDPoSChain/log" "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/influxdata/influxdb/client" ) @@ -35,7 +35,7 @@ func InfluxDB(r metrics.Registry, d time.Duration, url, database, username, pass func InfluxDBWithTags(r metrics.Registry, d time.Duration, url, database, username, password, namespace string, tags map[string]string) { u, err := uurl.Parse(url) if err != nil { - log.Printf("unable to parse InfluxDB url %s. err=%v", url, err) + log.Warn("unable to parse InfluxDB url %s. err=%v", url, err) return } @@ -51,7 +51,7 @@ func InfluxDBWithTags(r metrics.Registry, d time.Duration, url, database, userna cache: make(map[string]int64), } if err := rep.makeClient(); err != nil { - log.Printf("unable to make InfluxDB client. err=%v", err) + log.Warn("unable to make InfluxDB client. err=%v", err) return } @@ -79,15 +79,15 @@ func (r *reporter) run() { select { case <-intervalTicker.C: if err := r.send(); err != nil { - log.Printf("unable to send to InfluxDB. err=%v", err) + log.Warn("unable to send to InfluxDB. err=%v", err) } case <-pingTicker.C: _, _, err := r.client.Ping() if err != nil { - log.Printf("got error while sending a ping to InfluxDB, trying to recreate client. err=%v", err) + log.Warn("got error while sending a ping to InfluxDB, trying to recreate client. err=%v", err) if err = r.makeClient(); err != nil { - log.Printf("unable to make InfluxDB client. err=%v", err) + log.Warn("unable to make InfluxDB client. err=%v", err) } } } From 61f5a888c37c24e812db298cbfde1ffb244466ec Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 03/74] metrics: add force option for metric system (#17667) --- metrics/ewma.go | 3 --- metrics/meter.go | 43 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/metrics/ewma.go b/metrics/ewma.go index 3aecd4fa35a1..57c949e7d421 100644 --- a/metrics/ewma.go +++ b/metrics/ewma.go @@ -17,9 +17,6 @@ type EWMA interface { // NewEWMA constructs a new EWMA with the given alpha. func NewEWMA(alpha float64) EWMA { - if !Enabled { - return NilEWMA{} - } return &StandardEWMA{alpha: alpha} } diff --git a/metrics/meter.go b/metrics/meter.go index 82b2141a624b..58d170fae027 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -29,6 +29,17 @@ func GetOrRegisterMeter(name string, r Registry) Meter { return r.GetOrRegister(name, NewMeter).(Meter) } +// GetOrRegisterMeterForced returns an existing Meter or constructs and registers a +// new StandardMeter no matter the global switch is enabled or not. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterMeterForced(name string, r Registry) Meter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewMeterForced).(Meter) +} + // NewMeter constructs a new StandardMeter and launches a goroutine. 
// Be sure to call Stop() once the meter is of no use to allow for garbage collection. func NewMeter() Meter { @@ -46,8 +57,23 @@ func NewMeter() Meter { return m } -// NewMeter constructs and registers a new StandardMeter and launches a -// goroutine. +// NewMeterForced constructs a new StandardMeter and launches a goroutine no matter +// the global switch is enabled or not. +// Be sure to call Stop() once the meter is of no use to allow for garbage collection. +func NewMeterForced() Meter { + m := newStandardMeter() + arbiter.Lock() + defer arbiter.Unlock() + arbiter.meters[m] = struct{}{} + if !arbiter.started { + arbiter.started = true + go arbiter.tick() + } + return m +} + +// NewRegisteredMeter constructs and registers a new StandardMeter +// and launches a goroutine. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. func NewRegisteredMeter(name string, r Registry) Meter { @@ -59,6 +85,19 @@ func NewRegisteredMeter(name string, r Registry) Meter { return c } +// NewRegisteredMeterForced constructs and registers a new StandardMeter +// and launches a goroutine no matter the global switch is enabled or not. +// Be sure to unregister the meter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredMeterForced(name string, r Registry) Meter { + c := NewMeterForced() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + // MeterSnapshot is a read-only copy of another Meter. type MeterSnapshot struct { count int64 From 01a52982732bf7bef2770859b63c669d5b6127e2 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 04/74] metrics: added NewCounterForced (#17919) --- metrics/counter.go | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/metrics/counter.go b/metrics/counter.go index c7f2b4bd3aa3..2f78c90d5c64 100644 --- a/metrics/counter.go +++ b/metrics/counter.go @@ -1,6 +1,8 @@ package metrics -import "sync/atomic" +import ( + "sync/atomic" +) // Counters hold an int64 value that can be incremented and decremented. type Counter interface { @@ -20,6 +22,17 @@ func GetOrRegisterCounter(name string, r Registry) Counter { return r.GetOrRegister(name, NewCounter).(Counter) } +// GetOrRegisterCounterForced returns an existing Counter or constructs and registers a +// new Counter no matter the global switch is enabled or not. +// Be sure to unregister the counter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterCounterForced(name string, r Registry) Counter { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewCounterForced).(Counter) +} + // NewCounter constructs a new StandardCounter. func NewCounter() Counter { if !Enabled { @@ -28,6 +41,12 @@ func NewCounter() Counter { return &StandardCounter{0} } +// NewCounterForced constructs a new StandardCounter and returns it no matter if +// the global switch is enabled or not. +func NewCounterForced() Counter { + return &StandardCounter{0} +} + // NewRegisteredCounter constructs and registers a new StandardCounter. func NewRegisteredCounter(name string, r Registry) Counter { c := NewCounter() @@ -38,6 +57,19 @@ func NewRegisteredCounter(name string, r Registry) Counter { return c } +// NewRegisteredCounterForced constructs and registers a new StandardCounter +// and launches a goroutine no matter the global switch is enabled or not. 
+// Be sure to unregister the counter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredCounterForced(name string, r Registry) Counter { + c := NewCounterForced() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + // CounterSnapshot is a read-only copy of another Counter. type CounterSnapshot int64 From 4202f2389775c38c3b9c64409a1e0ab9727800f3 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 05/74] core: more detailed metrics for block processing (#18119) --- core/blockchain.go | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/core/blockchain.go b/core/blockchain.go index 2f731331decc..ebd17ccbc56f 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -56,13 +56,18 @@ import ( ) var ( - blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) - CheckpointCh = make(chan int) - ErrNoGenesis = errors.New("Genesis not found in chain") + blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) + blockValidationTimer = metrics.NewRegisteredTimer("chain/validation", nil) + blockExecutionTimer = metrics.NewRegisteredTimer("chain/execution", nil) + blockWriteTimer = metrics.NewRegisteredTimer("chain/write", nil) blockReorgMeter = metrics.NewRegisteredMeter("chain/reorg/executes", nil) blockReorgAddMeter = metrics.NewRegisteredMeter("chain/reorg/add", nil) blockReorgDropMeter = metrics.NewRegisteredMeter("chain/reorg/drop", nil) + + CheckpointCh = make(chan int) + + ErrNoGenesis = errors.New("Genesis not found in chain") ) const ( @@ -1658,7 +1663,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] } feeCapacity := state.GetTRC21FeeCapacityFromStateWithCache(parent.Root(), statedb) // Process block using the parent state as reference point. + t0 := time.Now() receipts, logs, usedGas, err := bc.processor.Process(block, statedb, tradingState, bc.vmConfig, feeCapacity) + t1 := time.Now() if err != nil { bc.reportBlock(block, receipts, err) return i, events, coalescedLogs, err @@ -1669,19 +1676,27 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] bc.reportBlock(block, receipts, err) return i, events, coalescedLogs, err } + t2 := time.Now() proctime := time.Since(bstart) + // Write the block to the chain and get the status. status, err := bc.WriteBlockWithState(block, receipts, statedb, tradingState, lendingState) + t3 := time.Now() if err != nil { return i, events, coalescedLogs, err } + + blockInsertTimer.UpdateSince(bstart) + blockExecutionTimer.Update(t1.Sub(t0)) + blockValidationTimer.Update(t2.Sub(t1)) + blockWriteTimer.Update(t3.Sub(t2)) + switch status { case CanonStatTy: log.Debug("Inserted new block from downloader", "number", block.Number(), "hash", block.Hash(), "uncles", len(block.Uncles()), "txs", len(block.Transactions()), "gas", block.GasUsed(), "elapsed", common.PrettyDuration(time.Since(bstart))) coalescedLogs = append(coalescedLogs, logs...) 
- blockInsertTimer.UpdateSince(bstart) events = append(events, ChainEvent{block, block.Hash(), logs}) lastCanon = block @@ -1695,8 +1710,6 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] case SideStatTy: log.Debug("Inserted forked block from downloader", "number", block.Number(), "hash", block.Hash(), "diff", block.Difficulty(), "elapsed", common.PrettyDuration(time.Since(bstart)), "txs", len(block.Transactions()), "gas", block.GasUsed(), "uncles", len(block.Uncles())) - - blockInsertTimer.UpdateSince(bstart) events = append(events, ChainSideEvent{block}) bc.UpdateBlocksHashCache(block) } @@ -2015,7 +2028,6 @@ func (bc *BlockChain) insertBlock(block *types.Block) ([]interface{}, []*types.L "txs", len(block.Transactions()), "gas", block.GasUsed(), "elapsed", common.PrettyDuration(time.Since(block.ReceivedAt))) coalescedLogs = append(coalescedLogs, result.logs...) events = append(events, ChainEvent{block, block.Hash(), result.logs}) - // Only count canonical blocks for GC processing time bc.gcproc += result.proctime bc.UpdateBlocksHashCache(block) @@ -2026,10 +2038,8 @@ func (bc *BlockChain) insertBlock(block *types.Block) ([]interface{}, []*types.L case SideStatTy: log.Debug("Inserted forked block from fetcher", "number", block.Number(), "hash", block.Hash(), "diff", block.Difficulty(), "elapsed", common.PrettyDuration(time.Since(block.ReceivedAt)), "txs", len(block.Transactions()), "gas", block.GasUsed(), "uncles", len(block.Uncles())) - blockInsertTimer.Update(result.proctime) events = append(events, ChainSideEvent{block}) - bc.UpdateBlocksHashCache(block) } stats.processed++ From 3e4583e7c44f85181f1ec870c7b267e2068dc67b Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 06/74] cmd/utils: allow for multiple influxdb flags (#18520) --- cmd/XDC/main.go | 2 +- cmd/utils/flags.go | 37 ++++++++++++++++++------- cmd/utils/flags_test.go | 60 +++++++++++++++++++++++++++++++++++++++++ metrics/config.go | 2 +- 4 files changed, 89 insertions(+), 12 deletions(-) diff --git a/cmd/XDC/main.go b/cmd/XDC/main.go index f5bdd24709d2..37b5dde48ab8 100644 --- a/cmd/XDC/main.go +++ b/cmd/XDC/main.go @@ -170,7 +170,7 @@ var ( utils.MetricsInfluxDBDatabaseFlag, utils.MetricsInfluxDBUsernameFlag, utils.MetricsInfluxDBPasswordFlag, - utils.MetricsInfluxDBHostTagFlag, + utils.MetricsInfluxDBTagsFlag, } ) diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 39a06e0e3b09..04ddbca93622 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -692,13 +692,13 @@ var ( Value: metrics.DefaultConfig.InfluxDBPassword, Category: flags.MetricsCategory, } - // The `host` tag is part of every measurement sent to InfluxDB. Queries on tags are faster in InfluxDB. - // It is used so that we can group all nodes and average a measurement across all of them, but also so - // that we can select a specific node and inspect its measurements. + // Tags are part of every measurement sent to InfluxDB. Queries on tags are faster in InfluxDB. + // For example `host` tag could be used so that we can group all nodes and average a measurement + // across all of them, but also so that we can select a specific node and inspect its measurements. 
// https://docs.influxdata.com/influxdb/v1.4/concepts/key_concepts/#tag-key - MetricsInfluxDBHostTagFlag = &cli.StringFlag{ - Name: "metrics-influxdb.host.tag", - Usage: "InfluxDB `host` tag attached to all measurements", + MetricsInfluxDBTagsFlag = &cli.StringFlag{ + Name: "metrics-influxdb.tags", + Usage: "Comma-separated InfluxDB tags (key/values) attached to all measurements", Value: metrics.DefaultConfig.InfluxDBTags, Category: flags.MetricsCategory, } @@ -1518,14 +1518,14 @@ func SetupMetrics(ctx *cli.Context) { database = ctx.String(MetricsInfluxDBDatabaseFlag.Name) username = ctx.String(MetricsInfluxDBUsernameFlag.Name) password = ctx.String(MetricsInfluxDBPasswordFlag.Name) - hosttag = ctx.String(MetricsInfluxDBHostTagFlag.Name) ) if enableExport { log.Info("Enabling metrics export to InfluxDB") - go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "xdc.", map[string]string{ - "host": hosttag, - }) + + tagsMap := SplitTagsFlag(ctx.String(MetricsInfluxDBTagsFlag.Name)) + + go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "xdc.", tagsMap) } if ctx.IsSet(MetricsHTTPFlag.Name) { @@ -1538,6 +1538,23 @@ func SetupMetrics(ctx *cli.Context) { } } +func SplitTagsFlag(tagsFlag string) map[string]string { + tags := strings.Split(tagsFlag, ",") + tagsMap := map[string]string{} + + for _, t := range tags { + if t != "" { + kv := strings.Split(t, "=") + + if len(kv) == 2 { + tagsMap[kv[0]] = kv[1] + } + } + } + + return tagsMap +} + // MakeChainDatabase open an LevelDB using the flags passed to the client and will hard crash if it fails. func MakeChainDatabase(ctx *cli.Context, stack *node.Node) ethdb.Database { var ( diff --git a/cmd/utils/flags_test.go b/cmd/utils/flags_test.go index adbf25b45bf6..728cbb707014 100644 --- a/cmd/utils/flags_test.go +++ b/cmd/utils/flags_test.go @@ -1,3 +1,20 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +// Package utils contains internal helper functions for go-ethereum commands. 
package utils import ( @@ -8,6 +25,49 @@ import ( "testing" ) +func Test_SplitTagsFlag(t *testing.T) { + t.Parallel() + tests := []struct { + name string + args string + want map[string]string + }{ + { + "2 tags case", + "host=localhost,bzzkey=123", + map[string]string{ + "host": "localhost", + "bzzkey": "123", + }, + }, + { + "1 tag case", + "host=localhost123", + map[string]string{ + "host": "localhost123", + }, + }, + { + "empty case", + "", + map[string]string{}, + }, + { + "garbage", + "smth=smthelse=123", + map[string]string{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := SplitTagsFlag(tt.args); !reflect.DeepEqual(got, tt.want) { + t.Errorf("splitTagsFlag() = %v, want %v", got, tt.want) + } + }) + } +} + func TestWalkMatch(t *testing.T) { type args struct { root string diff --git a/metrics/config.go b/metrics/config.go index 4570628c243b..1150a007aaf7 100644 --- a/metrics/config.go +++ b/metrics/config.go @@ -41,5 +41,5 @@ var DefaultConfig = Config{ InfluxDBDatabase: "xdc", InfluxDBUsername: "test", InfluxDBPassword: "test", - InfluxDBTags: "localhost", + InfluxDBTags: "host=localhost", } From 7608787b3c1c40ae6570cb7ae9ce4e83bc826ce9 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 07/74] metrics: remove redundant type specifiers (#19090) --- metrics/metrics.go | 2 +- metrics/opentsdb.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 2d9d652a0201..ebfa602d18d9 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -19,7 +19,7 @@ import ( // // This global kill-switch helps quantify the observer effect and makes // for less cluttered pprof profiles. -var Enabled bool = false +var Enabled = false // MetricsEnabledFlag is the CLI flag name to use to enable metrics collections. 
const MetricsEnabledFlag = "metrics" diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go index df7f152ed2eb..3fde55454ba9 100644 --- a/metrics/opentsdb.go +++ b/metrics/opentsdb.go @@ -10,7 +10,7 @@ import ( "time" ) -var shortHostName string = "" +var shortHostName = "" // OpenTSDBConfig provides a container with configuration parameters for // the OpenTSDB exporter From 1557746bcd94546a9b46fa7c56a97482f04e0311 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 08/74] metrics/influxdb: add a timeout to the InfluxDB HTTP client (#19250) --- metrics/influxdb/influxdb.go | 1 + 1 file changed, 1 insertion(+) diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 0567705a9b99..ac5e0fce3c4d 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -63,6 +63,7 @@ func (r *reporter) makeClient() (err error) { URL: r.url, Username: r.username, Password: r.password, + Timeout: 10 * time.Second, }) return From db9487f1e8f924d118cf54040e4cff370fd60b3b Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 09/74] core: split out detailed trie access metrics from insertion time (#19316) --- cmd/XDC/main.go | 1 + cmd/utils/flags.go | 6 +++- cmd/utils/flags_legacy.go | 2 +- core/blockchain.go | 26 ++++++++++++++++- core/state/state_object.go | 19 ++++++++++++ core/state/statedb.go | 47 ++++++++++++++++++++++++++---- metrics/metrics.go | 59 +++++++++++++++++++++++++++----------- 7 files changed, 135 insertions(+), 25 deletions(-) diff --git a/cmd/XDC/main.go b/cmd/XDC/main.go index 37b5dde48ab8..fb01ba2f5f77 100644 --- a/cmd/XDC/main.go +++ b/cmd/XDC/main.go @@ -163,6 +163,7 @@ var ( metricsFlags = []cli.Flag{ utils.MetricsEnabledFlag, + utils.MetricsEnabledExpensiveFlag, utils.MetricsHTTPFlag, utils.MetricsPortFlag, utils.MetricsEnableInfluxDBFlag, diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 04ddbca93622..b42914b2a866 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -644,7 +644,11 @@ var ( Usage: "Enable metrics collection and reporting", Category: flags.MetricsCategory, } - + MetricsEnabledExpensiveFlag = &cli.BoolFlag{ + Name: "metrics-expensive", + Usage: "Enable expensive metrics collection and reporting", + Category: flags.MetricsCategory, + } // MetricsHTTPFlag defines the endpoint for a stand-alone metrics HTTP endpoint. 
// Since the pprof service enables sensitive/vulnerable behavior, this allows a user // to enable a public-OK metrics endpoint without having to worry about ALSO exposing diff --git a/cmd/utils/flags_legacy.go b/cmd/utils/flags_legacy.go index b47a74595613..3bd314292d52 100644 --- a/cmd/utils/flags_legacy.go +++ b/cmd/utils/flags_legacy.go @@ -50,7 +50,7 @@ var ( Usage: "Enable light client mode", Category: flags.DeprecatedCategory, } - // (Deprecated May 2020, shown in aliased flags section) + // Deprecated May 2020, shown in aliased flags section NoUSBFlag = &cli.BoolFlag{ Name: "nousb", Usage: "Disables monitoring for and managing USB hardware wallets (deprecated)", diff --git a/core/blockchain.go b/core/blockchain.go index ebd17ccbc56f..b90e461cc6ae 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -56,6 +56,16 @@ import ( ) var ( + accountReadTimer = metrics.NewRegisteredTimer("chain/account/reads", nil) + accountHashTimer = metrics.NewRegisteredTimer("chain/account/hashes", nil) + accountUpdateTimer = metrics.NewRegisteredTimer("chain/account/updates", nil) + accountCommitTimer = metrics.NewRegisteredTimer("chain/account/commits", nil) + + storageReadTimer = metrics.NewRegisteredTimer("chain/storage/reads", nil) + storageHashTimer = metrics.NewRegisteredTimer("chain/storage/hashes", nil) + storageUpdateTimer = metrics.NewRegisteredTimer("chain/storage/updates", nil) + storageCommitTimer = metrics.NewRegisteredTimer("chain/storage/commits", nil) + blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) blockValidationTimer = metrics.NewRegisteredTimer("chain/validation", nil) blockExecutionTimer = metrics.NewRegisteredTimer("chain/execution", nil) @@ -1686,8 +1696,22 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] return i, events, coalescedLogs, err } + // Update the metrics subsystem with all the measurements + accountReadTimer.Update(statedb.AccountReads) + accountHashTimer.Update(statedb.AccountHashes) + accountUpdateTimer.Update(statedb.AccountUpdates) + accountCommitTimer.Update(statedb.AccountCommits) + + storageReadTimer.Update(statedb.StorageReads) + storageHashTimer.Update(statedb.StorageHashes) + storageUpdateTimer.Update(statedb.StorageUpdates) + storageCommitTimer.Update(statedb.StorageCommits) + + trieAccess := statedb.AccountReads + statedb.AccountHashes + statedb.AccountUpdates + statedb.AccountCommits + trieAccess += statedb.StorageReads + statedb.StorageHashes + statedb.StorageUpdates + statedb.StorageCommits + blockInsertTimer.UpdateSince(bstart) - blockExecutionTimer.Update(t1.Sub(t0)) + blockExecutionTimer.Update(t1.Sub(t0) - trieAccess) blockValidationTimer.Update(t2.Sub(t1)) blockWriteTimer.Update(t3.Sub(t2)) diff --git a/core/state/state_object.go b/core/state/state_object.go index ca695efe3b85..10fcfc8b3cb2 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -21,10 +21,12 @@ import ( "fmt" "io" "math/big" + "time" "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/core/types" "github.com/XinFinOrg/XDPoSChain/crypto" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/rlp" ) @@ -173,6 +175,10 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if s.fakeStorage != nil { return s.fakeStorage[key] } + // Track the amount of time wasted on reading the storge trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) + } value := common.Hash{} 
// Load from DB in case it is missing. enc, err := s.getTrie(db).TryGet(key[:]) @@ -269,6 +275,10 @@ func (s *stateObject) setState(key, value common.Hash) { // updateTrie writes cached storage modifications into the object's storage trie. func (s *stateObject) updateTrie(db Database) Trie { + // Track the amount of time wasted on updating the storge trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) + } tr := s.getTrie(db) for key, value := range s.dirtyStorage { delete(s.dirtyStorage, key) @@ -286,6 +296,11 @@ func (s *stateObject) updateTrie(db Database) Trie { // UpdateRoot sets the trie root to the current root hash of func (s *stateObject) updateRoot(db Database) { s.updateTrie(db) + + // Track the amount of time wasted on hashing the storge trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now()) + } s.data.Root = s.trie.Hash() } @@ -296,6 +311,10 @@ func (s *stateObject) CommitTrie(db Database) error { if s.dbErr != nil { return s.dbErr } + // Track the amount of time wasted on committing the storge trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) + } root, err := s.trie.Commit(nil) if err == nil { s.data.Root = root diff --git a/core/state/statedb.go b/core/state/statedb.go index 7782643f8a0e..0e0dc9e7e5b5 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -22,11 +22,13 @@ import ( "math/big" "sort" "sync" + "time" "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/core/types" "github.com/XinFinOrg/XDPoSChain/crypto" "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/params" "github.com/XinFinOrg/XDPoSChain/rlp" "github.com/XinFinOrg/XDPoSChain/trie" @@ -80,6 +82,16 @@ type StateDB struct { nextRevisionId int lock sync.Mutex + + // Measurements gathered during execution for debugging purposes + AccountReads time.Duration + AccountHashes time.Duration + AccountUpdates time.Duration + AccountCommits time.Duration + StorageReads time.Duration + StorageHashes time.Duration + StorageUpdates time.Duration + StorageCommits time.Duration } type AccountInfo struct { @@ -446,7 +458,13 @@ func (s *StateDB) GetTransientState(addr common.Address, key common.Hash) common // updateStateObject writes the given object to the trie. func (s *StateDB) updateStateObject(stateObject *stateObject) { + // Track the amount of time wasted on updating the account from the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + } + // Encode the account and update the account trie addr := stateObject.Address() + data, err := rlp.EncodeToBytes(stateObject) if err != nil { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) @@ -456,7 +474,13 @@ func (s *StateDB) updateStateObject(stateObject *stateObject) { // deleteStateObject removes the given object from the state trie. 
func (s *StateDB) deleteStateObject(stateObject *stateObject) { + // Track the amount of time wasted on deleting the account from the trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + } + // Delete the account from the trie stateObject.deleted = true + addr := stateObject.Address() s.setError(s.trie.TryDelete(addr[:])) } @@ -471,15 +495,18 @@ func (s *StateDB) DeleteAddress(addr common.Address) { // Retrieve a state object given my the address. Returns nil if not found. func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) { - // Prefer 'live' objects. + // Prefer live objects if any is available if obj := s.stateObjects[addr]; obj != nil { if obj.deleted { return nil } return obj } - - // Load the object from the database. + // Track the amount of time wasted on loading the object from the database + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) + } + // Load the object from the database enc, err := s.trie.TryGet(addr[:]) if len(enc) == 0 { s.setError(err) @@ -490,7 +517,7 @@ func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) log.Error("Failed to decode state object", "addr", addr, "err", err) return nil } - // Insert into the live set. + // Insert into the live set obj := newObject(s, addr, data, s.MarkStateObjectDirty) s.setStateObject(obj) return obj @@ -672,6 +699,11 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { // goes into transaction receipts. func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { s.Finalise(deleteEmptyObjects) + + // Track the amount of time wasted on hashing the account trie + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now()) + } return s.trie.Hash() } @@ -714,7 +746,7 @@ func (s *StateDB) clearJournalAndRefund() { func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) { defer s.clearJournalAndRefund() - // Commit objects to the trie. + // Commit objects to the trie, measuring the elapsed time for addr, stateObject := range s.stateObjects { _, isDirty := s.stateObjectsDirty[addr] switch { @@ -737,7 +769,10 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) } delete(s.stateObjectsDirty, addr) } - // Write trie changes. + // Write the account trie changes, measuing the amount of wasted time + if metrics.EnabledExpensive { + defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) + } root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error { var account Account if err := rlp.DecodeBytes(leaf, &account); err != nil { diff --git a/metrics/metrics.go b/metrics/metrics.go index ebfa602d18d9..53c8e81fbed7 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -15,23 +15,41 @@ import ( ) // Enabled is checked by the constructor functions for all of the -// standard metrics. If it is true, the metric returned is a stub. +// standard metrics. If it is true, the metric returned is a stub. // // This global kill-switch helps quantify the observer effect and makes // for less cluttered pprof profiles. var Enabled = false -// MetricsEnabledFlag is the CLI flag name to use to enable metrics collections. -const MetricsEnabledFlag = "metrics" +// EnabledExpensive is a soft-flag meant for external packages to check if costly +// metrics gathering is allowed or not. 
The goal is to separate standard metrics +// for health monitoring and debug metrics that might impact runtime performance. +var EnabledExpensive = false + +// enablerFlags is the CLI flag names to use to enable metrics collections. +var enablerFlags = []string{"metrics"} + +// expensiveEnablerFlags is the CLI flag names to use to enable metrics collections. +var expensiveEnablerFlags = []string{"metrics-expensive"} // Init enables or disables the metrics system. Since we need this to run before // any other code gets to create meters and timers, we'll actually do an ugly hack // and peek into the command line args for the metrics flag. func init() { for _, arg := range os.Args { - if flag := strings.TrimLeft(arg, "-"); flag == MetricsEnabledFlag { - log.Info("Enabling metrics collection") - Enabled = true + flag := strings.TrimLeft(arg, "-") + + for _, enabler := range enablerFlags { + if !Enabled && flag == enabler { + log.Info("Enabling metrics collection") + Enabled = true + } + } + for _, enabler := range expensiveEnablerFlags { + if !Enabled && flag == enabler { + log.Info("Enabling expensive metrics collection") + EnabledExpensive = true + } } } } @@ -57,27 +75,36 @@ func CollectProcessMetrics(refresh time.Duration) { memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) var diskReads, diskReadBytes, diskWrites, diskWriteBytes Meter + var diskReadBytesCounter, diskWriteBytesCounter Counter if err := ReadDiskStats(diskstats[0]); err == nil { diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) + diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) + diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) } else { log.Debug("Failed to read disk metrics", "err", err) } // Iterate loading the different stats and updating the meters for i := 1; ; i++ { - runtime.ReadMemStats(memstats[i%2]) - memAllocs.Mark(int64(memstats[i%2].Mallocs - memstats[(i-1)%2].Mallocs)) - memFrees.Mark(int64(memstats[i%2].Frees - memstats[(i-1)%2].Frees)) - memInuse.Mark(int64(memstats[i%2].Alloc - memstats[(i-1)%2].Alloc)) - memPauses.Mark(int64(memstats[i%2].PauseTotalNs - memstats[(i-1)%2].PauseTotalNs)) + location1 := i % 2 + location2 := (i - 1) % 2 + + runtime.ReadMemStats(memstats[location1]) + memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) + memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) + memInuse.Mark(int64(memstats[location1].Alloc - memstats[location2].Alloc)) + memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) + + if ReadDiskStats(diskstats[location1]) == nil { + diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) + diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) + diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount) + diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) - if ReadDiskStats(diskstats[i%2]) == nil { - diskReads.Mark(diskstats[i%2].ReadCount - diskstats[(i-1)%2].ReadCount) - diskReadBytes.Mark(diskstats[i%2].ReadBytes - diskstats[(i-1)%2].ReadBytes) - diskWrites.Mark(diskstats[i%2].WriteCount - diskstats[(i-1)%2].WriteCount) - 
diskWriteBytes.Mark(diskstats[i%2].WriteBytes - diskstats[(i-1)%2].WriteBytes) + diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) + diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) } time.Sleep(refresh) } From ed427a9426f3151ab33f50ffc0c6d5ec6ede5106 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 10/74] metrics: fix expensive metrics flag processing (#19327) --- metrics/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 53c8e81fbed7..bd23e4b24747 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -46,7 +46,7 @@ func init() { } } for _, enabler := range expensiveEnablerFlags { - if !Enabled && flag == enabler { + if !EnabledExpensive && flag == enabler { log.Info("Enabling expensive metrics collection") EnabledExpensive = true } From a577c719445a85cb93b57882d71b5c11e619e462 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 11/74] metrics/prometheus: added prometheus metrics (#17077) --- metrics/exp/exp.go | 2 + metrics/prometheus/collector.go | 115 +++++++++++++++++++++++++++++++ metrics/prometheus/prometheus.go | 68 ++++++++++++++++++ 3 files changed, 185 insertions(+) create mode 100644 metrics/prometheus/collector.go create mode 100644 metrics/prometheus/prometheus.go diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 253b4e94903a..28435beceac5 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -10,6 +10,7 @@ import ( "github.com/XinFinOrg/XDPoSChain/log" "github.com/XinFinOrg/XDPoSChain/metrics" + "github.com/XinFinOrg/XDPoSChain/metrics/prometheus" ) type exp struct { @@ -43,6 +44,7 @@ func Exp(r metrics.Registry) { // http.HandleFunc("/debug/vars", e.expHandler) // haven't found an elegant way, so just use a different endpoint http.Handle("/debug/metrics", h) + http.Handle("/debug/metrics/prometheus", prometheus.Handler(r)) } // ExpHandler will return an expvar powered metrics handler. diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go new file mode 100644 index 000000000000..d0e731e6c2dc --- /dev/null +++ b/metrics/prometheus/collector.go @@ -0,0 +1,115 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package prometheus + +import ( + "bytes" + "fmt" + "strconv" + "strings" + + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +var ( + typeGaugeTpl = "# TYPE %s gauge\n" + typeCounterTpl = "# TYPE %s counter\n" + typeSummaryTpl = "# TYPE %s summary\n" + keyValueTpl = "%s %v\n\n" + keyQuantileTagValueTpl = "%s {quantile=\"%s\"} %v\n\n" +) + +// collector is a collection of byte buffers that aggregate Prometheus reports +// for different metric types. 
+type collector struct { + buff *bytes.Buffer +} + +// newCollector createa a new Prometheus metric aggregator. +func newCollector() *collector { + return &collector{ + buff: &bytes.Buffer{}, + } +} + +func (c *collector) addCounter(name string, m metrics.Counter) { + c.writeGaugeCounter(name, m.Count()) +} + +func (c *collector) addGauge(name string, m metrics.Gauge) { + c.writeGaugeCounter(name, m.Value()) +} + +func (c *collector) addGaugeFloat64(name string, m metrics.GaugeFloat64) { + c.writeGaugeCounter(name, m.Value()) +} + +func (c *collector) addHistogram(name string, m metrics.Histogram) { + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) + c.writeSummaryCounter(name, m.Count()) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } +} + +func (c *collector) addMeter(name string, m metrics.Meter) { + c.writeGaugeCounter(name, m.Count()) +} + +func (c *collector) addTimer(name string, m metrics.Timer) { + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) + c.writeSummaryCounter(name, m.Count()) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } +} + +func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { + if len(m.Values()) <= 0 { + return + } + ps := m.Percentiles([]float64{50, 95, 99}) + val := m.Values() + c.writeSummaryCounter(name, len(val)) + c.writeSummaryPercentile(name, "0.50", ps[0]) + c.writeSummaryPercentile(name, "0.95", ps[1]) + c.writeSummaryPercentile(name, "0.99", ps[2]) +} + +func (c *collector) writeGaugeCounter(name string, value interface{}) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(typeGaugeTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyValueTpl, name, value)) +} + +func (c *collector) writeSummaryCounter(name string, value interface{}) { + name = mutateKey(name + "_count") + c.buff.WriteString(fmt.Sprintf(typeCounterTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyValueTpl, name, value)) +} + +func (c *collector) writeSummaryPercentile(name, p string, value interface{}) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyQuantileTagValueTpl, name, p, value)) +} + +func mutateKey(key string) string { + return strings.Replace(key, "/", "_", -1) +} diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go new file mode 100644 index 000000000000..d34c2206f951 --- /dev/null +++ b/metrics/prometheus/prometheus.go @@ -0,0 +1,68 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Package prometheus exposes go-metrics into a Prometheus format. 
+package prometheus + +import ( + "fmt" + "net/http" + "sort" + + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +// Handler returns an HTTP handler which dump metrics in Prometheus format. +func Handler(reg metrics.Registry) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Gather and pre-sort the metrics to avoid random listings + var names []string + reg.Each(func(name string, i interface{}) { + names = append(names, name) + }) + sort.Strings(names) + + // Aggregate all the metris into a Prometheus collector + c := newCollector() + + for _, name := range names { + i := reg.Get(name) + + switch m := i.(type) { + case metrics.Counter: + c.addCounter(name, m.Snapshot()) + case metrics.Gauge: + c.addGauge(name, m.Snapshot()) + case metrics.GaugeFloat64: + c.addGaugeFloat64(name, m.Snapshot()) + case metrics.Histogram: + c.addHistogram(name, m.Snapshot()) + case metrics.Meter: + c.addMeter(name, m.Snapshot()) + case metrics.Timer: + c.addTimer(name, m.Snapshot()) + case metrics.ResettingTimer: + c.addResettingTimer(name, m.Snapshot()) + default: + log.Warn("Unknown Prometheus metric type", "type", fmt.Sprintf("%T", i)) + } + } + w.Header().Add("Content-Type", "text/plain") + w.Header().Add("Content-Length", fmt.Sprint(c.buff.Len())) + w.Write(c.buff.Bytes()) + }) +} From 1eb2ed82932da77b3d9b8c21596e43cbb37692db Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 12/74] core, metrics, p2p: expose various counter metrics for grafana (#19692) --- core/blockchain.go | 23 +++++++++++++++++++++++ core/headerchain.go | 6 ++++++ go.mod | 1 + go.sum | 5 +++++ metrics/cpu.go | 36 ++++++++++++++++++++++++++++++++++++ metrics/cpu_syscall.go | 35 +++++++++++++++++++++++++++++++++++ metrics/cpu_windows.go | 23 +++++++++++++++++++++++ metrics/metrics.go | 23 +++++++++++++++++++---- p2p/server.go | 4 +--- 9 files changed, 149 insertions(+), 7 deletions(-) create mode 100644 metrics/cpu.go create mode 100644 metrics/cpu_syscall.go create mode 100644 metrics/cpu_windows.go diff --git a/core/blockchain.go b/core/blockchain.go index b90e461cc6ae..d11a421978ca 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -56,6 +56,10 @@ import ( ) var ( + headBlockGauge = metrics.NewRegisteredGauge("chain/head/block", nil) + headHeaderGauge = metrics.NewRegisteredGauge("chain/head/header", nil) + headFastBlockGauge = metrics.NewRegisteredGauge("chain/head/receipt", nil) + accountReadTimer = metrics.NewRegisteredTimer("chain/account/reads", nil) accountHashTimer = metrics.NewRegisteredTimer("chain/account/hashes", nil) accountUpdateTimer = metrics.NewRegisteredTimer("chain/account/updates", nil) @@ -355,6 +359,7 @@ func (bc *BlockChain) loadLastState() error { } // Everything seems to be fine, set as the head block bc.currentBlock.Store(currentBlock) + headBlockGauge.Update(int64(currentBlock.NumberU64())) // Restore the last known head header currentHeader := currentBlock.Header() @@ -374,9 +379,12 @@ func (bc *BlockChain) loadLastState() error { // Restore the last known head fast block bc.currentFastBlock.Store(currentBlock) + headFastBlockGauge.Update(int64(currentBlock.NumberU64())) + if head := GetHeadFastBlockHash(bc.db); head != (common.Hash{}) { if block := bc.GetBlockByHash(head); block != nil { bc.currentFastBlock.Store(block) + headFastBlockGauge.Update(int64(block.NumberU64())) } } @@ -422,23 +430,28 @@ func (bc *BlockChain) SetHead(head uint64) error { // Rewind the block 
chain, ensuring we don't end up with a stateless head block if currentBlock := bc.CurrentBlock(); currentBlock != nil && currentHeader.Number.Uint64() < currentBlock.NumberU64() { bc.currentBlock.Store(bc.GetBlock(currentHeader.Hash(), currentHeader.Number.Uint64())) + headBlockGauge.Update(int64(currentHeader.Number.Uint64())) } if currentBlock := bc.CurrentBlock(); currentBlock != nil { if _, err := state.New(currentBlock.Root(), bc.stateCache); err != nil { // Rewound state missing, rolled back to before pivot, reset to genesis bc.currentBlock.Store(bc.genesisBlock) + headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } } // Rewind the fast block in a simpleton way to the target head if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock != nil && currentHeader.Number.Uint64() < currentFastBlock.NumberU64() { bc.currentFastBlock.Store(bc.GetBlock(currentHeader.Hash(), currentHeader.Number.Uint64())) + headFastBlockGauge.Update(int64(currentHeader.Number.Uint64())) } // If either blocks reached nil, reset to the genesis state if currentBlock := bc.CurrentBlock(); currentBlock == nil { bc.currentBlock.Store(bc.genesisBlock) + headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock == nil { bc.currentFastBlock.Store(bc.genesisBlock) + headFastBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } currentBlock := bc.CurrentBlock() currentFastBlock := bc.CurrentFastBlock() @@ -463,6 +476,7 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error { // If all checks out, manually set the head block bc.mu.Lock() bc.currentBlock.Store(block) + headBlockGauge.Update(int64(block.NumberU64())) bc.mu.Unlock() log.Info("Committed new head block", "number", block.Number(), "hash", hash) @@ -593,9 +607,12 @@ func (bc *BlockChain) ResetWithGenesisBlock(genesis *types.Block) error { bc.genesisBlock = genesis bc.insert(bc.genesisBlock, false) bc.currentBlock.Store(bc.genesisBlock) + headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) + bc.hc.SetGenesis(bc.genesisBlock.Header()) bc.hc.SetCurrentHeader(bc.genesisBlock.Header()) bc.currentFastBlock.Store(bc.genesisBlock) + headFastBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) return nil } @@ -695,7 +712,9 @@ func (bc *BlockChain) insert(block *types.Block, writeBlock bool) { if writeBlock { rawdb.WriteBlock(bc.db, block) } + bc.currentBlock.Store(block) + headBlockGauge.Update(int64(block.NumberU64())) // save cache BlockSigners if bc.chainConfig.XDPoS != nil && !bc.chainConfig.IsTIPSigning(block.Number()) { @@ -713,6 +732,7 @@ func (bc *BlockChain) insert(block *types.Block, writeBlock bool) { log.Crit("Failed to insert head fast block hash", "err", err) } bc.currentFastBlock.Store(block) + headFastBlockGauge.Update(int64(block.NumberU64())) } } @@ -1050,10 +1070,12 @@ func (bc *BlockChain) Rollback(chain []common.Hash) { newFastBlock := bc.GetBlock(currentFastBlock.ParentHash(), currentFastBlock.NumberU64()-1) bc.currentFastBlock.Store(newFastBlock) WriteHeadFastBlockHash(bc.db, newFastBlock.Hash()) + headFastBlockGauge.Update(int64(newFastBlock.NumberU64())) } if currentBlock := bc.CurrentBlock(); currentBlock.Hash() == hash { newBlock := bc.GetBlock(currentBlock.ParentHash(), currentBlock.NumberU64()-1) bc.currentBlock.Store(newBlock) + headBlockGauge.Update(int64(newBlock.NumberU64())) rawdb.WriteHeadBlockHash(bc.db, newBlock.Hash()) } } @@ -1136,6 +1158,7 @@ func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain [ 
log.Crit("Failed to update head fast block hash", "err", err) } bc.currentFastBlock.Store(head) + headFastBlockGauge.Update(int64(head.NumberU64())) } } bc.mu.Unlock() diff --git a/core/headerchain.go b/core/headerchain.go index b3cdc10f68d5..51a876d2ecac 100644 --- a/core/headerchain.go +++ b/core/headerchain.go @@ -101,6 +101,7 @@ func NewHeaderChain(chainDb ethdb.Database, config *params.ChainConfig, engine c } } hc.currentHeaderHash = hc.CurrentHeader().Hash() + headHeaderGauge.Update(hc.CurrentHeader().Number.Int64()) return hc, nil } @@ -176,8 +177,10 @@ func (hc *HeaderChain) WriteHeader(header *types.Header) (status WriteStatus, er if err := WriteHeadHeaderHash(hc.chainDb, hash); err != nil { log.Crit("Failed to insert head header hash", "err", err) } + hc.currentHeaderHash = hash hc.currentHeader.Store(types.CopyHeader(header)) + headHeaderGauge.Update(header.Number.Int64()) status = CanonStatTy } else { @@ -392,8 +395,10 @@ func (hc *HeaderChain) SetCurrentHeader(head *types.Header) { if err := WriteHeadHeaderHash(hc.chainDb, head.Hash()); err != nil { log.Crit("Failed to insert head header hash", "err", err) } + hc.currentHeader.Store(head) hc.currentHeaderHash = head.Hash() + headHeaderGauge.Update(head.Number.Int64()) } // DeleteCallback is a callback function that is called by SetHead before @@ -432,6 +437,7 @@ func (hc *HeaderChain) SetHead(head uint64, delFn DeleteCallback) { hc.currentHeader.Store(hc.genesisHeader) } hc.currentHeaderHash = hc.CurrentHeader().Hash() + headHeaderGauge.Update(hc.CurrentHeader().Number.Int64()) if err := WriteHeadHeaderHash(hc.chainDb, hc.currentHeaderHash); err != nil { log.Crit("Failed to reset head header hash", "err", err) diff --git a/go.mod b/go.mod index d849d578c82b..109e053747f8 100644 --- a/go.mod +++ b/go.mod @@ -49,6 +49,7 @@ require ( github.com/crate-crypto/go-kzg-4844 v0.7.0 github.com/deckarep/golang-set v1.8.0 github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498 + github.com/elastic/gosigar v0.14.3 github.com/ethereum/c-kzg-4844 v0.4.0 github.com/kylelemons/godebug v1.1.0 github.com/mattn/go-isatty v0.0.17 diff --git a/go.sum b/go.sum index 55a922e8cc24..b826a51c6e9e 100644 --- a/go.sum +++ b/go.sum @@ -43,6 +43,8 @@ github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498 h1:Y9vTBSsV4hSwPSj4bac github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498/go.mod h1:Mw6PkjjMXWbTj+nnj4s3QPXq1jaT0s5pC0iFD4+BOAA= github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= +github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo= +github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/ethereum/c-kzg-4844 v0.4.0 h1:3MS1s4JtA868KpJxroZoepdV0ZKBp3u/O5HcZ7R3nlY= github.com/ethereum/c-kzg-4844 v0.4.0/go.mod h1:VewdlzQmpT5QSrVhbBuGoCdFJkpaJlO1aQputP83wc0= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= @@ -166,6 +168,7 @@ github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 h1:njlZPzLwU639 github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3/go.mod h1:hpGUWaI9xL8pRQCTXQgocU38Qw1g0Us7n5PxxTwTCYU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.8.4 
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/supranational/blst v0.3.11 h1:LyU6FolezeWAhvQk0k6O/d49jqgO52MSDDfYgbeoEm4= @@ -192,6 +195,7 @@ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -245,6 +249,7 @@ gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6 h1:a6cXbcDDUk gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6/go.mod h1:uAJfkITjFhyEEuUfm7bsmCZRbW5WRq8s9EY8HZ6hCns= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/metrics/cpu.go b/metrics/cpu.go new file mode 100644 index 000000000000..3278d81616a3 --- /dev/null +++ b/metrics/cpu.go @@ -0,0 +1,36 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package metrics + +import "github.com/elastic/gosigar" + +// CPUStats is the system and process CPU stats. +type CPUStats struct { + GlobalTime int64 // Time spent by the CPU working on all processes + GlobalWait int64 // Time spent by waiting on disk for all processes + LocalTime int64 // Time spent by the CPU working on this process +} + +// ReadCPUStats retrieves the current CPU stats. 
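// Illustrative sketch (hypothetical helper in package metrics, not part of the
// patch): the counters that ReadCPUStats fills in are cumulative, so consumers
// such as CollectProcessMetrics keep two snapshots and report the per-interval
// delta, which is what ends up in the system/cpu/* gauges.
func cpuLoadPerSecond(prev, cur *CPUStats, refreshSeconds int64) int64 {
	if refreshSeconds <= 0 {
		return 0 // guard against a degenerate refresh interval
	}
	// Mirrors the sysload computation done in CollectProcessMetrics.
	return (cur.GlobalTime - prev.GlobalTime) / refreshSeconds
}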
+func ReadCPUStats(stats *CPUStats) { + global := gosigar.Cpu{} + global.Get() + + stats.GlobalTime = int64(global.User + global.Nice + global.Sys) + stats.GlobalWait = int64(global.Wait) + stats.LocalTime = getProcessCPUTime() +} diff --git a/metrics/cpu_syscall.go b/metrics/cpu_syscall.go new file mode 100644 index 000000000000..c111d89e7fcf --- /dev/null +++ b/metrics/cpu_syscall.go @@ -0,0 +1,35 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// +build !windows + +package metrics + +import ( + "syscall" + + "github.com/XinFinOrg/XDPoSChain/log" +) + +// getProcessCPUTime retrieves the process' CPU time since program startup. +func getProcessCPUTime() int64 { + var usage syscall.Rusage + if err := syscall.Getrusage(syscall.RUSAGE_SELF, &usage); err != nil { + log.Warn("Failed to retrieve CPU time", "err", err) + return 0 + } + return int64(usage.Utime.Sec+usage.Stime.Sec)*100 + int64(usage.Utime.Usec+usage.Stime.Usec)/10000 //nolint:unconvert +} diff --git a/metrics/cpu_windows.go b/metrics/cpu_windows.go new file mode 100644 index 000000000000..fb29a52a82c1 --- /dev/null +++ b/metrics/cpu_windows.go @@ -0,0 +1,23 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package metrics + +// getProcessCPUTime returns 0 on Windows as there is no system call to resolve +// the actual process' CPU time. 
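// Reader note (not part of the patch): because this Windows stub always
// returns 0, CPUStats.LocalTime stays at zero on Windows builds and the
// derived system/cpu/procload gauge simply reports 0 there.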
+func getProcessCPUTime() int64 { + return 0 +} diff --git a/metrics/metrics.go b/metrics/metrics.go index bd23e4b24747..74de03a9dcc7 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -61,18 +61,27 @@ func CollectProcessMetrics(refresh time.Duration) { if !Enabled { return } + refreshFreq := int64(refresh / time.Second) + // Create the various data collectors + cpuStats := make([]*CPUStats, 2) memstats := make([]*runtime.MemStats, 2) diskstats := make([]*DiskStats, 2) for i := 0; i < len(memstats); i++ { + cpuStats[i] = new(CPUStats) memstats[i] = new(runtime.MemStats) diskstats[i] = new(DiskStats) } // Define the various metrics to collect + cpuSysLoad := GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) + cpuSysWait := GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) + cpuProcLoad := GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) + + memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) memAllocs := GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) memFrees := GetOrRegisterMeter("system/memory/frees", DefaultRegistry) - memInuse := GetOrRegisterMeter("system/memory/inuse", DefaultRegistry) - memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) + memHeld := GetOrRegisterGauge("system/memory/held", DefaultRegistry) + memUsed := GetOrRegisterGauge("system/memory/used", DefaultRegistry) var diskReads, diskReadBytes, diskWrites, diskWriteBytes Meter var diskReadBytesCounter, diskWriteBytesCounter Counter @@ -91,11 +100,17 @@ func CollectProcessMetrics(refresh time.Duration) { location1 := i % 2 location2 := (i - 1) % 2 + ReadCPUStats(cpuStats[location1]) + cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq) + cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq) + cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq) + runtime.ReadMemStats(memstats[location1]) + memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) - memInuse.Mark(int64(memstats[location1].Alloc - memstats[location2].Alloc)) - memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) + memHeld.Update(int64(memstats[location1].HeapSys - memstats[location1].HeapReleased)) + memUsed.Update(int64(memstats[location1].Alloc)) if ReadDiskStats(diskstats[location1]) == nil { diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) diff --git a/p2p/server.go b/p2p/server.go index fabbb9cdbfcd..51579ff7c30c 100644 --- a/p2p/server.go +++ b/p2p/server.go @@ -650,9 +650,7 @@ running: // This channel is used by RemoveTrustedPeer to remove an enode // from the trusted node set. 
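// Reader note (not part of the patch): the simplification applied below relies
// on the fact that Go's built-in delete is a no-op when the key is absent, so
// the old existence check was redundant. Minimal standalone sketch:
trustedExample := map[string]bool{"node-a": true}
delete(trustedExample, "node-b") // absent key: no panic, map unchanged
delete(trustedExample, "node-a") // present key: entry removed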
srv.log.Trace("Removing trusted node", "node", n) - if _, ok := trusted[n.ID]; ok { - delete(trusted, n.ID) - } + delete(trusted, n.ID) // Unmark any already-connected peer as trusted if p, ok := peers[n.ID]; ok { p.rw.set(trustedConn, false) From a4e113ca110dc04d31f2e5da5aacb13958ba21aa Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 13/74] metrics: gather and export threads and goroutines (#19725) --- metrics/metrics.go | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 74de03a9dcc7..2df2404b5f60 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -73,28 +73,26 @@ func CollectProcessMetrics(refresh time.Duration) { diskstats[i] = new(DiskStats) } // Define the various metrics to collect - cpuSysLoad := GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) - cpuSysWait := GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) - cpuProcLoad := GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) - - memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) - memAllocs := GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) - memFrees := GetOrRegisterMeter("system/memory/frees", DefaultRegistry) - memHeld := GetOrRegisterGauge("system/memory/held", DefaultRegistry) - memUsed := GetOrRegisterGauge("system/memory/used", DefaultRegistry) - - var diskReads, diskReadBytes, diskWrites, diskWriteBytes Meter - var diskReadBytesCounter, diskWriteBytesCounter Counter - if err := ReadDiskStats(diskstats[0]); err == nil { - diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) - diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) - diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) - diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) - diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) + var ( + cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) + cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) + cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) + cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) + cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) + + memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) + memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) + memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) + memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + + diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) + diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) + diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) + diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) + diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) - } else { - log.Debug("Failed to read disk metrics", "err", err) - } + ) // Iterate loading the different stats and updating the meters for i := 1; ; i++ { location1 := i % 2 @@ -104,6 +102,8 @@ func CollectProcessMetrics(refresh time.Duration) { 
cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq) cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq) cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq) + cpuThreads.Update(int64(threadCreateProfile.Count())) + cpuGoroutines.Update(int64(runtime.NumGoroutine())) runtime.ReadMemStats(memstats[location1]) memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) From 745640795a13579a071c8cf47d79d64900b9b5d2 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:11 +0800 Subject: [PATCH 14/74] metrics: change links in README.md to https (#20182) --- metrics/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/README.md b/metrics/README.md index bc2a45a8382d..e2d7945008e6 100644 --- a/metrics/README.md +++ b/metrics/README.md @@ -5,7 +5,7 @@ go-metrics Go port of Coda Hale's Metrics library: . -Documentation: . +Documentation: . Usage ----- @@ -128,7 +128,7 @@ go stathat.Stathat(metrics.DefaultRegistry, 10e9, "example@example.com") Maintain all metrics along with expvars at `/debug/metrics`: -This uses the same mechanism as [the official expvar](http://golang.org/pkg/expvar/) +This uses the same mechanism as [the official expvar](https://golang.org/pkg/expvar/) but exposed under `/debug/metrics`, which shows a json representation of all your usual expvars as well as all your go-metrics. From 332ac32bc5275302a342b362bba1d5cacfc78d12 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 15/74] metrics: not compare float numbers directly (#20219) --- metrics/doc.go | 4 ++ metrics/ewma_test.go | 101 +++++++++++++++++++++-------------------- metrics/sample_test.go | 3 +- 3 files changed, 58 insertions(+), 50 deletions(-) create mode 100644 metrics/doc.go diff --git a/metrics/doc.go b/metrics/doc.go new file mode 100644 index 000000000000..13f429c1689d --- /dev/null +++ b/metrics/doc.go @@ -0,0 +1,4 @@ +package metrics + +const epsilon = 0.0000000000000001 +const epsilonPercentile = .00000000001 diff --git a/metrics/ewma_test.go b/metrics/ewma_test.go index 39e67c605b89..5b244191616e 100644 --- a/metrics/ewma_test.go +++ b/metrics/ewma_test.go @@ -1,6 +1,9 @@ package metrics -import "testing" +import ( + "math" + "testing" +) func BenchmarkEWMA(b *testing.B) { a := NewEWMA1() @@ -15,67 +18,67 @@ func TestEWMA1(t *testing.T) { a := NewEWMA1() a.Update(3) a.Tick() - if rate := a.Rate(); rate != 0.6 { + if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.22072766470286553 { + if rate := a.Rate(); math.Abs(0.22072766470286553-rate) > epsilon { t.Errorf("1 minute a.Rate(): 0.22072766470286553 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.08120116994196772 { + if rate := a.Rate(); math.Abs(0.08120116994196772-rate) > epsilon { t.Errorf("2 minute a.Rate(): 0.08120116994196772 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.029872241020718428 { + if rate := a.Rate(); math.Abs(0.029872241020718428-rate) > epsilon { t.Errorf("3 minute a.Rate(): 0.029872241020718428 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.01098938333324054 { + if rate := a.Rate(); math.Abs(0.01098938333324054-rate) > epsilon { t.Errorf("4 minute a.Rate(): 0.01098938333324054 != %v\n", rate) } elapseMinute(a) - if 
rate := a.Rate(); rate != 0.004042768199451294 { + if rate := a.Rate(); math.Abs(0.004042768199451294-rate) > epsilon { t.Errorf("5 minute a.Rate(): 0.004042768199451294 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.0014872513059998212 { + if rate := a.Rate(); math.Abs(0.0014872513059998212-rate) > epsilon { t.Errorf("6 minute a.Rate(): 0.0014872513059998212 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.0005471291793327122 { + if rate := a.Rate(); math.Abs(0.0005471291793327122-rate) > epsilon { t.Errorf("7 minute a.Rate(): 0.0005471291793327122 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.00020127757674150815 { + if rate := a.Rate(); math.Abs(0.00020127757674150815-rate) > epsilon { t.Errorf("8 minute a.Rate(): 0.00020127757674150815 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 7.404588245200814e-05 { + if rate := a.Rate(); math.Abs(7.404588245200814e-05-rate) > epsilon { t.Errorf("9 minute a.Rate(): 7.404588245200814e-05 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 2.7239957857491083e-05 { + if rate := a.Rate(); math.Abs(2.7239957857491083e-05-rate) > epsilon { t.Errorf("10 minute a.Rate(): 2.7239957857491083e-05 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 1.0021020474147462e-05 { + if rate := a.Rate(); math.Abs(1.0021020474147462e-05-rate) > epsilon { t.Errorf("11 minute a.Rate(): 1.0021020474147462e-05 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 3.6865274119969525e-06 { + if rate := a.Rate(); math.Abs(3.6865274119969525e-06-rate) > epsilon { t.Errorf("12 minute a.Rate(): 3.6865274119969525e-06 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 1.3561976441886433e-06 { + if rate := a.Rate(); math.Abs(1.3561976441886433e-06-rate) > epsilon { t.Errorf("13 minute a.Rate(): 1.3561976441886433e-06 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 4.989172314621449e-07 { + if rate := a.Rate(); math.Abs(4.989172314621449e-07-rate) > epsilon { t.Errorf("14 minute a.Rate(): 4.989172314621449e-07 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 1.8354139230109722e-07 { + if rate := a.Rate(); math.Abs(1.8354139230109722e-07-rate) > epsilon { t.Errorf("15 minute a.Rate(): 1.8354139230109722e-07 != %v\n", rate) } } @@ -84,67 +87,67 @@ func TestEWMA5(t *testing.T) { a := NewEWMA5() a.Update(3) a.Tick() - if rate := a.Rate(); rate != 0.6 { + if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.49123845184678905 { + if rate := a.Rate(); math.Abs(0.49123845184678905-rate) > epsilon { t.Errorf("1 minute a.Rate(): 0.49123845184678905 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.4021920276213837 { + if rate := a.Rate(); math.Abs(0.4021920276213837-rate) > epsilon { t.Errorf("2 minute a.Rate(): 0.4021920276213837 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.32928698165641596 { + if rate := a.Rate(); math.Abs(0.32928698165641596-rate) > epsilon { t.Errorf("3 minute a.Rate(): 0.32928698165641596 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.269597378470333 { + if rate := a.Rate(); math.Abs(0.269597378470333-rate) > epsilon { t.Errorf("4 minute a.Rate(): 0.269597378470333 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.2207276647028654 { + if rate := a.Rate(); math.Abs(0.2207276647028654-rate) > epsilon { t.Errorf("5 minute a.Rate(): 
0.2207276647028654 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.18071652714732128 { + if rate := a.Rate(); math.Abs(0.18071652714732128-rate) > epsilon { t.Errorf("6 minute a.Rate(): 0.18071652714732128 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.14795817836496392 { + if rate := a.Rate(); math.Abs(0.14795817836496392-rate) > epsilon { t.Errorf("7 minute a.Rate(): 0.14795817836496392 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.12113791079679326 { + if rate := a.Rate(); math.Abs(0.12113791079679326-rate) > epsilon { t.Errorf("8 minute a.Rate(): 0.12113791079679326 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.09917933293295193 { + if rate := a.Rate(); math.Abs(0.09917933293295193-rate) > epsilon { t.Errorf("9 minute a.Rate(): 0.09917933293295193 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.08120116994196763 { + if rate := a.Rate(); math.Abs(0.08120116994196763-rate) > epsilon { t.Errorf("10 minute a.Rate(): 0.08120116994196763 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.06648189501740036 { + if rate := a.Rate(); math.Abs(0.06648189501740036-rate) > epsilon { t.Errorf("11 minute a.Rate(): 0.06648189501740036 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.05443077197364752 { + if rate := a.Rate(); math.Abs(0.05443077197364752-rate) > epsilon { t.Errorf("12 minute a.Rate(): 0.05443077197364752 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.04456414692860035 { + if rate := a.Rate(); math.Abs(0.04456414692860035-rate) > epsilon { t.Errorf("13 minute a.Rate(): 0.04456414692860035 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.03648603757513079 { + if rate := a.Rate(); math.Abs(0.03648603757513079-rate) > epsilon { t.Errorf("14 minute a.Rate(): 0.03648603757513079 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.0298722410207183831020718428 { + if rate := a.Rate(); math.Abs(0.0298722410207183831020718428-rate) > epsilon { t.Errorf("15 minute a.Rate(): 0.0298722410207183831020718428 != %v\n", rate) } } @@ -153,67 +156,67 @@ func TestEWMA15(t *testing.T) { a := NewEWMA15() a.Update(3) a.Tick() - if rate := a.Rate(); rate != 0.6 { + if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.5613041910189706 { + if rate := a.Rate(); math.Abs(0.5613041910189706-rate) > epsilon { t.Errorf("1 minute a.Rate(): 0.5613041910189706 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.5251039914257684 { + if rate := a.Rate(); math.Abs(0.5251039914257684-rate) > epsilon { t.Errorf("2 minute a.Rate(): 0.5251039914257684 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.4912384518467888184678905 { + if rate := a.Rate(); math.Abs(0.4912384518467888184678905-rate) > epsilon { t.Errorf("3 minute a.Rate(): 0.4912384518467888184678905 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.459557003018789 { + if rate := a.Rate(); math.Abs(0.459557003018789-rate) > epsilon { t.Errorf("4 minute a.Rate(): 0.459557003018789 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.4299187863442732 { + if rate := a.Rate(); math.Abs(0.4299187863442732-rate) > epsilon { t.Errorf("5 minute a.Rate(): 0.4299187863442732 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.4021920276213831 { + if rate := a.Rate(); math.Abs(0.4021920276213831-rate) > epsilon { 
t.Errorf("6 minute a.Rate(): 0.4021920276213831 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.37625345116383313 { + if rate := a.Rate(); math.Abs(0.37625345116383313-rate) > epsilon { t.Errorf("7 minute a.Rate(): 0.37625345116383313 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.3519877317060185 { + if rate := a.Rate(); math.Abs(0.3519877317060185-rate) > epsilon { t.Errorf("8 minute a.Rate(): 0.3519877317060185 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.3292869816564153165641596 { + if rate := a.Rate(); math.Abs(0.3292869816564153165641596-rate) > epsilon { t.Errorf("9 minute a.Rate(): 0.3292869816564153165641596 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.3080502714195546 { + if rate := a.Rate(); math.Abs(0.3080502714195546-rate) > epsilon { t.Errorf("10 minute a.Rate(): 0.3080502714195546 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.2881831806538789 { + if rate := a.Rate(); math.Abs(0.2881831806538789-rate) > epsilon { t.Errorf("11 minute a.Rate(): 0.2881831806538789 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.26959737847033216 { + if rate := a.Rate(); math.Abs(0.26959737847033216-rate) > epsilon { t.Errorf("12 minute a.Rate(): 0.26959737847033216 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.2522102307052083 { + if rate := a.Rate(); math.Abs(0.2522102307052083-rate) > epsilon { t.Errorf("13 minute a.Rate(): 0.2522102307052083 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.23594443252115815 { + if rate := a.Rate(); math.Abs(0.23594443252115815-rate) > epsilon { t.Errorf("14 minute a.Rate(): 0.23594443252115815 != %v\n", rate) } elapseMinute(a) - if rate := a.Rate(); rate != 0.2207276647028646247028654470286553 { + if rate := a.Rate(); math.Abs(0.2207276647028646247028654470286553-rate) > epsilon { t.Errorf("15 minute a.Rate(): 0.2207276647028646247028654470286553 != %v\n", rate) } } diff --git a/metrics/sample_test.go b/metrics/sample_test.go index d6e966400b24..be7bf3d2dffb 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -1,6 +1,7 @@ package metrics import ( + "math" "math/rand" "runtime" "testing" @@ -326,7 +327,7 @@ func testUniformSampleStatistics(t *testing.T, s Sample) { if ps[1] != 7380.5 { t.Errorf("75th percentile: 7380.5 != %v\n", ps[1]) } - if ps[2] != 9986.429999999998 { + if math.Abs(ps[2]-9986.429999999998) > epsilonPercentile { t.Errorf("99th percentile: 9986.429999999998 != %v\n", ps[2]) } } From 462999b38124b95704eea83c1c5168e1f9b265ef Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 16/74] metrics: fix issues reported by staticcheck (#20365) --- metrics/librato/librato.go | 5 +++-- metrics/registry_test.go | 13 +++++++++---- metrics/timer.go | 5 +---- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go index 45bf6cf297a7..9323290bc51b 100644 --- a/metrics/librato/librato.go +++ b/metrics/librato/librato.go @@ -42,9 +42,10 @@ func Librato(r metrics.Registry, d time.Duration, e string, t string, s string, func (re *Reporter) Run() { log.Printf("WARNING: This client has been DEPRECATED! 
It has been moved to https://github.com/mihasya/go-metrics-librato and will be removed from rcrowley/go-metrics on August 5th 2015") - ticker := time.Tick(re.Interval) + ticker := time.NewTicker(re.Interval) + defer ticker.Stop() metricsApi := &LibratoClient{re.Email, re.Token} - for now := range ticker { + for now := range ticker.C { var metrics Batch var err error if metrics, err = re.BuildRequest(now, re.Registry); err != nil { diff --git a/metrics/registry_test.go b/metrics/registry_test.go index 416c82d0b207..d277ae5c3e47 100644 --- a/metrics/registry_test.go +++ b/metrics/registry_test.go @@ -270,7 +270,10 @@ func TestChildPrefixedRegistryOfChildRegister(t *testing.T) { if err != nil { t.Fatal(err.Error()) } - r2.Register("baz", NewCounter()) + err = r2.Register("baz", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } c := NewCounter() Register("bars", c) @@ -278,7 +281,7 @@ func TestChildPrefixedRegistryOfChildRegister(t *testing.T) { r2.Each(func(name string, m interface{}) { i++ if name != "prefix.prefix2.baz" { - //t.Fatal(name) + t.Fatal(name) } }) if i != 1 { @@ -293,7 +296,10 @@ func TestWalkRegistries(t *testing.T) { if err != nil { t.Fatal(err.Error()) } - r2.Register("baz", NewCounter()) + err = r2.Register("baz", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } c := NewCounter() Register("bars", c) @@ -301,5 +307,4 @@ func TestWalkRegistries(t *testing.T) { if prefix != "prefix.prefix2." { t.Fatal(prefix) } - } diff --git a/metrics/timer.go b/metrics/timer.go index 89e22208fde0..a63c9dfb6c2d 100644 --- a/metrics/timer.go +++ b/metrics/timer.go @@ -76,10 +76,7 @@ func NewTimer() Timer { } // NilTimer is a no-op Timer. -type NilTimer struct { - h Histogram - m Meter -} +type NilTimer struct{} // Count is a no-op. func (NilTimer) Count() int64 { return 0 } From 9fee8a72ebd0ddd485af610b6478e26c2ea61960 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 17/74] metrics: disable CPU stats (gosigar) on iOS (#20816) --- metrics/cpu.go | 12 ------------ metrics/cpu_disabled.go | 23 +++++++++++++++++++++++ metrics/cpu_enabled.go | 31 +++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 12 deletions(-) create mode 100644 metrics/cpu_disabled.go create mode 100644 metrics/cpu_enabled.go diff --git a/metrics/cpu.go b/metrics/cpu.go index 3278d81616a3..72ece16e0768 100644 --- a/metrics/cpu.go +++ b/metrics/cpu.go @@ -16,21 +16,9 @@ package metrics -import "github.com/elastic/gosigar" - // CPUStats is the system and process CPU stats. type CPUStats struct { GlobalTime int64 // Time spent by the CPU working on all processes GlobalWait int64 // Time spent by waiting on disk for all processes LocalTime int64 // Time spent by the CPU working on this process } - -// ReadCPUStats retrieves the current CPU stats. -func ReadCPUStats(stats *CPUStats) { - global := gosigar.Cpu{} - global.Get() - - stats.GlobalTime = int64(global.User + global.Nice + global.Sys) - stats.GlobalWait = int64(global.Wait) - stats.LocalTime = getProcessCPUTime() -} diff --git a/metrics/cpu_disabled.go b/metrics/cpu_disabled.go new file mode 100644 index 000000000000..6c3428993fb1 --- /dev/null +++ b/metrics/cpu_disabled.go @@ -0,0 +1,23 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// +build ios + +package metrics + +// ReadCPUStats retrieves the current CPU stats. Internally this uses `gosigar`, +// which is not supported on the platforms in this file. +func ReadCPUStats(stats *CPUStats) {} diff --git a/metrics/cpu_enabled.go b/metrics/cpu_enabled.go new file mode 100644 index 000000000000..99d44e400229 --- /dev/null +++ b/metrics/cpu_enabled.go @@ -0,0 +1,31 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// +build !ios + +package metrics + +import "github.com/elastic/gosigar" + +// ReadCPUStats retrieves the current CPU stats. 
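// Reader note (not part of the patch): cpu_disabled.go (ios) and this file
// (!ios) carry complementary build constraints, so exactly one ReadCPUStats
// implementation is compiled for a given target; on iOS the stub leaves the
// CPUStats fields zeroed, which in turn zeroes the system/cpu/* gauges.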
+func ReadCPUStats(stats *CPUStats) { + global := gosigar.Cpu{} + global.Get() + + stats.GlobalTime = int64(global.User + global.Nice + global.Sys) + stats.GlobalWait = int64(global.Wait) + stats.LocalTime = getProcessCPUTime() +} From 47ce406a4a2a73121581ed4f3e01c067cefb58d5 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 18/74] metrics: make flawed test less flawed (#20818) --- metrics/timer_test.go | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/metrics/timer_test.go b/metrics/timer_test.go index d3750e4f9fa3..390a0e6c23a8 100644 --- a/metrics/timer_test.go +++ b/metrics/timer_test.go @@ -45,10 +45,23 @@ func TestTimerStop(t *testing.T) { } func TestTimerFunc(t *testing.T) { - tm := NewTimer() - tm.Time(func() { time.Sleep(50e6) }) - if max := tm.Max(); 35e6 > max || max > 95e6 { - t.Errorf("tm.Max(): 35e6 > %v || %v > 95e6\n", max, max) + var ( + tm = NewTimer() + testStart = time.Now() + actualTime time.Duration + ) + tm.Time(func() { + time.Sleep(50 * time.Millisecond) + actualTime = time.Since(testStart) + }) + var ( + drift = time.Millisecond * 2 + measured = time.Duration(tm.Max()) + ceil = actualTime + drift + floor = actualTime - drift + ) + if measured > ceil || measured < floor { + t.Errorf("tm.Max(): %v > %v || %v > %v\n", measured, ceil, measured, floor) } } From 1415bb63695780d26924f1a696c3a4f42a41a80f Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 19/74] metrics: add missing calls to Ticker.Stop in tests (#20866) --- metrics/meter_test.go | 1 + metrics/sample_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/metrics/meter_test.go b/metrics/meter_test.go index 0dfce76b7af0..37e45bded251 100644 --- a/metrics/meter_test.go +++ b/metrics/meter_test.go @@ -26,6 +26,7 @@ func TestMeterDecay(t *testing.T) { ticker: time.NewTicker(time.Millisecond), meters: make(map[*StandardMeter]struct{}), } + defer ma.ticker.Stop() m := newStandardMeter() ma.meters[m] = struct{}{} go ma.tick() diff --git a/metrics/sample_test.go b/metrics/sample_test.go index be7bf3d2dffb..5b2ee96e3679 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -346,6 +346,7 @@ func TestUniformSampleConcurrentUpdateCount(t *testing.T) { quit := make(chan struct{}) go func() { t := time.NewTicker(10 * time.Millisecond) + defer t.Stop() for { select { case <-t.C: From c65d0cd947ced255c0d2f9fdcc7116b87f972626 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 20/74] cmd/XDC: enable metrics for geth import command (#20738) --- cmd/XDC/chaincmd.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cmd/XDC/chaincmd.go b/cmd/XDC/chaincmd.go index 5f12048f2cac..78a249ea1c4f 100644 --- a/cmd/XDC/chaincmd.go +++ b/cmd/XDC/chaincmd.go @@ -71,6 +71,14 @@ It expects the genesis file as argument.`, utils.GCModeFlag, utils.CacheDatabaseFlag, utils.CacheGCFlag, + utils.MetricsEnabledFlag, + utils.MetricsEnabledExpensiveFlag, + utils.MetricsEnableInfluxDBFlag, + utils.MetricsInfluxDBEndpointFlag, + utils.MetricsInfluxDBDatabaseFlag, + utils.MetricsInfluxDBUsernameFlag, + utils.MetricsInfluxDBPasswordFlag, + utils.MetricsInfluxDBTagsFlag, }, Description: ` The import command imports blocks from an RLP-encoded form. 
The form can be one file From 32f974cc7b28bc3225d116b069253a693f3d3f74 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 21/74] metrics/prometheus: define TYPE once, add tests (#21068) * metrics/prometheus: define type once for histograms * metrics/prometheus: test collector --- metrics/prometheus/collector.go | 11 ++- metrics/prometheus/collector_test.go | 110 +++++++++++++++++++++++++++ 2 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 metrics/prometheus/collector_test.go diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index d0e731e6c2dc..22f2d2f02a70 100644 --- a/metrics/prometheus/collector.go +++ b/metrics/prometheus/collector.go @@ -30,7 +30,7 @@ var ( typeCounterTpl = "# TYPE %s counter\n" typeSummaryTpl = "# TYPE %s summary\n" keyValueTpl = "%s %v\n\n" - keyQuantileTagValueTpl = "%s {quantile=\"%s\"} %v\n\n" + keyQuantileTagValueTpl = "%s {quantile=\"%s\"} %v\n" ) // collector is a collection of byte buffers that aggregate Prometheus reports @@ -39,7 +39,7 @@ type collector struct { buff *bytes.Buffer } -// newCollector createa a new Prometheus metric aggregator. +// newCollector creates a new Prometheus metric aggregator. func newCollector() *collector { return &collector{ buff: &bytes.Buffer{}, @@ -62,9 +62,11 @@ func (c *collector) addHistogram(name string, m metrics.Histogram) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) for i := range pv { c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) } + c.buff.WriteRune('\n') } func (c *collector) addMeter(name string, m metrics.Meter) { @@ -75,9 +77,11 @@ func (c *collector) addTimer(name string, m metrics.Timer) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) for i := range pv { c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) } + c.buff.WriteRune('\n') } func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { @@ -87,9 +91,11 @@ func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { ps := m.Percentiles([]float64{50, 95, 99}) val := m.Values() c.writeSummaryCounter(name, len(val)) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) c.writeSummaryPercentile(name, "0.50", ps[0]) c.writeSummaryPercentile(name, "0.95", ps[1]) c.writeSummaryPercentile(name, "0.99", ps[2]) + c.buff.WriteRune('\n') } func (c *collector) writeGaugeCounter(name string, value interface{}) { @@ -106,7 +112,6 @@ func (c *collector) writeSummaryCounter(name string, value interface{}) { func (c *collector) writeSummaryPercentile(name, p string, value interface{}) { name = mutateKey(name) - c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, name)) c.buff.WriteString(fmt.Sprintf(keyQuantileTagValueTpl, name, p, value)) } diff --git a/metrics/prometheus/collector_test.go b/metrics/prometheus/collector_test.go new file mode 100644 index 000000000000..4c2ffcb1fed6 --- /dev/null +++ b/metrics/prometheus/collector_test.go @@ -0,0 +1,110 @@ +package prometheus + +import ( + "os" + "testing" + "time" + + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +func TestMain(m *testing.M) { + metrics.Enabled = true + os.Exit(m.Run()) +} + +func TestCollector(t *testing.T) { + c := newCollector() + + 
counter := metrics.NewCounter() + counter.Inc(12345) + c.addCounter("test/counter", counter) + + gauge := metrics.NewGauge() + gauge.Update(23456) + c.addGauge("test/gauge", gauge) + + gaugeFloat64 := metrics.NewGaugeFloat64() + gaugeFloat64.Update(34567.89) + c.addGaugeFloat64("test/gauge_float64", gaugeFloat64) + + histogram := metrics.NewHistogram(&metrics.NilSample{}) + c.addHistogram("test/histogram", histogram) + + meter := metrics.NewMeter() + defer meter.Stop() + meter.Mark(9999999) + c.addMeter("test/meter", meter) + + timer := metrics.NewTimer() + defer timer.Stop() + timer.Update(20 * time.Millisecond) + timer.Update(21 * time.Millisecond) + timer.Update(22 * time.Millisecond) + timer.Update(120 * time.Millisecond) + timer.Update(23 * time.Millisecond) + timer.Update(24 * time.Millisecond) + c.addTimer("test/timer", timer) + + resettingTimer := metrics.NewResettingTimer() + resettingTimer.Update(10 * time.Millisecond) + resettingTimer.Update(11 * time.Millisecond) + resettingTimer.Update(12 * time.Millisecond) + resettingTimer.Update(120 * time.Millisecond) + resettingTimer.Update(13 * time.Millisecond) + resettingTimer.Update(14 * time.Millisecond) + c.addResettingTimer("test/resetting_timer", resettingTimer.Snapshot()) + + emptyResettingTimer := metrics.NewResettingTimer().Snapshot() + c.addResettingTimer("test/empty_resetting_timer", emptyResettingTimer) + + const expectedOutput = `# TYPE test_counter gauge +test_counter 12345 + +# TYPE test_gauge gauge +test_gauge 23456 + +# TYPE test_gauge_float64 gauge +test_gauge_float64 34567.89 + +# TYPE test_histogram_count counter +test_histogram_count 0 + +# TYPE test_histogram summary +test_histogram {quantile="0.5"} 0 +test_histogram {quantile="0.75"} 0 +test_histogram {quantile="0.95"} 0 +test_histogram {quantile="0.99"} 0 +test_histogram {quantile="0.999"} 0 +test_histogram {quantile="0.9999"} 0 + +# TYPE test_meter gauge +test_meter 9999999 + +# TYPE test_timer_count counter +test_timer_count 6 + +# TYPE test_timer summary +test_timer {quantile="0.5"} 2.25e+07 +test_timer {quantile="0.75"} 4.8e+07 +test_timer {quantile="0.95"} 1.2e+08 +test_timer {quantile="0.99"} 1.2e+08 +test_timer {quantile="0.999"} 1.2e+08 +test_timer {quantile="0.9999"} 1.2e+08 + +# TYPE test_resetting_timer_count counter +test_resetting_timer_count 6 + +# TYPE test_resetting_timer summary +test_resetting_timer {quantile="0.50"} 12000000 +test_resetting_timer {quantile="0.95"} 120000000 +test_resetting_timer {quantile="0.99"} 120000000 + +` + exp := c.buff.String() + if exp != expectedOutput { + t.Log("Expected Output:\n", expectedOutput) + t.Log("Actual Output:\n", exp) + t.Fatal("unexpected collector output") + } +} From d7d54b00f74f83aa12627bd9eb10611444d92015 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 22/74] metrics: replace gosigar with gopsutil (#21041) --- go.mod | 3 ++- go.sum | 9 ++++----- metrics/cpu_enabled.go | 21 +++++++++++++++------ 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/go.mod b/go.mod index 109e053747f8..60f4d9bc50a2 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,6 @@ require ( github.com/crate-crypto/go-kzg-4844 v0.7.0 github.com/deckarep/golang-set v1.8.0 github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498 - github.com/elastic/gosigar v0.14.3 github.com/ethereum/c-kzg-4844 v0.4.0 github.com/kylelemons/godebug v1.1.0 github.com/mattn/go-isatty v0.0.17 @@ -80,6 +79,8 @@ require ( github.com/russross/blackfriday/v2 v2.1.0 // indirect 
github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 // indirect github.com/supranational/blst v0.3.11 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.8.0 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect golang.org/x/mod v0.17.0 // indirect golang.org/x/term v0.26.0 // indirect diff --git a/go.sum b/go.sum index b826a51c6e9e..4b74e864cb21 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,6 @@ github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498 h1:Y9vTBSsV4hSwPSj4bac github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498/go.mod h1:Mw6PkjjMXWbTj+nnj4s3QPXq1jaT0s5pC0iFD4+BOAA= github.com/edsrzf/mmap-go v1.0.0 h1:CEBF7HpRnUCSJgGUb5h1Gm7e3VkmVDrR8lvWVLtrOFw= github.com/edsrzf/mmap-go v1.0.0/go.mod h1:YO35OhQPt3KJa3ryjFM5Bs14WD66h8eGKpfaBNrHW5M= -github.com/elastic/gosigar v0.14.3 h1:xwkKwPia+hSfg9GqrCUKYdId102m9qTJIIr7egmK/uo= -github.com/elastic/gosigar v0.14.3/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/ethereum/c-kzg-4844 v0.4.0 h1:3MS1s4JtA868KpJxroZoepdV0ZKBp3u/O5HcZ7R3nlY= github.com/ethereum/c-kzg-4844 v0.4.0/go.mod h1:VewdlzQmpT5QSrVhbBuGoCdFJkpaJlO1aQputP83wc0= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= @@ -168,13 +166,16 @@ github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 h1:njlZPzLwU639 github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3/go.mod h1:hpGUWaI9xL8pRQCTXQgocU38Qw1g0Us7n5PxxTwTCYU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/supranational/blst v0.3.11 h1:LyU6FolezeWAhvQk0k6O/d49jqgO52MSDDfYgbeoEm4= github.com/supranational/blst v0.3.11/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw= github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY= github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= +github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= +github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= @@ -195,7 +196,6 @@ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -249,7 +249,6 @@ gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6 h1:a6cXbcDDUk gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6/go.mod h1:uAJfkITjFhyEEuUfm7bsmCZRbW5WRq8s9EY8HZ6hCns= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/metrics/cpu_enabled.go b/metrics/cpu_enabled.go index 99d44e400229..15476aa902fb 100644 --- a/metrics/cpu_enabled.go +++ b/metrics/cpu_enabled.go @@ -14,18 +14,27 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . +//go:build !ios // +build !ios package metrics -import "github.com/elastic/gosigar" +import ( + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/shirou/gopsutil/cpu" +) // ReadCPUStats retrieves the current CPU stats. func ReadCPUStats(stats *CPUStats) { - global := gosigar.Cpu{} - global.Get() - - stats.GlobalTime = int64(global.User + global.Nice + global.Sys) - stats.GlobalWait = int64(global.Wait) + // passing false to request all cpu times + timeStats, err := cpu.Times(false) + if err != nil { + log.Error("Could not read cpu stats", "err", err) + return + } + // requesting all cpu times will always return an array with only one time stats entry + timeStat := timeStats[0] + stats.GlobalTime = int64((timeStat.User + timeStat.Nice + timeStat.System) * cpu.ClocksPerSec) + stats.GlobalWait = int64((timeStat.Iowait) * cpu.ClocksPerSec) stats.LocalTime = getProcessCPUTime() } From 3dee6675d21f56e6042e4537f0eab3e5c29ef249 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 23/74] metrics/exp: allow configuring metrics HTTP server on separate endpoint (#21290) --- internal/debug/flags.go | 10 +++++++--- metrics/exp/exp.go | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/internal/debug/flags.go b/internal/debug/flags.go index 1e9970b73ce5..cf5b49e4754e 100644 --- a/internal/debug/flags.go +++ b/internal/debug/flags.go @@ -313,7 +313,9 @@ func Setup(ctx *cli.Context) error { // pprof server if ctx.Bool(pprofFlag.Name) { address := fmt.Sprintf("%s:%d", ctx.String(pprofAddrFlag.Name), ctx.Int(pprofPortFlag.Name)) - StartPProf(address) + // This context value ("metrics-addr") represents the utils.MetricsHTTPFlag.Name. + // It cannot be imported because it will cause a cyclical dependency. + StartPProf(address, !ctx.IsSet("metrics-addr") && !ctx.IsSet("metrics.addr")) } if len(logFile) > 0 || rotation { @@ -323,10 +325,12 @@ func Setup(ctx *cli.Context) error { return nil } -func StartPProf(address string) { +func StartPProf(address string, withMetrics bool) { // Hook go-metrics into expvar on any /debug/metrics request, load all vars // from the registry into expvar, and execute regular expvar handler. 
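// Illustrative caller-side sketch (hypothetical values, not from the patch):
// with the new StartPProf(address string, withMetrics bool) signature, a caller
// like Setup only wires the expvar handler into the pprof mux when no dedicated
// metrics endpoint is configured, e.g.
//
//	sharedMetrics := !ctx.IsSet("metrics.addr") // no standalone metrics server requested
//	StartPProf("127.0.0.1:6060", sharedMetrics)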
- exp.Exp(metrics.DefaultRegistry) + if withMetrics { + exp.Exp(metrics.DefaultRegistry) + } log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) go func() { if err := http.ListenAndServe(address, nil); err != nil { diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 28435beceac5..622c2b5289af 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -58,6 +58,7 @@ func ExpHandler(r metrics.Registry) http.Handler { func Setup(address string) { m := http.NewServeMux() m.Handle("/debug/metrics", ExpHandler(metrics.DefaultRegistry)) + m.Handle("/debug/metrics/prometheus", prometheus.Handler(metrics.DefaultRegistry)) log.Info("Starting metrics server", "addr", fmt.Sprintf("http://%s/debug/metrics", address)) go func() { if err := http.ListenAndServe(address, m); err != nil { From 730960ff0669e3a5c94af15f8cb6eb60849610f9 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 24/74] metrics: make meter updates lock-free (#21446) --- metrics/ewma.go | 5 ++-- metrics/meter.go | 65 +++++++++++++++++++++---------------------- metrics/meter_test.go | 5 ++-- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/metrics/ewma.go b/metrics/ewma.go index 57c949e7d421..039286493ebc 100644 --- a/metrics/ewma.go +++ b/metrics/ewma.go @@ -4,6 +4,7 @@ import ( "math" "sync" "sync/atomic" + "time" ) // EWMAs continuously calculate an exponentially-weighted moving average @@ -85,7 +86,7 @@ type StandardEWMA struct { func (a *StandardEWMA) Rate() float64 { a.mutex.Lock() defer a.mutex.Unlock() - return a.rate * float64(1e9) + return a.rate * float64(time.Second) } // Snapshot returns a read-only copy of the EWMA. @@ -98,7 +99,7 @@ func (a *StandardEWMA) Snapshot() EWMA { func (a *StandardEWMA) Tick() { count := atomic.LoadInt64(&a.uncounted) atomic.AddInt64(&a.uncounted, -count) - instantRate := float64(count) / float64(5e9) + instantRate := float64(count) / float64(5*time.Second) a.mutex.Lock() defer a.mutex.Unlock() if a.init { diff --git a/metrics/meter.go b/metrics/meter.go index 58d170fae027..7d2a2f530788 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -2,6 +2,7 @@ package metrics import ( "sync" + "sync/atomic" "time" ) @@ -101,6 +102,7 @@ func NewRegisteredMeterForced(name string, r Registry) Meter { // MeterSnapshot is a read-only copy of another Meter. type MeterSnapshot struct { count int64 + temp int64 rate1, rate5, rate15, rateMean float64 } @@ -149,7 +151,7 @@ func (NilMeter) Rate1() float64 { return 0.0 } // Rate5 is a no-op. func (NilMeter) Rate5() float64 { return 0.0 } -// Rate15is a no-op. +// Rate15 is a no-op. func (NilMeter) Rate15() float64 { return 0.0 } // RateMean is a no-op. @@ -167,7 +169,7 @@ type StandardMeter struct { snapshot *MeterSnapshot a1, a5, a15 EWMA startTime time.Time - stopped bool + stopped uint32 } func newStandardMeter() *StandardMeter { @@ -182,11 +184,8 @@ func newStandardMeter() *StandardMeter { // Stop stops the meter, Mark() will be a no-op if you use it after being stopped. func (m *StandardMeter) Stop() { - m.lock.Lock() - stopped := m.stopped - m.stopped = true - m.lock.Unlock() - if !stopped { + stopped := atomic.SwapUint32(&m.stopped, 1) + if stopped != 1 { arbiter.Lock() delete(arbiter.meters, m) arbiter.Unlock() @@ -194,57 +193,45 @@ func (m *StandardMeter) Stop() { } // Count returns the number of events recorded. 
+// It updates the meter to be as accurate as possible func (m *StandardMeter) Count() int64 { - m.lock.RLock() - count := m.snapshot.count - m.lock.RUnlock() - return count + m.lock.Lock() + defer m.lock.Unlock() + m.updateMeter() + return m.snapshot.count } // Mark records the occurrence of n events. func (m *StandardMeter) Mark(n int64) { - m.lock.Lock() - defer m.lock.Unlock() - if m.stopped { - return - } - m.snapshot.count += n - m.a1.Update(n) - m.a5.Update(n) - m.a15.Update(n) - m.updateSnapshot() + atomic.AddInt64(&m.snapshot.temp, n) } // Rate1 returns the one-minute moving average rate of events per second. func (m *StandardMeter) Rate1() float64 { m.lock.RLock() - rate1 := m.snapshot.rate1 - m.lock.RUnlock() - return rate1 + defer m.lock.RUnlock() + return m.snapshot.rate1 } // Rate5 returns the five-minute moving average rate of events per second. func (m *StandardMeter) Rate5() float64 { m.lock.RLock() - rate5 := m.snapshot.rate5 - m.lock.RUnlock() - return rate5 + defer m.lock.RUnlock() + return m.snapshot.rate5 } // Rate15 returns the fifteen-minute moving average rate of events per second. func (m *StandardMeter) Rate15() float64 { m.lock.RLock() - rate15 := m.snapshot.rate15 - m.lock.RUnlock() - return rate15 + defer m.lock.RUnlock() + return m.snapshot.rate15 } // RateMean returns the meter's mean rate of events per second. func (m *StandardMeter) RateMean() float64 { m.lock.RLock() - rateMean := m.snapshot.rateMean - m.lock.RUnlock() - return rateMean + defer m.lock.RUnlock() + return m.snapshot.rateMean } // Snapshot returns a read-only copy of the meter. @@ -264,9 +251,19 @@ func (m *StandardMeter) updateSnapshot() { snapshot.rateMean = float64(snapshot.count) / time.Since(m.startTime).Seconds() } +func (m *StandardMeter) updateMeter() { + // should only run with write lock held on m.lock + n := atomic.LoadInt64(&m.snapshot.temp) + m.snapshot.count += n + m.a1.Update(n) + m.a5.Update(n) + m.a15.Update(n) +} + func (m *StandardMeter) tick() { m.lock.Lock() defer m.lock.Unlock() + m.updateMeter() m.a1.Tick() m.a5.Tick() m.a15.Tick() @@ -282,7 +279,7 @@ type meterArbiter struct { ticker *time.Ticker } -var arbiter = meterArbiter{ticker: time.NewTicker(5e9), meters: make(map[*StandardMeter]struct{})} +var arbiter = meterArbiter{ticker: time.NewTicker(5 * time.Second), meters: make(map[*StandardMeter]struct{})} // Ticks meters on the scheduled interval func (ma *meterArbiter) tick() { diff --git a/metrics/meter_test.go b/metrics/meter_test.go index 37e45bded251..c44286a0356c 100644 --- a/metrics/meter_test.go +++ b/metrics/meter_test.go @@ -17,7 +17,7 @@ func TestGetOrRegisterMeter(t *testing.T) { r := NewRegistry() NewRegisteredMeter("foo", r).Mark(47) if m := GetOrRegisterMeter("foo", r); m.Count() != 47 { - t.Fatal(m) + t.Fatal(m.Count()) } } @@ -29,10 +29,11 @@ func TestMeterDecay(t *testing.T) { defer ma.ticker.Stop() m := newStandardMeter() ma.meters[m] = struct{}{} - go ma.tick() m.Mark(1) + ma.tickMeters() rateMean := m.RateMean() time.Sleep(100 * time.Millisecond) + ma.tickMeters() if m.RateMean() >= rateMean { t.Error("m.RateMean() didn't decrease") } From bf4b42a551b486524cc1c6a014f258c1773ee363 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 25/74] metrics: zero temp variable in updateMeter (#21470) * metrics: zero temp variable in updateMeter Previously the temp variable was not updated properly after summing it to count. 
This meant we had astronomically high metrics, now we zero out the temp whenever we sum it onto the snapshot count * metrics: move temp variable to be aligned, unit tests Moves the temp variable in MeterSnapshot to be 64-bit aligned because of the atomic bug. Adds a unit test, that catches the previous bug. --- metrics/meter.go | 8 ++++++-- metrics/meter_test.go | 16 ++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/metrics/meter.go b/metrics/meter.go index 7d2a2f530788..60ae919d04db 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -101,8 +101,12 @@ func NewRegisteredMeterForced(name string, r Registry) Meter { // MeterSnapshot is a read-only copy of another Meter. type MeterSnapshot struct { - count int64 + // WARNING: The `temp` field is accessed atomically. + // On 32 bit platforms, only 64-bit aligned fields can be atomic. The struct is + // guaranteed to be so aligned, so take advantage of that. For more information, + // see https://golang.org/pkg/sync/atomic/#pkg-note-BUG. temp int64 + count int64 rate1, rate5, rate15, rateMean float64 } @@ -253,7 +257,7 @@ func (m *StandardMeter) updateSnapshot() { func (m *StandardMeter) updateMeter() { // should only run with write lock held on m.lock - n := atomic.LoadInt64(&m.snapshot.temp) + n := atomic.SwapInt64(&m.snapshot.temp, 0) m.snapshot.count += n m.a1.Update(n) m.a5.Update(n) diff --git a/metrics/meter_test.go b/metrics/meter_test.go index c44286a0356c..99bbd6d98937 100644 --- a/metrics/meter_test.go +++ b/metrics/meter_test.go @@ -73,3 +73,19 @@ func TestMeterZero(t *testing.T) { t.Errorf("m.Count(): 0 != %v\n", count) } } + +func TestMeterRepeat(t *testing.T) { + m := NewMeter() + for i := 0; i < 101; i++ { + m.Mark(int64(i)) + } + if count := m.Count(); count != 5050 { + t.Errorf("m.Count(): 5050 != %v\n", count) + } + for i := 0; i < 101; i++ { + m.Mark(int64(i)) + } + if count := m.Count(); count != 10100 { + t.Errorf("m.Count(): 10100 != %v\n", count) + } +} From d4f1b8a6dd1c0886e5dc94abdcebe2bb28e3f6bb Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 26/74] metrics: fix the panic for reading empty cpu stats (#21864) --- metrics/cpu_enabled.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metrics/cpu_enabled.go b/metrics/cpu_enabled.go index 15476aa902fb..87a2620a5256 100644 --- a/metrics/cpu_enabled.go +++ b/metrics/cpu_enabled.go @@ -32,6 +32,10 @@ func ReadCPUStats(stats *CPUStats) { log.Error("Could not read cpu stats", "err", err) return } + if len(timeStats) == 0 { + log.Error("Empty cpu stats") + return + } // requesting all cpu times will always return an array with only one time stats entry timeStat := timeStats[0] stats.GlobalTime = int64((timeStat.User + timeStat.Nice + timeStat.System) * cpu.ClocksPerSec) From 9d082aa38cbf474bb0e75226ea05f8879c7b5dba Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 27/74] cmd/XDC: dump config for metrics (#22083) --- cmd/XDC/config.go | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/cmd/XDC/config.go b/cmd/XDC/config.go index d5e60c932180..f2ec5d3b83c2 100644 --- a/cmd/XDC/config.go +++ b/cmd/XDC/config.go @@ -32,6 +32,7 @@ import ( "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/eth/ethconfig" "github.com/XinFinOrg/XDPoSChain/internal/flags" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/node" "github.com/XinFinOrg/XDPoSChain/params" 
"github.com/naoina/toml" @@ -90,6 +91,7 @@ type XDCConfig struct { Eth ethconfig.Config Node node.Config Ethstats ethstatsConfig + Metrics metrics.Config XDCX XDCx.Config Account account StakeEnable bool @@ -122,12 +124,14 @@ func defaultNodeConfig() node.Config { return cfg } +// makeConfigNode loads geth configuration and creates a blank node instance. func makeConfigNode(ctx *cli.Context) (*node.Node, XDCConfig) { // Load defaults. cfg := XDCConfig{ Eth: ethconfig.Defaults, XDCX: XDCx.DefaultConfig, Node: defaultNodeConfig(), + Metrics: metrics.DefaultConfig, StakeEnable: true, Verbosity: 3, NAT: "", @@ -208,6 +212,9 @@ func makeConfigNode(ctx *cli.Context) (*node.Node, XDCConfig) { } utils.SetXDCXConfig(ctx, &cfg.XDCX, cfg.Node.DataDir) + + applyMetricConfig(ctx, &cfg) + return stack, cfg } @@ -258,3 +265,36 @@ func dumpConfig(ctx *cli.Context) error { os.Stdout.Write(out) return nil } + +func applyMetricConfig(ctx *cli.Context, cfg *XDCConfig) { + if ctx.IsSet(utils.MetricsEnabledFlag.Name) { + cfg.Metrics.Enabled = ctx.Bool(utils.MetricsEnabledFlag.Name) + } + if ctx.IsSet(utils.MetricsEnabledExpensiveFlag.Name) { + cfg.Metrics.EnabledExpensive = ctx.Bool(utils.MetricsEnabledExpensiveFlag.Name) + } + if ctx.IsSet(utils.MetricsHTTPFlag.Name) { + cfg.Metrics.HTTP = ctx.String(utils.MetricsHTTPFlag.Name) + } + if ctx.IsSet(utils.MetricsPortFlag.Name) { + cfg.Metrics.Port = ctx.Int(utils.MetricsPortFlag.Name) + } + if ctx.IsSet(utils.MetricsEnableInfluxDBFlag.Name) { + cfg.Metrics.EnableInfluxDB = ctx.Bool(utils.MetricsEnableInfluxDBFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBEndpointFlag.Name) { + cfg.Metrics.InfluxDBEndpoint = ctx.String(utils.MetricsInfluxDBEndpointFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBDatabaseFlag.Name) { + cfg.Metrics.InfluxDBDatabase = ctx.String(utils.MetricsInfluxDBDatabaseFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBUsernameFlag.Name) { + cfg.Metrics.InfluxDBUsername = ctx.String(utils.MetricsInfluxDBUsernameFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBPasswordFlag.Name) { + cfg.Metrics.InfluxDBPassword = ctx.String(utils.MetricsInfluxDBPasswordFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBTagsFlag.Name) { + cfg.Metrics.InfluxDBTags = ctx.String(utils.MetricsInfluxDBTagsFlag.Name) + } +} From 1a844e457867103e7e47adbb3476dfd6d5aae75d Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 28/74] metrics: remove uneeded syntax (#21921) --- metrics/cpu_syscall.go | 3 ++- metrics/gauge_float64_test.go | 12 ++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/metrics/cpu_syscall.go b/metrics/cpu_syscall.go index c111d89e7fcf..0a3897ea3de5 100644 --- a/metrics/cpu_syscall.go +++ b/metrics/cpu_syscall.go @@ -14,6 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . 
+//go:build !windows // +build !windows package metrics @@ -31,5 +32,5 @@ func getProcessCPUTime() int64 { log.Warn("Failed to retrieve CPU time", "err", err) return 0 } - return int64(usage.Utime.Sec+usage.Stime.Sec)*100 + int64(usage.Utime.Usec+usage.Stime.Usec)/10000 //nolint:unconvert + return (usage.Utime.Sec+usage.Stime.Sec)*100 + int64(usage.Utime.Usec+usage.Stime.Usec)/10000 //nolint:unconvert } diff --git a/metrics/gauge_float64_test.go b/metrics/gauge_float64_test.go index 3ee568e7ba09..ae088d4abf3b 100644 --- a/metrics/gauge_float64_test.go +++ b/metrics/gauge_float64_test.go @@ -12,27 +12,27 @@ func BenchmarkGuageFloat64(b *testing.B) { func TestGaugeFloat64(t *testing.T) { g := NewGaugeFloat64() - g.Update(float64(47.0)) - if v := g.Value(); float64(47.0) != v { + g.Update(47.0) + if v := g.Value(); v != 47.0 { t.Errorf("g.Value(): 47.0 != %v\n", v) } } func TestGaugeFloat64Snapshot(t *testing.T) { g := NewGaugeFloat64() - g.Update(float64(47.0)) + g.Update(47.0) snapshot := g.Snapshot() g.Update(float64(0)) - if v := snapshot.Value(); float64(47.0) != v { + if v := snapshot.Value(); v != 47.0 { t.Errorf("g.Value(): 47.0 != %v\n", v) } } func TestGetOrRegisterGaugeFloat64(t *testing.T) { r := NewRegistry() - NewRegisteredGaugeFloat64("foo", r).Update(float64(47.0)) + NewRegisteredGaugeFloat64("foo", r).Update(47.0) t.Logf("registry: %v", r) - if g := GetOrRegisterGaugeFloat64("foo", r); float64(47.0) != g.Value() { + if g := GetOrRegisterGaugeFloat64("foo", r); g.Value() != 47.0 { t.Fatal(g) } } From ec0ae4965dfd33a9c73798e2e2907c0a9eac3005 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 29/74] metrics: fix cast omission in cpu_syscall.go (#22262) fixes an regression which caused build failure on certain platforms --- metrics/cpu_syscall.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/cpu_syscall.go b/metrics/cpu_syscall.go index 0a3897ea3de5..89b38c9687d5 100644 --- a/metrics/cpu_syscall.go +++ b/metrics/cpu_syscall.go @@ -32,5 +32,5 @@ func getProcessCPUTime() int64 { log.Warn("Failed to retrieve CPU time", "err", err) return 0 } - return (usage.Utime.Sec+usage.Stime.Sec)*100 + int64(usage.Utime.Usec+usage.Stime.Usec)/10000 //nolint:unconvert + return int64(usage.Utime.Sec+usage.Stime.Sec)*100 + int64(usage.Utime.Usec+usage.Stime.Usec)/10000 //nolint:unconvert } From 5e9cb5d758a707a6157588a1d16218607346a9ae Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 30/74] metrics: add handler performance metrics (#22581) --- metrics/histogram.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/metrics/histogram.go b/metrics/histogram.go index 46f3bbd2f138..2c54ce8b4063 100644 --- a/metrics/histogram.go +++ b/metrics/histogram.go @@ -26,6 +26,15 @@ func GetOrRegisterHistogram(name string, r Registry, s Sample) Histogram { return r.GetOrRegister(name, func() Histogram { return NewHistogram(s) }).(Histogram) } +// GetOrRegisterHistogramLazy returns an existing Histogram or constructs and +// registers a new StandardHistogram. +func GetOrRegisterHistogramLazy(name string, r Registry, s func() Sample) Histogram { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, func() Histogram { return NewHistogram(s()) }).(Histogram) +} + // NewHistogram constructs a new StandardHistogram from a Sample. 
func NewHistogram(s Sample) Histogram { if !Enabled { From ebbcd608cc10144a34e1a6e29ec481b1a967c145 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 31/74] metrics: use resetting histograms for rare packets (#22586) --- metrics/resetting_sample.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 metrics/resetting_sample.go diff --git a/metrics/resetting_sample.go b/metrics/resetting_sample.go new file mode 100644 index 000000000000..43c1129cd0bc --- /dev/null +++ b/metrics/resetting_sample.go @@ -0,0 +1,24 @@ +package metrics + +// ResettingSample converts an ordinary sample into one that resets whenever its +// snapshot is retrieved. This will break for multi-monitor systems, but when only +// a single metric is being pushed out, this ensure that low-frequency events don't +// skew th charts indefinitely. +func ResettingSample(sample Sample) Sample { + return &resettingSample{ + Sample: sample, + } +} + +// resettingSample is a simple wrapper around a sample that resets it upon the +// snapshot retrieval. +type resettingSample struct { + Sample +} + +// Snapshot returns a read-only copy of the sample with the original reset. +func (rs *resettingSample) Snapshot() Sample { + s := rs.Sample.Snapshot() + rs.Sample.Clear() + return s +} From d16c72edbe618d452c8ee20532bbea7c289dfca1 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 32/74] metrics/influxdb: don't push empty histograms, no measurement != 0 (#22590) --- metrics/influxdb/influxdb.go | 43 +++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index ac5e0fce3c4d..51cf088d6a48 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -137,26 +137,29 @@ func (r *reporter) send() error { }) case metrics.Histogram: ms := metric.Snapshot() - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - }, - Time: now, - }) + + if ms.Count() > 0 { + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + }, + Time: now, + }) + } case metrics.Meter: ms := metric.Snapshot() pts = append(pts, client.Point{ From 73b81dde780b06011bdbab823916778ee99fd25f Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 33/74] metrics: use golang.org/x/sys/unix to support Solaris (#22584) Fixes #11113 Co-authored-by: rene <41963722+renaynay@users.noreply.github.com> --- metrics/cpu_syscall.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/cpu_syscall.go b/metrics/cpu_syscall.go index 89b38c9687d5..b3394464e5f5 100644 --- a/metrics/cpu_syscall.go +++ 
b/metrics/cpu_syscall.go @@ -20,7 +20,7 @@ package metrics import ( - "syscall" + syscall "golang.org/x/sys/unix" "github.com/XinFinOrg/XDPoSChain/log" ) From 29b72dbba62aa5305cf852f40b948b46cd3453d7 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 34/74] metrics/influxdb: support V2 (#23194) --- cmd/XDC/chaincmd.go | 4 + cmd/XDC/config.go | 12 ++ cmd/XDC/main.go | 4 + cmd/utils/flags.go | 65 +++++++++- go.mod | 5 + go.sum | 53 ++++++++ metrics/config.go | 11 ++ metrics/influxdb/influxdbv2.go | 214 +++++++++++++++++++++++++++++++++ 8 files changed, 363 insertions(+), 5 deletions(-) create mode 100644 metrics/influxdb/influxdbv2.go diff --git a/cmd/XDC/chaincmd.go b/cmd/XDC/chaincmd.go index 78a249ea1c4f..3ddcae6f145f 100644 --- a/cmd/XDC/chaincmd.go +++ b/cmd/XDC/chaincmd.go @@ -74,11 +74,15 @@ It expects the genesis file as argument.`, utils.MetricsEnabledFlag, utils.MetricsEnabledExpensiveFlag, utils.MetricsEnableInfluxDBFlag, + utils.MetricsEnableInfluxDBV2Flag, utils.MetricsInfluxDBEndpointFlag, utils.MetricsInfluxDBDatabaseFlag, utils.MetricsInfluxDBUsernameFlag, utils.MetricsInfluxDBPasswordFlag, utils.MetricsInfluxDBTagsFlag, + utils.MetricsInfluxDBTokenFlag, + utils.MetricsInfluxDBBucketFlag, + utils.MetricsInfluxDBOrganizationFlag, }, Description: ` The import command imports blocks from an RLP-encoded form. The form can be one file diff --git a/cmd/XDC/config.go b/cmd/XDC/config.go index f2ec5d3b83c2..511ba21715a6 100644 --- a/cmd/XDC/config.go +++ b/cmd/XDC/config.go @@ -297,4 +297,16 @@ func applyMetricConfig(ctx *cli.Context, cfg *XDCConfig) { if ctx.IsSet(utils.MetricsInfluxDBTagsFlag.Name) { cfg.Metrics.InfluxDBTags = ctx.String(utils.MetricsInfluxDBTagsFlag.Name) } + if ctx.IsSet(utils.MetricsEnableInfluxDBV2Flag.Name) { + cfg.Metrics.EnableInfluxDBV2 = ctx.Bool(utils.MetricsEnableInfluxDBV2Flag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBTokenFlag.Name) { + cfg.Metrics.InfluxDBToken = ctx.String(utils.MetricsInfluxDBTokenFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBBucketFlag.Name) { + cfg.Metrics.InfluxDBBucket = ctx.String(utils.MetricsInfluxDBBucketFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBOrganizationFlag.Name) { + cfg.Metrics.InfluxDBOrganization = ctx.String(utils.MetricsInfluxDBOrganizationFlag.Name) + } } diff --git a/cmd/XDC/main.go b/cmd/XDC/main.go index fb01ba2f5f77..b11f3b976c63 100644 --- a/cmd/XDC/main.go +++ b/cmd/XDC/main.go @@ -172,6 +172,10 @@ var ( utils.MetricsInfluxDBUsernameFlag, utils.MetricsInfluxDBPasswordFlag, utils.MetricsInfluxDBTagsFlag, + utils.MetricsEnableInfluxDBV2Flag, + utils.MetricsInfluxDBTokenFlag, + utils.MetricsInfluxDBBucketFlag, + utils.MetricsInfluxDBOrganizationFlag, } ) diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index b42914b2a866..797f578243bb 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -707,6 +707,30 @@ var ( Category: flags.MetricsCategory, } + MetricsEnableInfluxDBV2Flag = &cli.BoolFlag{ + Name: "metrics-influxdbv2", + Usage: "Enable metrics export/push to an external InfluxDB v2 database", + Category: flags.MetricsCategory, + } + MetricsInfluxDBTokenFlag = &cli.StringFlag{ + Name: "metrics-influxdb.token", + Usage: "Token to authorize access to the database (v2 only)", + Value: metrics.DefaultConfig.InfluxDBToken, + Category: flags.MetricsCategory, + } + MetricsInfluxDBBucketFlag = &cli.StringFlag{ + Name: "metrics-influxdb.bucket", + Usage: "InfluxDB bucket name to push reported metrics to (v2 only)", + Value: 
metrics.DefaultConfig.InfluxDBBucket, + Category: flags.MetricsCategory, + } + MetricsInfluxDBOrganizationFlag = &cli.StringFlag{ + Name: "metrics-influxdb.organization", + Usage: "InfluxDB organization name (v2 only)", + Value: metrics.DefaultConfig.InfluxDBOrganization, + Category: flags.MetricsCategory, + } + // MISC settings RollbackFlag = &cli.StringFlag{ Name: "rollback", @@ -1517,11 +1541,36 @@ func SetupMetrics(ctx *cli.Context) { if metrics.Enabled { log.Info("Enabling metrics collection") var ( - enableExport = ctx.Bool(MetricsEnableInfluxDBFlag.Name) - endpoint = ctx.String(MetricsInfluxDBEndpointFlag.Name) - database = ctx.String(MetricsInfluxDBDatabaseFlag.Name) - username = ctx.String(MetricsInfluxDBUsernameFlag.Name) - password = ctx.String(MetricsInfluxDBPasswordFlag.Name) + enableExport = ctx.Bool(MetricsEnableInfluxDBFlag.Name) + enableExportV2 = ctx.Bool(MetricsEnableInfluxDBV2Flag.Name) + ) + + if enableExport || enableExportV2 { + checkExclusive(ctx, MetricsEnableInfluxDBFlag, MetricsEnableInfluxDBV2Flag) + + v1FlagIsSet := ctx.IsSet(MetricsInfluxDBUsernameFlag.Name) || + ctx.IsSet(MetricsInfluxDBPasswordFlag.Name) + + v2FlagIsSet := ctx.IsSet(MetricsInfluxDBTokenFlag.Name) || + ctx.IsSet(MetricsInfluxDBOrganizationFlag.Name) || + ctx.IsSet(MetricsInfluxDBBucketFlag.Name) + + if enableExport && v2FlagIsSet { + Fatalf("Flags --influxdb.metrics.organization, --influxdb.metrics.token, --influxdb.metrics.bucket are only available for influxdb-v2") + } else if enableExportV2 && v1FlagIsSet { + Fatalf("Flags --influxdb.metrics.username, --influxdb.metrics.password are only available for influxdb-v1") + } + } + + var ( + endpoint = ctx.String(MetricsInfluxDBEndpointFlag.Name) + database = ctx.String(MetricsInfluxDBDatabaseFlag.Name) + username = ctx.String(MetricsInfluxDBUsernameFlag.Name) + password = ctx.String(MetricsInfluxDBPasswordFlag.Name) + + token = ctx.String(MetricsInfluxDBTokenFlag.Name) + bucket = ctx.String(MetricsInfluxDBBucketFlag.Name) + organization = ctx.String(MetricsInfluxDBOrganizationFlag.Name) ) if enableExport { @@ -1530,6 +1579,12 @@ func SetupMetrics(ctx *cli.Context) { tagsMap := SplitTagsFlag(ctx.String(MetricsInfluxDBTagsFlag.Name)) go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "xdc.", tagsMap) + } else if enableExportV2 { + log.Info("Enabling metrics export to InfluxDB (v2)") + + tagsMap := SplitTagsFlag(ctx.String(MetricsInfluxDBTagsFlag.Name)) + + go influxdb.InfluxDBV2WithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, token, bucket, organization, "xdc.", tagsMap) } if ctx.IsSet(MetricsHTTPFlag.Name) { diff --git a/go.mod b/go.mod index 60f4d9bc50a2..3160395d2a06 100644 --- a/go.mod +++ b/go.mod @@ -50,6 +50,7 @@ require ( github.com/deckarep/golang-set v1.8.0 github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498 github.com/ethereum/c-kzg-4844 v0.4.0 + github.com/influxdata/influxdb-client-go/v2 v2.4.0 github.com/kylelemons/godebug v1.1.0 github.com/mattn/go-isatty v0.0.17 github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible @@ -64,10 +65,12 @@ require ( github.com/consensys/bavard v0.1.13 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect + github.com/deepmap/oapi-codegen v1.6.0 // indirect github.com/dlclark/regexp2 v1.10.0 // indirect github.com/go-ole/go-ole v1.2.5 // indirect github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect github.com/google/uuid 
v1.3.0 // indirect + github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect @@ -83,9 +86,11 @@ require ( github.com/tklauser/numcpus v0.8.0 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/term v0.26.0 // indirect golang.org/x/text v0.20.0 // indirect google.golang.org/protobuf v1.31.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect gotest.tools v2.2.0+incompatible // indirect rsc.io/tmplfunc v0.0.3 // indirect diff --git a/go.sum b/go.sum index 4b74e864cb21..84d68e6c41ca 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,7 @@ github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/crate-crypto/go-kzg-4844 v0.7.0 h1:C0vgZRk4q4EZ/JgPfzuSoxdCq3C3mOZMBShovmncxvA= github.com/crate-crypto/go-kzg-4844 v0.7.0/go.mod h1:1kMhvPgI0Ky3yIa+9lFySEBUBXkYxeOi8ZF1sYioxhc= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -35,6 +36,9 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= +github.com/deepmap/oapi-codegen v1.6.0 h1:w/d1ntwh91XI0b/8ja7+u5SvA4IFfM0UNNLmiDR1gg0= +github.com/deepmap/oapi-codegen v1.6.0/go.mod h1:ryDa9AgbELGeB+YEXE1dR53yAjHwFvE9iAUlWl9Al3M= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/docker v1.4.2-0.20180625184442-8e610b2b55bf h1:sh8rkQZavChcmakYiSlqu2425CHyFXLZZnvm7PDpU8M= @@ -50,10 +54,15 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/getkin/kin-openapi v0.53.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 h1:DujepqpGd1hyOd7aW59XpK7Qymp8iy83xq74fLr21is= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= +github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs= github.com/go-ole/go-ole v1.2.5 h1:t4MGB5xEDZvXI+0rMjjsfBsD7yAgp/s9ZDkL1JndXwY= github.com/go-ole/go-ole v1.2.5/go.mod 
h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -69,6 +78,7 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -79,6 +89,7 @@ github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3 github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= @@ -90,12 +101,17 @@ github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc= github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= github.com/influxdata/influxdb v1.7.9 h1:uSeBTNO4rBkbp1Be5FKRsAmglM9nlx25TzVQRQt1An4= github.com/influxdata/influxdb v1.7.9/go.mod h1:qZna6X/4elxqT3yI9iZYdZrWWdeFOOprn86kgg4+IzY= +github.com/influxdata/influxdb-client-go/v2 v2.4.0 h1:HGBfZYStlx3Kqvsv1h2pJixbCl/jhnFtxpKFAv9Tu5k= +github.com/influxdata/influxdb-client-go/v2 v2.4.0/go.mod h1:vLNHdxTJkIf2mSLvGrpj8TCcISApPoXkaxP8g9uRlW8= +github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU= +github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus= github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc= github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/karalabe/hid v1.0.0 h1:+/CIMNXhSU/zIJgnIvBD2nKHxS/bnRHhhs9xBryLpPo= github.com/karalabe/hid v1.0.0/go.mod h1:Vr51f8rUOLYrfrWDFlV12GGQgM5AT8sVh+2fY4MPeu8= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -105,11 +121,21 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/labstack/echo/v4 v4.2.1/go.mod h1:AA49e0DZ8kk5jTOOCKNuPR6oTnBS0dYiM4FW1e6jwpg= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -142,6 +168,7 @@ github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtP github.com/peterh/liner v1.1.1-0.20190123174540-a2c9a5303de7 h1:oYW+YCJ1pachXTQmzR3rNLYGGz4g/UgFcjb28p/viDM= github.com/peterh/liner v1.1.1-0.20190123174540-a2c9a5303de7/go.mod h1:CRroGNssyjTd/qIG2FyxByd2S8JEAZXBl4qUrZf8GS0= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -166,6 +193,8 @@ github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 h1:njlZPzLwU639 github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3/go.mod h1:hpGUWaI9xL8pRQCTXQgocU38Qw1g0Us7n5PxxTwTCYU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod 
h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/supranational/blst v0.3.11 h1:LyU6FolezeWAhvQk0k6O/d49jqgO52MSDDfYgbeoEm4= @@ -178,52 +207,74 @@ github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYg github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -238,6 +289,7 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -249,6 +301,7 @@ gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6 h1:a6cXbcDDUk gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6/go.mod h1:uAJfkITjFhyEEuUfm7bsmCZRbW5WRq8s9EY8HZ6hCns= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/metrics/config.go b/metrics/config.go index 1150a007aaf7..cac1fc9c184a 100644 --- a/metrics/config.go +++ b/metrics/config.go @@ -28,6 +28,11 @@ type Config struct { InfluxDBUsername string `toml:",omitempty"` InfluxDBPassword string `toml:",omitempty"` InfluxDBTags string `toml:",omitempty"` + + EnableInfluxDBV2 bool `toml:",omitempty"` + InfluxDBToken string `toml:",omitempty"` + InfluxDBBucket string `toml:",omitempty"` + InfluxDBOrganization string `toml:",omitempty"` } // DefaultConfig is the default config for metrics used in go-ethereum. 
@@ -42,4 +47,10 @@ var DefaultConfig = Config{ InfluxDBUsername: "test", InfluxDBPassword: "test", InfluxDBTags: "host=localhost", + + // influxdbv2-specific flags + EnableInfluxDBV2: false, + InfluxDBToken: "test", + InfluxDBBucket: "xdc", + InfluxDBOrganization: "xdc", } diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go new file mode 100644 index 000000000000..5dc2bff4d7b0 --- /dev/null +++ b/metrics/influxdb/influxdbv2.go @@ -0,0 +1,214 @@ +package influxdb + +import ( + "context" + "fmt" + "time" + + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" + influxdb2 "github.com/influxdata/influxdb-client-go/v2" + "github.com/influxdata/influxdb-client-go/v2/api" +) + +type v2Reporter struct { + reg metrics.Registry + interval time.Duration + + endpoint string + token string + bucket string + organization string + namespace string + tags map[string]string + + client influxdb2.Client + write api.WriteAPI + + cache map[string]int64 +} + +// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags +func InfluxDBV2WithTags(r metrics.Registry, d time.Duration, endpoint string, token string, bucket string, organization string, namespace string, tags map[string]string) { + rep := &v2Reporter{ + reg: r, + interval: d, + endpoint: endpoint, + token: token, + bucket: bucket, + organization: organization, + namespace: namespace, + tags: tags, + cache: make(map[string]int64), + } + + rep.client = influxdb2.NewClient(rep.endpoint, rep.token) + defer rep.client.Close() + + // async write client + rep.write = rep.client.WriteAPI(rep.organization, rep.bucket) + errorsCh := rep.write.Errors() + + // have to handle write errors in a separate goroutine like this b/c the channel is unbuffered and will block writes if not read + go func() { + for err := range errorsCh { + log.Warn("write error", "err", err.Error()) + } + }() + rep.run() +} + +func (r *v2Reporter) run() { + intervalTicker := time.Tick(r.interval) + pingTicker := time.Tick(time.Second * 5) + + for { + select { + case <-intervalTicker: + r.send() + case <-pingTicker: + _, err := r.client.Health(context.Background()) + if err != nil { + log.Warn("Got error from influxdb client health check", "err", err.Error()) + } + } + } +} + +func (r *v2Reporter) send() { + r.reg.Each(func(name string, i interface{}) { + now := time.Now() + namespace := r.namespace + + switch metric := i.(type) { + + case metrics.Counter: + v := metric.Count() + l := r.cache[name] + + measurement := fmt.Sprintf("%s%s.count", namespace, name) + fields := map[string]interface{}{ + "value": v - l, + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + + r.cache[name] = v + + case metrics.Gauge: + ms := metric.Snapshot() + + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + "value": ms.Value(), + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + + case metrics.GaugeFloat64: + ms := metric.Snapshot() + + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + "value": ms.Value(), + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + + case metrics.Histogram: + ms := metric.Snapshot() + + if ms.Count() > 0 { + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + measurement := fmt.Sprintf("%s%s.histogram", namespace, name) + 
fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + } + + case metrics.Meter: + ms := metric.Snapshot() + + measurement := fmt.Sprintf("%s%s.meter", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "mean": ms.RateMean(), + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + + case metrics.Timer: + ms := metric.Snapshot() + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + + measurement := fmt.Sprintf("%s%s.timer", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + "m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "meanrate": ms.RateMean(), + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + + case metrics.ResettingTimer: + t := metric.Snapshot() + + if len(t.Values()) > 0 { + ps := t.Percentiles([]float64{50, 95, 99}) + val := t.Values() + + measurement := fmt.Sprintf("%s%s.span", namespace, name) + fields := map[string]interface{}{ + "count": len(val), + "max": val[len(val)-1], + "mean": t.Mean(), + "min": val[0], + "p50": ps[0], + "p95": ps[1], + "p99": ps[2], + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + } + } + }) + + // Force all unwritten data to be sent + r.write.Flush() +} From 98079104e4937bcb8db5bfe696437cef36061d39 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 35/74] metrics: fix compilation for GOOS=js (#23449) --- metrics/cpu_disabled.go | 2 +- metrics/cpu_enabled.go | 4 ++-- metrics/{cpu_windows.go => cputime_nop.go} | 2 ++ metrics/{cpu_syscall.go => cputime_unix.go} | 4 ++-- metrics/runtime_cgo.go | 3 +-- metrics/runtime_no_cgo.go | 2 +- 6 files changed, 9 insertions(+), 8 deletions(-) rename metrics/{cpu_windows.go => cputime_nop.go} (97%) rename metrics/{cpu_syscall.go => cputime_unix.go} (96%) diff --git a/metrics/cpu_disabled.go b/metrics/cpu_disabled.go index 6c3428993fb1..b0b483ee4bce 100644 --- a/metrics/cpu_disabled.go +++ b/metrics/cpu_disabled.go @@ -14,7 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . -// +build ios +// +build ios js package metrics diff --git a/metrics/cpu_enabled.go b/metrics/cpu_enabled.go index 87a2620a5256..5d0e8485d7fa 100644 --- a/metrics/cpu_enabled.go +++ b/metrics/cpu_enabled.go @@ -14,8 +14,8 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . 
-//go:build !ios -// +build !ios +//go:build !ios && !js +// +build !ios,!js package metrics diff --git a/metrics/cpu_windows.go b/metrics/cputime_nop.go similarity index 97% rename from metrics/cpu_windows.go rename to metrics/cputime_nop.go index fb29a52a82c1..1bf5537277fe 100644 --- a/metrics/cpu_windows.go +++ b/metrics/cputime_nop.go @@ -14,6 +14,8 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . +// +build windows js + package metrics // getProcessCPUTime returns 0 on Windows as there is no system call to resolve diff --git a/metrics/cpu_syscall.go b/metrics/cputime_unix.go similarity index 96% rename from metrics/cpu_syscall.go rename to metrics/cputime_unix.go index b3394464e5f5..50bb95b31e93 100644 --- a/metrics/cpu_syscall.go +++ b/metrics/cputime_unix.go @@ -14,8 +14,8 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . -//go:build !windows -// +build !windows +//go:build !windows && !js +// +build !windows,!js package metrics diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go index e3391f4e89fa..11722f76ccd5 100644 --- a/metrics/runtime_cgo.go +++ b/metrics/runtime_cgo.go @@ -1,5 +1,4 @@ -// +build cgo -// +build !appengine +// +build cgo,!appengine,!js package metrics diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go index 616a3b4751be..e760af554f1a 100644 --- a/metrics/runtime_no_cgo.go +++ b/metrics/runtime_no_cgo.go @@ -1,4 +1,4 @@ -// +build !cgo appengine +// +build !cgo appengine js package metrics From 2e5b342826a51f0af3685567e914a9d881868b6d Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 36/74] metrics: add go:build lines (#23468) --- metrics/cpu_disabled.go | 1 + metrics/cputime_nop.go | 1 + metrics/runtime_cgo.go | 1 + metrics/runtime_gccpufraction.go | 1 + metrics/runtime_no_cgo.go | 1 + metrics/runtime_no_gccpufraction.go | 1 + metrics/syslog.go | 1 + 7 files changed, 7 insertions(+) diff --git a/metrics/cpu_disabled.go b/metrics/cpu_disabled.go index b0b483ee4bce..025d97aeb32a 100644 --- a/metrics/cpu_disabled.go +++ b/metrics/cpu_disabled.go @@ -14,6 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . +//go:build ios || js // +build ios js package metrics diff --git a/metrics/cputime_nop.go b/metrics/cputime_nop.go index 1bf5537277fe..0188735a7833 100644 --- a/metrics/cputime_nop.go +++ b/metrics/cputime_nop.go @@ -14,6 +14,7 @@ // You should have received a copy of the GNU Lesser General Public License // along with the go-ethereum library. If not, see . 
+//go:build windows || js // +build windows js package metrics diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go index 11722f76ccd5..4307ebdba689 100644 --- a/metrics/runtime_cgo.go +++ b/metrics/runtime_cgo.go @@ -1,3 +1,4 @@ +//go:build cgo && !appengine && !js // +build cgo,!appengine,!js package metrics diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go index ca12c05bac74..28cd44752b45 100644 --- a/metrics/runtime_gccpufraction.go +++ b/metrics/runtime_gccpufraction.go @@ -1,3 +1,4 @@ +//go:build go1.5 // +build go1.5 package metrics diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go index e760af554f1a..1799bef63bfb 100644 --- a/metrics/runtime_no_cgo.go +++ b/metrics/runtime_no_cgo.go @@ -1,3 +1,4 @@ +//go:build !cgo || appengine || js // +build !cgo appengine js package metrics diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go index be96aa6f1be9..af1a4b63c809 100644 --- a/metrics/runtime_no_gccpufraction.go +++ b/metrics/runtime_no_gccpufraction.go @@ -1,3 +1,4 @@ +//go:build !go1.5 // +build !go1.5 package metrics diff --git a/metrics/syslog.go b/metrics/syslog.go index a0ed4b1b2364..551a2bd0f072 100644 --- a/metrics/syslog.go +++ b/metrics/syslog.go @@ -1,3 +1,4 @@ +//go:build !windows // +build !windows package metrics From d77c1e5ea3c3c90fb6d5e89fd0de8fe6843037a4 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 37/74] metrics: replace strings.Replace with string.ReplaceAll (#24835) --- metrics/prometheus/collector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index 22f2d2f02a70..23934b212c23 100644 --- a/metrics/prometheus/collector.go +++ b/metrics/prometheus/collector.go @@ -116,5 +116,5 @@ func (c *collector) writeSummaryPercentile(name, p string, value interface{}) { } func mutateKey(key string) string { - return strings.Replace(key, "/", "_", -1) + return strings.ReplaceAll(key, "/", "_") } From 7b0a7e459331066af0618fc949086a6849eb70cc Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 38/74] metrics/influxdb: temp solution to present counter meaningfully (#24811) --- metrics/influxdb/influxdb.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 51cf088d6a48..d4ebea74fbe7 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -104,17 +104,15 @@ func (r *reporter) send() error { switch metric := i.(type) { case metrics.Counter: - v := metric.Count() - l := r.cache[name] + count := metric.Count() pts = append(pts, client.Point{ Measurement: fmt.Sprintf("%s%s.count", namespace, name), Tags: r.tags, Fields: map[string]interface{}{ - "value": v - l, + "value": count, }, Time: now, }) - r.cache[name] = v case metrics.Gauge: ms := metric.Snapshot() pts = append(pts, client.Point{ From 2e34afe4000e9c46f5620bbe039b2e084940a765 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 39/74] rpc: swap out timer metrics to histograms (#25044) --- rpc/handler.go | 4 ++-- rpc/metrics.go | 27 +++++++++++++++++++-------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/rpc/handler.go b/rpc/handler.go index 0e407abec900..777e47e7c88f 100644 --- a/rpc/handler.go +++ b/rpc/handler.go @@ -340,12 +340,12 @@ func (h *handler) handleCall(cp *callProc, msg 
*jsonrpcMessage) *jsonrpcMessage if callb != h.unsubscribeCb { rpcRequestGauge.Inc(1) if answer.Error != nil { - failedReqeustGauge.Inc(1) + failedRequestGauge.Inc(1) } else { successfulRequestGauge.Inc(1) } rpcServingTimer.UpdateSince(start) - newRPCServingTimer(msg.Method, answer.Error == nil).UpdateSince(start) + updateServeTimeHistogram(msg.Method, answer.Error == nil, time.Since(start)) } return answer } diff --git a/rpc/metrics.go b/rpc/metrics.go index ebb407fa3dad..11f853dd2401 100644 --- a/rpc/metrics.go +++ b/rpc/metrics.go @@ -18,6 +18,7 @@ package rpc import ( "fmt" + "time" "github.com/XinFinOrg/XDPoSChain/metrics" ) @@ -25,15 +26,25 @@ import ( var ( rpcRequestGauge = metrics.NewRegisteredGauge("rpc/requests", nil) successfulRequestGauge = metrics.NewRegisteredGauge("rpc/success", nil) - failedReqeustGauge = metrics.NewRegisteredGauge("rpc/failure", nil) - rpcServingTimer = metrics.NewRegisteredTimer("rpc/duration/all", nil) + failedRequestGauge = metrics.NewRegisteredGauge("rpc/failure", nil) + + // serveTimeHistName is the prefix of the per-request serving time histograms. + serveTimeHistName = "rpc/duration" + + rpcServingTimer = metrics.NewRegisteredTimer("rpc/duration/all", nil) ) -func newRPCServingTimer(method string, valid bool) metrics.Timer { - flag := "success" - if !valid { - flag = "failure" +// updateServeTimeHistogram tracks the serving time of a remote RPC call. +func updateServeTimeHistogram(method string, success bool, elapsed time.Duration) { + note := "success" + if !success { + note = "failure" + } + h := fmt.Sprintf("%s/%s/%s", serveTimeHistName, method, note) + sampler := func() metrics.Sample { + return metrics.ResettingSample( + metrics.NewExpDecaySample(1028, 0.015), + ) } - m := fmt.Sprintf("rpc/duration/%s/%s", method, flag) - return metrics.GetOrRegisterTimer(m, nil) + metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(elapsed.Microseconds()) } From 392b44c948ad4e3dd23274fe2d3627ab841f8b1e Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 40/74] metrics/influxdb: replace time.Tick with time.NewTicker (#24783) --- metrics/influxdb/influxdbv2.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go index 5dc2bff4d7b0..583a1ebc78e0 100644 --- a/metrics/influxdb/influxdbv2.go +++ b/metrics/influxdb/influxdbv2.go @@ -59,14 +59,14 @@ func InfluxDBV2WithTags(r metrics.Registry, d time.Duration, endpoint string, to } func (r *v2Reporter) run() { - intervalTicker := time.Tick(r.interval) - pingTicker := time.Tick(time.Second * 5) + intervalTicker := time.NewTicker(r.interval) + pingTicker := time.NewTicker(time.Second * 5) for { select { - case <-intervalTicker: + case <-intervalTicker.C: r.send() - case <-pingTicker: + case <-pingTicker.C: _, err := r.client.Health(context.Background()) if err != nil { log.Warn("Got error from influxdb client health check", "err", err.Error()) From e8452c94a7f73538b5e7ee7ec4b65255b613cae3 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:12 +0800 Subject: [PATCH 41/74] metrics: fix some typos (#25551) --- metrics/gauge_float64_test.go | 2 +- metrics/gauge_test.go | 2 +- metrics/prometheus/prometheus.go | 2 +- miner/unconfirmed_test.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/metrics/gauge_float64_test.go b/metrics/gauge_float64_test.go index ae088d4abf3b..1ae598049229 100644 --- a/metrics/gauge_float64_test.go +++ 
b/metrics/gauge_float64_test.go @@ -2,7 +2,7 @@ package metrics import "testing" -func BenchmarkGuageFloat64(b *testing.B) { +func BenchmarkGaugeFloat64(b *testing.B) { g := NewGaugeFloat64() b.ResetTimer() for i := 0; i < b.N; i++ { diff --git a/metrics/gauge_test.go b/metrics/gauge_test.go index 3aee143455c3..a98fe985d8c2 100644 --- a/metrics/gauge_test.go +++ b/metrics/gauge_test.go @@ -5,7 +5,7 @@ import ( "testing" ) -func BenchmarkGuage(b *testing.B) { +func BenchmarkGauge(b *testing.B) { g := NewGauge() b.ResetTimer() for i := 0; i < b.N; i++ { diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go index d34c2206f951..b367e6c3aa59 100644 --- a/metrics/prometheus/prometheus.go +++ b/metrics/prometheus/prometheus.go @@ -36,7 +36,7 @@ func Handler(reg metrics.Registry) http.Handler { }) sort.Strings(names) - // Aggregate all the metris into a Prometheus collector + // Aggregate all the metrics into a Prometheus collector c := newCollector() for _, name := range names { diff --git a/miner/unconfirmed_test.go b/miner/unconfirmed_test.go index 71a0dde931a5..781ba0636651 100644 --- a/miner/unconfirmed_test.go +++ b/miner/unconfirmed_test.go @@ -72,7 +72,7 @@ func TestUnconfirmedShifts(t *testing.T) { if n := pool.blocks.Len(); n != int(limit)/2 { t.Errorf("unconfirmed count mismatch: have %d, want %d", n, limit/2) } - // Try to shift all the remaining blocks out and verify emptyness + // Try to shift all the remaining blocks out and verify emptiness pool.Shift(start + 2*uint64(limit)) if n := pool.blocks.Len(); n != 0 { t.Errorf("unconfirmed count mismatch: have %d, want %d", n, 0) From dddd6c57cdc32a6474c819506d62054d13cc7cfa Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 42/74] metrics: improve reading Go runtime metrics (#25886) This changes how we read performance metrics from the Go runtime. Instead of using runtime.ReadMemStats, we now rely on the API provided by package runtime/metrics. runtime/metrics provides more accurate information. For example, the new interface has better reporting of memory use. In my testing, the reported value of held memory more accurately reflects the usage reported by the OS. The semantics of metrics system/memory/allocs and system/memory/frees have changed to report amounts in bytes. ReadMemStats only reported the count of allocations in number-of-objects. This is imprecise: 'tiny objects' are not counted because the runtime allocates them in batches; and certain improvements in allocation behavior, such as struct size optimizations, will be less visible when the number of allocs doesn't change. Changing allocation reports to be in bytes makes it appear in graphs that lots more is being allocated. I don't think that's a problem because this metric is primarily interesting for geth developers. The metric system/memory/pauses has been changed to report statistical values from the histogram provided by the runtime. Its name in influxdb has changed from geth.system/memory/pauses.meter to geth.system/memory/pauses.histogram. We also have a new histogram metric, system/cpu/schedlatency, reporting the Go scheduler latency. 
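For reference, reading these runtime/metrics samples directly looks roughly like the sketch below. This is an illustrative stand-alone program, not part of the patch: the main wrapper and the printed output are arbitrary, while the sample names and the KindBad check mirror what the new collector does.

    package main

    import (
        "fmt"
        "runtime/metrics"
    )

    func main() {
        // Same sample names the collector registers: one scalar, one histogram.
        samples := []metrics.Sample{
            {Name: "/sched/goroutines:goroutines"}, // uint64-valued
            {Name: "/gc/pauses:seconds"},           // Float64Histogram-valued
        }
        metrics.Read(samples)
        for _, s := range samples {
            switch s.Value.Kind() {
            case metrics.KindUint64:
                fmt.Printf("%s = %d\n", s.Name, s.Value.Uint64())
            case metrics.KindFloat64Histogram:
                h := s.Value.Float64Histogram()
                fmt.Printf("%s: %d buckets\n", s.Name, len(h.Counts))
            case metrics.KindBad:
                // Unsupported by this Go version; skip it, as the collector does.
            }
        }
    }

The histogram-valued samples are the reason the new runtimehistogram.go file exists: it adapts the runtime's Float64Histogram buckets to this package's existing histogram reporting.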
--- metrics/influxdb/influxdb.go | 35 +-- metrics/metrics.go | 176 ++++++++++----- metrics/metrics_test.go | 32 +-- metrics/runtime.go | 212 ------------------ metrics/runtime_cgo.go | 10 - metrics/runtime_gccpufraction.go | 10 - metrics/runtime_no_cgo.go | 8 - metrics/runtime_no_gccpufraction.go | 10 - metrics/runtime_test.go | 88 -------- metrics/runtimehistogram.go | 319 ++++++++++++++++++++++++++++ metrics/runtimehistogram_test.go | 133 ++++++++++++ 11 files changed, 602 insertions(+), 431 deletions(-) delete mode 100644 metrics/runtime.go delete mode 100644 metrics/runtime_cgo.go delete mode 100644 metrics/runtime_gccpufraction.go delete mode 100644 metrics/runtime_no_cgo.go delete mode 100644 metrics/runtime_no_gccpufraction.go delete mode 100644 metrics/runtime_test.go create mode 100644 metrics/runtimehistogram.go create mode 100644 metrics/runtimehistogram_test.go diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index d4ebea74fbe7..56f19e944abf 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -135,27 +135,28 @@ func (r *reporter) send() error { }) case metrics.Histogram: ms := metric.Snapshot() - if ms.Count() > 0 { - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p25": ps[0], + "p50": ps[1], + "p75": ps[2], + "p95": ps[3], + "p99": ps[4], + "p999": ps[5], + "p9999": ps[6], + } pts = append(pts, client.Point{ Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - }, - Time: now, + Fields: fields, + Time: now, }) } case metrics.Meter: diff --git a/metrics/metrics.go b/metrics/metrics.go index 2df2404b5f60..2b8bad8bee36 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -7,7 +7,8 @@ package metrics import ( "os" - "runtime" + "runtime/metrics" + "runtime/pprof" "strings" "time" @@ -54,38 +55,106 @@ func init() { } } -// CollectProcessMetrics periodically collects various metrics about the running -// process. +var threadCreateProfile = pprof.Lookup("threadcreate") + +type runtimeStats struct { + GCPauses *metrics.Float64Histogram + GCAllocBytes uint64 + GCFreedBytes uint64 + + MemTotal uint64 + HeapObjects uint64 + HeapFree uint64 + HeapReleased uint64 + HeapUnused uint64 + + Goroutines uint64 + SchedLatency *metrics.Float64Histogram +} + +var runtimeSamples = []metrics.Sample{ + {Name: "/gc/pauses:seconds"}, // histogram + {Name: "/gc/heap/allocs:bytes"}, + {Name: "/gc/heap/frees:bytes"}, + {Name: "/memory/classes/total:bytes"}, + {Name: "/memory/classes/heap/objects:bytes"}, + {Name: "/memory/classes/heap/free:bytes"}, + {Name: "/memory/classes/heap/released:bytes"}, + {Name: "/memory/classes/heap/unused:bytes"}, + {Name: "/sched/goroutines:goroutines"}, + {Name: "/sched/latencies:seconds"}, // histogram +} + +func readRuntimeStats(v *runtimeStats) { + metrics.Read(runtimeSamples) + for _, s := range runtimeSamples { + // Skip invalid/unknown metrics. 
This is needed because some metrics + // are unavailable in older Go versions, and attempting to read a 'bad' + // metric panics. + if s.Value.Kind() == metrics.KindBad { + continue + } + + switch s.Name { + case "/gc/pauses:seconds": + v.GCPauses = s.Value.Float64Histogram() + case "/gc/heap/allocs:bytes": + v.GCAllocBytes = s.Value.Uint64() + case "/gc/heap/frees:bytes": + v.GCFreedBytes = s.Value.Uint64() + case "/memory/classes/total:bytes": + v.MemTotal = s.Value.Uint64() + case "/memory/classes/heap/objects:bytes": + v.HeapObjects = s.Value.Uint64() + case "/memory/classes/heap/free:bytes": + v.HeapFree = s.Value.Uint64() + case "/memory/classes/heap/released:bytes": + v.HeapReleased = s.Value.Uint64() + case "/memory/classes/heap/unused:bytes": + v.HeapUnused = s.Value.Uint64() + case "/sched/goroutines:goroutines": + v.Goroutines = s.Value.Uint64() + case "/sched/latencies:seconds": + v.SchedLatency = s.Value.Float64Histogram() + } + } +} + +// CollectProcessMetrics periodically collects various metrics about the running process. func CollectProcessMetrics(refresh time.Duration) { // Short circuit if the metrics system is disabled if !Enabled { return } + refreshFreq := int64(refresh / time.Second) // Create the various data collectors - cpuStats := make([]*CPUStats, 2) - memstats := make([]*runtime.MemStats, 2) - diskstats := make([]*DiskStats, 2) - for i := 0; i < len(memstats); i++ { - cpuStats[i] = new(CPUStats) - memstats[i] = new(runtime.MemStats) - diskstats[i] = new(DiskStats) - } - // Define the various metrics to collect var ( - cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) - cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) - cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) - cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) - cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) - - memPauses = GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) - memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) - memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) - memHeld = GetOrRegisterGauge("system/memory/held", DefaultRegistry) - memUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + cpustats = make([]CPUStats, 2) + diskstats = make([]DiskStats, 2) + rstats = make([]runtimeStats, 2) + ) + + // This scale factor is used for the runtime's time metrics. It's useful to convert to + // ns here because the runtime gives times in float seconds, but runtimeHistogram can + // only provide integers for the minimum and maximum values. 
+ const secondsToNs = float64(time.Second) + // Define the various metrics to collect + var ( + cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) + cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) + cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) + cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) + cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) + cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil) + memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil) + memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) + memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) + heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + heapObjects = GetOrRegisterGauge("system/memory/objects", DefaultRegistry) diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) @@ -93,34 +162,43 @@ func CollectProcessMetrics(refresh time.Duration) { diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) ) - // Iterate loading the different stats and updating the meters - for i := 1; ; i++ { - location1 := i % 2 - location2 := (i - 1) % 2 - - ReadCPUStats(cpuStats[location1]) - cpuSysLoad.Update((cpuStats[location1].GlobalTime - cpuStats[location2].GlobalTime) / refreshFreq) - cpuSysWait.Update((cpuStats[location1].GlobalWait - cpuStats[location2].GlobalWait) / refreshFreq) - cpuProcLoad.Update((cpuStats[location1].LocalTime - cpuStats[location2].LocalTime) / refreshFreq) + + // Iterate loading the different stats and updating the meters. 
+ now, prev := 0, 1 + for ; ; now, prev = prev, now { + // CPU + ReadCPUStats(&cpustats[now]) + cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq) + cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq) + cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq) + + // Threads cpuThreads.Update(int64(threadCreateProfile.Count())) - cpuGoroutines.Update(int64(runtime.NumGoroutine())) - - runtime.ReadMemStats(memstats[location1]) - memPauses.Mark(int64(memstats[location1].PauseTotalNs - memstats[location2].PauseTotalNs)) - memAllocs.Mark(int64(memstats[location1].Mallocs - memstats[location2].Mallocs)) - memFrees.Mark(int64(memstats[location1].Frees - memstats[location2].Frees)) - memHeld.Update(int64(memstats[location1].HeapSys - memstats[location1].HeapReleased)) - memUsed.Update(int64(memstats[location1].Alloc)) - - if ReadDiskStats(diskstats[location1]) == nil { - diskReads.Mark(diskstats[location1].ReadCount - diskstats[location2].ReadCount) - diskReadBytes.Mark(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) - diskWrites.Mark(diskstats[location1].WriteCount - diskstats[location2].WriteCount) - diskWriteBytes.Mark(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) - - diskReadBytesCounter.Inc(diskstats[location1].ReadBytes - diskstats[location2].ReadBytes) - diskWriteBytesCounter.Inc(diskstats[location1].WriteBytes - diskstats[location2].WriteBytes) + + // Go runtime metrics + readRuntimeStats(&rstats[now]) + + cpuGoroutines.Update(int64(rstats[now].Goroutines)) + cpuSchedLatency.update(rstats[now].SchedLatency) + memPauses.update(rstats[now].GCPauses) + + memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) + memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) + + memTotal.Update(int64(rstats[now].MemTotal)) + heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased)) + heapObjects.Update(int64(rstats[now].HeapObjects)) + + // Disk + if ReadDiskStats(&diskstats[now]) == nil { + diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount) + diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount) + diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) + diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) } + time.Sleep(refresh) } } diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 029c99870eba..e3fde1ea62ce 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -2,8 +2,6 @@ package metrics import ( "fmt" - "io" - "log" "sync" "testing" "time" @@ -11,11 +9,11 @@ import ( const FANOUT = 128 -// Stop the compiler from complaining during debugging. 
-var ( - _ = io.Discard - _ = log.LstdFlags -) +func TestReadRuntimeValues(t *testing.T) { + var v runtimeStats + readRuntimeStats(&v) + t.Logf("%+v", v) +} func BenchmarkMetrics(b *testing.B) { r := NewRegistry() @@ -26,7 +24,6 @@ func BenchmarkMetrics(b *testing.B) { m := NewRegisteredMeter("meter", r) t := NewRegisteredTimer("timer", r) RegisterDebugGCStats(r) - RegisterRuntimeMemStats(r) b.ResetTimer() ch := make(chan bool) @@ -48,24 +45,6 @@ func BenchmarkMetrics(b *testing.B) { }() //*/ - wgR := &sync.WaitGroup{} - //* - wgR.Add(1) - go func() { - defer wgR.Done() - //log.Println("go CaptureRuntimeMemStats") - for { - select { - case <-ch: - //log.Println("done CaptureRuntimeMemStats") - return - default: - CaptureRuntimeMemStatsOnce(r) - } - } - }() - //*/ - wgW := &sync.WaitGroup{} /* wgW.Add(1) @@ -104,7 +83,6 @@ func BenchmarkMetrics(b *testing.B) { wg.Wait() close(ch) wgD.Wait() - wgR.Wait() wgW.Wait() } diff --git a/metrics/runtime.go b/metrics/runtime.go deleted file mode 100644 index 9450c479bad7..000000000000 --- a/metrics/runtime.go +++ /dev/null @@ -1,212 +0,0 @@ -package metrics - -import ( - "runtime" - "runtime/pprof" - "time" -) - -var ( - memStats runtime.MemStats - runtimeMetrics struct { - MemStats struct { - Alloc Gauge - BuckHashSys Gauge - DebugGC Gauge - EnableGC Gauge - Frees Gauge - HeapAlloc Gauge - HeapIdle Gauge - HeapInuse Gauge - HeapObjects Gauge - HeapReleased Gauge - HeapSys Gauge - LastGC Gauge - Lookups Gauge - Mallocs Gauge - MCacheInuse Gauge - MCacheSys Gauge - MSpanInuse Gauge - MSpanSys Gauge - NextGC Gauge - NumGC Gauge - GCCPUFraction GaugeFloat64 - PauseNs Histogram - PauseTotalNs Gauge - StackInuse Gauge - StackSys Gauge - Sys Gauge - TotalAlloc Gauge - } - NumCgoCall Gauge - NumGoroutine Gauge - NumThread Gauge - ReadMemStats Timer - } - frees uint64 - lookups uint64 - mallocs uint64 - numGC uint32 - numCgoCalls int64 - - threadCreateProfile = pprof.Lookup("threadcreate") -) - -// Capture new values for the Go runtime statistics exported in -// runtime.MemStats. This is designed to be called as a goroutine. -func CaptureRuntimeMemStats(r Registry, d time.Duration) { - for range time.Tick(d) { - CaptureRuntimeMemStatsOnce(r) - } -} - -// Capture new values for the Go runtime statistics exported in -// runtime.MemStats. This is designed to be called in a background -// goroutine. Giving a registry which has not been given to -// RegisterRuntimeMemStats will panic. -// -// Be very careful with this because runtime.ReadMemStats calls the C -// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld() -// and that last one does what it says on the tin. -func CaptureRuntimeMemStatsOnce(r Registry) { - t := time.Now() - runtime.ReadMemStats(&memStats) // This takes 50-200us. 
- runtimeMetrics.ReadMemStats.UpdateSince(t) - - runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc)) - runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys)) - if memStats.DebugGC { - runtimeMetrics.MemStats.DebugGC.Update(1) - } else { - runtimeMetrics.MemStats.DebugGC.Update(0) - } - if memStats.EnableGC { - runtimeMetrics.MemStats.EnableGC.Update(1) - } else { - runtimeMetrics.MemStats.EnableGC.Update(0) - } - - runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees)) - runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc)) - runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle)) - runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse)) - runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects)) - runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased)) - runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys)) - runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC)) - runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups)) - runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs)) - runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse)) - runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys)) - runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse)) - runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys)) - runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC)) - runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC)) - runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats)) - - // - i := numGC % uint32(len(memStats.PauseNs)) - ii := memStats.NumGC % uint32(len(memStats.PauseNs)) - if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) { - for i = 0; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } else { - if i > ii { - for ; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - i = 0 - } - for ; i < ii; i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } - frees = memStats.Frees - lookups = memStats.Lookups - mallocs = memStats.Mallocs - numGC = memStats.NumGC - - runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs)) - runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse)) - runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys)) - runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys)) - runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc)) - - currentNumCgoCalls := numCgoCall() - runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls) - numCgoCalls = currentNumCgoCalls - - runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine())) - - runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count())) -} - -// Register runtimeMetrics for the Go runtime statistics exported in runtime and -// specifically runtime.MemStats. The runtimeMetrics are named by their -// fully-qualified Go symbols, i.e. runtime.MemStats.Alloc. 
-func RegisterRuntimeMemStats(r Registry) { - runtimeMetrics.MemStats.Alloc = NewGauge() - runtimeMetrics.MemStats.BuckHashSys = NewGauge() - runtimeMetrics.MemStats.DebugGC = NewGauge() - runtimeMetrics.MemStats.EnableGC = NewGauge() - runtimeMetrics.MemStats.Frees = NewGauge() - runtimeMetrics.MemStats.HeapAlloc = NewGauge() - runtimeMetrics.MemStats.HeapIdle = NewGauge() - runtimeMetrics.MemStats.HeapInuse = NewGauge() - runtimeMetrics.MemStats.HeapObjects = NewGauge() - runtimeMetrics.MemStats.HeapReleased = NewGauge() - runtimeMetrics.MemStats.HeapSys = NewGauge() - runtimeMetrics.MemStats.LastGC = NewGauge() - runtimeMetrics.MemStats.Lookups = NewGauge() - runtimeMetrics.MemStats.Mallocs = NewGauge() - runtimeMetrics.MemStats.MCacheInuse = NewGauge() - runtimeMetrics.MemStats.MCacheSys = NewGauge() - runtimeMetrics.MemStats.MSpanInuse = NewGauge() - runtimeMetrics.MemStats.MSpanSys = NewGauge() - runtimeMetrics.MemStats.NextGC = NewGauge() - runtimeMetrics.MemStats.NumGC = NewGauge() - runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64() - runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015)) - runtimeMetrics.MemStats.PauseTotalNs = NewGauge() - runtimeMetrics.MemStats.StackInuse = NewGauge() - runtimeMetrics.MemStats.StackSys = NewGauge() - runtimeMetrics.MemStats.Sys = NewGauge() - runtimeMetrics.MemStats.TotalAlloc = NewGauge() - runtimeMetrics.NumCgoCall = NewGauge() - runtimeMetrics.NumGoroutine = NewGauge() - runtimeMetrics.NumThread = NewGauge() - runtimeMetrics.ReadMemStats = NewTimer() - - r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc) - r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys) - r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC) - r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC) - r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees) - r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc) - r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle) - r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse) - r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects) - r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased) - r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys) - r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC) - r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups) - r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs) - r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse) - r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys) - r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse) - r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys) - r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC) - r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC) - r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction) - r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs) - r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs) - r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse) - r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys) - r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys) - 
r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc) - r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall) - r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine) - r.Register("runtime.NumThread", runtimeMetrics.NumThread) - r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats) -} diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go deleted file mode 100644 index 4307ebdba689..000000000000 --- a/metrics/runtime_cgo.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build cgo && !appengine && !js -// +build cgo,!appengine,!js - -package metrics - -import "runtime" - -func numCgoCall() int64 { - return runtime.NumCgoCall() -} diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go deleted file mode 100644 index 28cd44752b45..000000000000 --- a/metrics/runtime_gccpufraction.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build go1.5 -// +build go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return memStats.GCCPUFraction -} diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go deleted file mode 100644 index 1799bef63bfb..000000000000 --- a/metrics/runtime_no_cgo.go +++ /dev/null @@ -1,8 +0,0 @@ -//go:build !cgo || appengine || js -// +build !cgo appengine js - -package metrics - -func numCgoCall() int64 { - return 0 -} diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go deleted file mode 100644 index af1a4b63c809..000000000000 --- a/metrics/runtime_no_gccpufraction.go +++ /dev/null @@ -1,10 +0,0 @@ -//go:build !go1.5 -// +build !go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return 0 -} diff --git a/metrics/runtime_test.go b/metrics/runtime_test.go deleted file mode 100644 index f85f7868f71a..000000000000 --- a/metrics/runtime_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package metrics - -import ( - "runtime" - "testing" - "time" -) - -func BenchmarkRuntimeMemStats(b *testing.B) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - b.ResetTimer() - for i := 0; i < b.N; i++ { - CaptureRuntimeMemStatsOnce(r) - } -} - -func TestRuntimeMemStats(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests. - runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 { - t.Fatal(count - zero) - } - runtime.GC() - runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 { - t.Fatal(count - zero) - } - for i := 0; i < 256; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 { - t.Fatal(count - zero) - } - for i := 0; i < 257; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures. 
- t.Fatal(count - zero) - } -} - -func TestRuntimeMemStatsNumThread(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - - if value := runtimeMetrics.NumThread.Value(); value < 1 { - t.Fatalf("got NumThread: %d, wanted at least 1", value) - } -} - -func TestRuntimeMemStatsBlocking(t *testing.T) { - if g := runtime.GOMAXPROCS(0); g < 2 { - t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g) - } - ch := make(chan int) - go testRuntimeMemStatsBlocking(ch) - var memStats runtime.MemStats - t0 := time.Now() - runtime.ReadMemStats(&memStats) - t1 := time.Now() - t.Log("i++ during runtime.ReadMemStats:", <-ch) - go testRuntimeMemStatsBlocking(ch) - d := t1.Sub(t0) - t.Log(d) - time.Sleep(d) - t.Log("i++ during time.Sleep:", <-ch) -} - -func testRuntimeMemStatsBlocking(ch chan int) { - i := 0 - for { - select { - case ch <- i: - return - default: - i++ - } - } -} diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go new file mode 100644 index 000000000000..c68939af1ef7 --- /dev/null +++ b/metrics/runtimehistogram.go @@ -0,0 +1,319 @@ +package metrics + +import ( + "math" + "runtime/metrics" + "sort" + "sync/atomic" +) + +func getOrRegisterRuntimeHistogram(name string, scale float64, r Registry) *runtimeHistogram { + if r == nil { + r = DefaultRegistry + } + constructor := func() Histogram { return newRuntimeHistogram(scale) } + return r.GetOrRegister(name, constructor).(*runtimeHistogram) +} + +// runtimeHistogram wraps a runtime/metrics histogram. +type runtimeHistogram struct { + v atomic.Value + scaleFactor float64 +} + +func newRuntimeHistogram(scale float64) *runtimeHistogram { + h := &runtimeHistogram{scaleFactor: scale} + h.update(&metrics.Float64Histogram{}) + return h +} + +func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { + if mh == nil { + // The update value can be nil if the current Go version doesn't support a + // requested metric. It's just easier to handle nil here than putting + // conditionals everywhere. + return + } + + s := runtimeHistogramSnapshot{ + Counts: make([]uint64, len(mh.Counts)), + Buckets: make([]float64, len(mh.Buckets)), + } + copy(s.Counts, mh.Counts) + copy(s.Buckets, mh.Buckets) + for i, b := range s.Buckets { + s.Buckets[i] = b * h.scaleFactor + } + h.v.Store(&s) +} + +func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { + return h.v.Load().(*runtimeHistogramSnapshot) +} + +func (h *runtimeHistogram) Clear() { + panic("runtimeHistogram does not support Clear") +} +func (h *runtimeHistogram) Update(int64) { + panic("runtimeHistogram does not support Update") +} +func (h *runtimeHistogram) Sample() Sample { + return NilSample{} +} + +// Snapshot returns a non-changing cop of the histogram. +func (h *runtimeHistogram) Snapshot() Histogram { + return h.load() +} + +// Count returns the sample count. +func (h *runtimeHistogram) Count() int64 { + return h.load().Count() +} + +// Mean returns an approximation of the mean. +func (h *runtimeHistogram) Mean() float64 { + return h.load().Mean() +} + +// StdDev approximates the standard deviation of the histogram. +func (h *runtimeHistogram) StdDev() float64 { + return h.load().StdDev() +} + +// Variance approximates the variance of the histogram. +func (h *runtimeHistogram) Variance() float64 { + return h.load().Variance() +} + +// Percentile computes the p'th percentile value. 
+func (h *runtimeHistogram) Percentile(p float64) float64 { + return h.load().Percentile(p) +} + +// Percentiles computes all requested percentile values. +func (h *runtimeHistogram) Percentiles(ps []float64) []float64 { + return h.load().Percentiles(ps) +} + +// Max returns the highest sample value. +func (h *runtimeHistogram) Max() int64 { + return h.load().Max() +} + +// Min returns the lowest sample value. +func (h *runtimeHistogram) Min() int64 { + return h.load().Min() +} + +// Sum returns the sum of all sample values. +func (h *runtimeHistogram) Sum() int64 { + return h.load().Sum() +} + +type runtimeHistogramSnapshot metrics.Float64Histogram + +func (h *runtimeHistogramSnapshot) Clear() { + panic("runtimeHistogram does not support Clear") +} +func (h *runtimeHistogramSnapshot) Update(int64) { + panic("runtimeHistogram does not support Update") +} +func (h *runtimeHistogramSnapshot) Sample() Sample { + return NilSample{} +} + +func (h *runtimeHistogramSnapshot) Snapshot() Histogram { + return h +} + +// Count returns the sample count. +func (h *runtimeHistogramSnapshot) Count() int64 { + var count int64 + for _, c := range h.Counts { + count += int64(c) + } + return count +} + +// Mean returns an approximation of the mean. +func (h *runtimeHistogramSnapshot) Mean() float64 { + if len(h.Counts) == 0 { + return 0 + } + mean, _ := h.mean() + return mean +} + +// mean computes the mean and also the total sample count. +func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) { + var sum float64 + for i, c := range h.Counts { + midpoint := h.midpoint(i) + sum += midpoint * float64(c) + totalCount += float64(c) + } + return sum / totalCount, totalCount +} + +func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 { + high := h.Buckets[bucket+1] + low := h.Buckets[bucket] + if math.IsInf(high, 1) { + // The edge of the highest bucket can be +Inf, and it's supposed to mean that this + // bucket contains all remaining samples > low. We can't get the middle of an + // infinite range, so just return the lower bound of this bucket instead. + return low + } + if math.IsInf(low, -1) { + // Similarly, we can get -Inf in the left edge of the lowest bucket, + // and it means the bucket contains all remaining values < high. + return high + } + return (low + high) / 2 +} + +// StdDev approximates the standard deviation of the histogram. +func (h *runtimeHistogramSnapshot) StdDev() float64 { + return math.Sqrt(h.Variance()) +} + +// Variance approximates the variance of the histogram. +func (h *runtimeHistogramSnapshot) Variance() float64 { + if len(h.Counts) == 0 { + return 0 + } + + mean, totalCount := h.mean() + if totalCount <= 1 { + // There is no variance when there are zero or one items. + return 0 + } + + var sum float64 + for i, c := range h.Counts { + midpoint := h.midpoint(i) + d := midpoint - mean + sum += float64(c) * (d * d) + } + return sum / (totalCount - 1) +} + +// Percentile computes the p'th percentile value. +func (h *runtimeHistogramSnapshot) Percentile(p float64) float64 { + threshold := float64(h.Count()) * p + values := [1]float64{threshold} + h.computePercentiles(values[:]) + return values[0] +} + +// Percentiles computes all requested percentile values. +func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 { + // Compute threshold values. We need these to be sorted + // for the percentile computation, but restore the original + // order later, so keep the indexes as well. 
+ count := float64(h.Count()) + thresholds := make([]float64, len(ps)) + indexes := make([]int, len(ps)) + for i, percentile := range ps { + thresholds[i] = count * math.Max(0, math.Min(1.0, percentile)) + indexes[i] = i + } + sort.Sort(floatsAscendingKeepingIndex{thresholds, indexes}) + + // Now compute. The result is stored back into the thresholds slice. + h.computePercentiles(thresholds) + + // Put the result back into the requested order. + sort.Sort(floatsByIndex{thresholds, indexes}) + return thresholds +} + +func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) { + var totalCount float64 + for i, count := range h.Counts { + totalCount += float64(count) + + for len(thresh) > 0 && thresh[0] < totalCount { + thresh[0] = h.Buckets[i] + thresh = thresh[1:] + } + if len(thresh) == 0 { + return + } + } +} + +// Note: runtime/metrics.Float64Histogram is a collection of float64s, but the methods +// below need to return int64 to satisfy the interface. The histogram provided by runtime +// also doesn't keep track of individual samples, so results are approximated. + +// Max returns the highest sample value. +func (h *runtimeHistogramSnapshot) Max() int64 { + for i := len(h.Counts) - 1; i >= 0; i-- { + count := h.Counts[i] + if count > 0 { + edge := h.Buckets[i+1] + if math.IsInf(edge, 1) { + edge = h.Buckets[i] + } + return int64(math.Ceil(edge)) + } + } + return 0 +} + +// Min returns the lowest sample value. +func (h *runtimeHistogramSnapshot) Min() int64 { + for i, count := range h.Counts { + if count > 0 { + return int64(math.Floor(h.Buckets[i])) + } + } + return 0 +} + +// Sum returns the sum of all sample values. +func (h *runtimeHistogramSnapshot) Sum() int64 { + var sum float64 + for i := range h.Counts { + sum += h.Buckets[i] * float64(h.Counts[i]) + } + return int64(math.Ceil(sum)) +} + +type floatsAscendingKeepingIndex struct { + values []float64 + indexes []int +} + +func (s floatsAscendingKeepingIndex) Len() int { + return len(s.values) +} + +func (s floatsAscendingKeepingIndex) Less(i, j int) bool { + return s.values[i] < s.values[j] +} + +func (s floatsAscendingKeepingIndex) Swap(i, j int) { + s.values[i], s.values[j] = s.values[j], s.values[i] + s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i] +} + +type floatsByIndex struct { + values []float64 + indexes []int +} + +func (s floatsByIndex) Len() int { + return len(s.values) +} + +func (s floatsByIndex) Less(i, j int) bool { + return s.indexes[i] < s.indexes[j] +} + +func (s floatsByIndex) Swap(i, j int) { + s.values[i], s.values[j] = s.values[j], s.values[i] + s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i] +} diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go new file mode 100644 index 000000000000..d53a01438311 --- /dev/null +++ b/metrics/runtimehistogram_test.go @@ -0,0 +1,133 @@ +package metrics + +import ( + "fmt" + "math" + "reflect" + "runtime/metrics" + "testing" +) + +var _ Histogram = (*runtimeHistogram)(nil) + +type runtimeHistogramTest struct { + h metrics.Float64Histogram + + Count int64 + Min int64 + Max int64 + Sum int64 + Mean float64 + Variance float64 + StdDev float64 + Percentiles []float64 // .5 .8 .9 .99 .995 +} + +// This test checks the results of statistical functions implemented +// by runtimeHistogramSnapshot. 
+func TestRuntimeHistogramStats(t *testing.T) { + tests := []runtimeHistogramTest{ + 0: { + h: metrics.Float64Histogram{ + Counts: []uint64{}, + Buckets: []float64{}, + }, + Count: 0, + Max: 0, + Min: 0, + Sum: 0, + Mean: 0, + Variance: 0, + StdDev: 0, + Percentiles: []float64{0, 0, 0, 0, 0}, + }, + 1: { + // This checks the case where the highest bucket is +Inf. + h: metrics.Float64Histogram{ + Counts: []uint64{0, 1, 2}, + Buckets: []float64{0, 0.5, 1, math.Inf(1)}, + }, + Count: 3, + Max: 1, + Min: 0, + Sum: 3, + Mean: 0.9166666, + Percentiles: []float64{1, 1, 1, 1, 1}, + Variance: 0.020833, + StdDev: 0.144433, + }, + 2: { + h: metrics.Float64Histogram{ + Counts: []uint64{8, 6, 3, 1}, + Buckets: []float64{12, 16, 18, 24, 25}, + }, + Count: 18, + Max: 25, + Min: 12, + Sum: 270, + Mean: 16.75, + Variance: 10.3015, + StdDev: 3.2096, + Percentiles: []float64{16, 18, 18, 24, 24}, + }, + } + + for i, test := range tests { + t.Run(fmt.Sprint(i), func(t *testing.T) { + s := runtimeHistogramSnapshot(test.h) + + if v := s.Count(); v != test.Count { + t.Errorf("Count() = %v, want %v", v, test.Count) + } + if v := s.Min(); v != test.Min { + t.Errorf("Min() = %v, want %v", v, test.Min) + } + if v := s.Max(); v != test.Max { + t.Errorf("Max() = %v, want %v", v, test.Max) + } + if v := s.Sum(); v != test.Sum { + t.Errorf("Sum() = %v, want %v", v, test.Sum) + } + if v := s.Mean(); !approxEqual(v, test.Mean, 0.0001) { + t.Errorf("Mean() = %v, want %v", v, test.Mean) + } + if v := s.Variance(); !approxEqual(v, test.Variance, 0.0001) { + t.Errorf("Variance() = %v, want %v", v, test.Variance) + } + if v := s.StdDev(); !approxEqual(v, test.StdDev, 0.0001) { + t.Errorf("StdDev() = %v, want %v", v, test.StdDev) + } + ps := []float64{.5, .8, .9, .99, .995} + if v := s.Percentiles(ps); !reflect.DeepEqual(v, test.Percentiles) { + t.Errorf("Percentiles(%v) = %v, want %v", ps, v, test.Percentiles) + } + }) + } +} + +func approxEqual(x, y, ε float64) bool { + if math.IsInf(x, -1) && math.IsInf(y, -1) { + return true + } + if math.IsInf(x, 1) && math.IsInf(y, 1) { + return true + } + if math.IsNaN(x) && math.IsNaN(y) { + return true + } + return math.Abs(x-y) < ε +} + +// This test verifies that requesting Percentiles in unsorted order +// returns them in the requested order. 
+func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) { + p := runtimeHistogramSnapshot{ + Counts: []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + } + result := p.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2}) + expected := []float64{10, 2, 5, 1, 2} + if !reflect.DeepEqual(result, expected) { + t.Fatal("wrong result:", result) + } +} From 40f47a641ba6cf02933f78c9e5a20356a5191748 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 43/74] metrics/influxdb: fix time ticker leaks (#26507) --- metrics/influxdb/influxdbv2.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go index 583a1ebc78e0..4fa0d02bcdca 100644 --- a/metrics/influxdb/influxdbv2.go +++ b/metrics/influxdb/influxdbv2.go @@ -62,6 +62,9 @@ func (r *v2Reporter) run() { intervalTicker := time.NewTicker(r.interval) pingTicker := time.NewTicker(time.Second * 5) + defer intervalTicker.Stop() + defer pingTicker.Stop() + for { select { case <-intervalTicker.C: From 76320b4b9807d1e33f20e20e58bbedc982ab3602 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 44/74] metrics/librato: use http package to replace http method names (#26535) --- metrics/librato/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/librato/client.go b/metrics/librato/client.go index a807c392af01..89ecb9ec64da 100644 --- a/metrics/librato/client.go +++ b/metrics/librato/client.go @@ -80,7 +80,7 @@ func (lc *LibratoClient) PostMetrics(batch Batch) (err error) { return } - if req, err = http.NewRequest("POST", MetricsPostUrl, bytes.NewBuffer(js)); err != nil { + if req, err = http.NewRequest(http.MethodPost, MetricsPostUrl, bytes.NewBuffer(js)); err != nil { return } From 6199c84050dff2ca21a070a2dba92d5635138fd0 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 45/74] metrics: remove deprecated uses of math.rand (#26710) --- metrics/sample.go | 29 +++++++++++++++++++++++++++-- metrics/sample_test.go | 19 +++++-------------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/metrics/sample.go b/metrics/sample.go index fa2bfb274e39..afcaa2118426 100644 --- a/metrics/sample.go +++ b/metrics/sample.go @@ -41,6 +41,7 @@ type ExpDecaySample struct { reservoirSize int t0, t1 time.Time values *expDecaySampleHeap + rand *rand.Rand } // NewExpDecaySample constructs a new exponentially-decaying sample with the @@ -59,6 +60,12 @@ func NewExpDecaySample(reservoirSize int, alpha float64) Sample { return s } +// SetRand sets the random source (useful in tests) +func (s *ExpDecaySample) SetRand(prng *rand.Rand) Sample { + s.rand = prng + return s +} + // Clear clears all samples. 
func (s *ExpDecaySample) Clear() { s.mutex.Lock() @@ -168,8 +175,14 @@ func (s *ExpDecaySample) update(t time.Time, v int64) { if s.values.Size() == s.reservoirSize { s.values.Pop() } + var f64 float64 + if s.rand != nil { + f64 = s.rand.Float64() + } else { + f64 = rand.Float64() + } s.values.Push(expDecaySample{ - k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / rand.Float64(), + k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / f64, v: v, }) if t.After(s.t1) { @@ -402,6 +415,7 @@ type UniformSample struct { mutex sync.Mutex reservoirSize int values []int64 + rand *rand.Rand } // NewUniformSample constructs a new uniform sample with the given reservoir @@ -416,6 +430,12 @@ func NewUniformSample(reservoirSize int) Sample { } } +// SetRand sets the random source (useful in tests) +func (s *UniformSample) SetRand(prng *rand.Rand) Sample { + s.rand = prng + return s +} + // Clear clears all samples. func (s *UniformSample) Clear() { s.mutex.Lock() @@ -511,7 +531,12 @@ func (s *UniformSample) Update(v int64) { if len(s.values) < s.reservoirSize { s.values = append(s.values, v) } else { - r := rand.Int63n(s.count) + var r int64 + if s.rand != nil { + r = s.rand.Int63n(s.count) + } else { + r = rand.Int63n(s.count) + } if r < int64(len(s.values)) { s.values[int(r)] = v } diff --git a/metrics/sample_test.go b/metrics/sample_test.go index 5b2ee96e3679..3ae128d56f67 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -80,7 +80,6 @@ func BenchmarkUniformSample1028(b *testing.B) { } func TestExpDecaySample10(t *testing.T) { - rand.Seed(1) s := NewExpDecaySample(100, 0.99) for i := 0; i < 10; i++ { s.Update(int64(i)) @@ -102,7 +101,6 @@ func TestExpDecaySample10(t *testing.T) { } func TestExpDecaySample100(t *testing.T) { - rand.Seed(1) s := NewExpDecaySample(1000, 0.01) for i := 0; i < 100; i++ { s.Update(int64(i)) @@ -124,7 +122,6 @@ func TestExpDecaySample100(t *testing.T) { } func TestExpDecaySample1000(t *testing.T) { - rand.Seed(1) s := NewExpDecaySample(100, 0.99) for i := 0; i < 1000; i++ { s.Update(int64(i)) @@ -150,7 +147,6 @@ func TestExpDecaySample1000(t *testing.T) { // The priority becomes +Inf quickly after starting if this is done, // effectively freezing the set of samples until a rescale step happens. 
func TestExpDecaySampleNanosecondRegression(t *testing.T) { - rand.Seed(1) s := NewExpDecaySample(100, 0.99) for i := 0; i < 100; i++ { s.Update(10) @@ -183,8 +179,7 @@ func TestExpDecaySampleRescale(t *testing.T) { func TestExpDecaySampleSnapshot(t *testing.T) { now := time.Now() - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) + s := NewExpDecaySample(100, 0.99).(*ExpDecaySample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) } @@ -195,8 +190,7 @@ func TestExpDecaySampleSnapshot(t *testing.T) { func TestExpDecaySampleStatistics(t *testing.T) { now := time.Now() - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) + s := NewExpDecaySample(100, 0.99).(*ExpDecaySample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) } @@ -204,7 +198,6 @@ func TestExpDecaySampleStatistics(t *testing.T) { } func TestUniformSample(t *testing.T) { - rand.Seed(1) s := NewUniformSample(100) for i := 0; i < 1000; i++ { s.Update(int64(i)) @@ -226,7 +219,6 @@ func TestUniformSample(t *testing.T) { } func TestUniformSampleIncludesTail(t *testing.T) { - rand.Seed(1) s := NewUniformSample(100) max := 100 for i := 0; i < max; i++ { @@ -244,7 +236,7 @@ func TestUniformSampleIncludesTail(t *testing.T) { } func TestUniformSampleSnapshot(t *testing.T) { - s := NewUniformSample(100) + s := NewUniformSample(100).(*UniformSample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.Update(int64(i)) } @@ -254,8 +246,7 @@ func TestUniformSampleSnapshot(t *testing.T) { } func TestUniformSampleStatistics(t *testing.T) { - rand.Seed(1) - s := NewUniformSample(100) + s := NewUniformSample(100).(*UniformSample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.Update(int64(i)) } @@ -327,7 +318,7 @@ func testUniformSampleStatistics(t *testing.T, s Sample) { if ps[1] != 7380.5 { t.Errorf("75th percentile: 7380.5 != %v\n", ps[1]) } - if math.Abs(ps[2]-9986.429999999998) > epsilonPercentile { + if math.Abs(9986.429999999998-ps[2]) > epsilonPercentile { t.Errorf("99th percentile: 9986.429999999998 != %v\n", ps[2]) } } From c616077fb54efd9f8522024b58a59db6989f2d0f Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 46/74] metrics: improve accuracy of CPU gauges (#26793) This PR changes metrics collection to actually measure the time interval between collections, rather than assume 3 seconds. I did some ad hoc profiling, and on slower hardware (eg, my Raspberry Pi 4) I routinely saw intervals between 3.3 - 3.5 seconds, with some being as high as 4.5 seconds. This will generally cause the CPU gauge readings to be too high, and in some cases can cause impossibly large values for the CPU load metrics (eg. greater than 400 for a 4 core CPU). --------- Co-authored-by: Felix Lange --- metrics/cpu.go | 7 ++++--- metrics/cpu_enabled.go | 4 ++-- metrics/cputime_nop.go | 2 +- metrics/cputime_unix.go | 4 ++-- metrics/metrics.go | 21 +++++++++++++++------ 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/metrics/cpu.go b/metrics/cpu.go index 72ece16e0768..3a49cd42493a 100644 --- a/metrics/cpu.go +++ b/metrics/cpu.go @@ -17,8 +17,9 @@ package metrics // CPUStats is the system and process CPU stats. +// All values are in seconds. 
type CPUStats struct { - GlobalTime int64 // Time spent by the CPU working on all processes - GlobalWait int64 // Time spent by waiting on disk for all processes - LocalTime int64 // Time spent by the CPU working on this process + GlobalTime float64 // Time spent by the CPU working on all processes + GlobalWait float64 // Time spent by waiting on disk for all processes + LocalTime float64 // Time spent by the CPU working on this process } diff --git a/metrics/cpu_enabled.go b/metrics/cpu_enabled.go index 5d0e8485d7fa..efb2234c9990 100644 --- a/metrics/cpu_enabled.go +++ b/metrics/cpu_enabled.go @@ -38,7 +38,7 @@ func ReadCPUStats(stats *CPUStats) { } // requesting all cpu times will always return an array with only one time stats entry timeStat := timeStats[0] - stats.GlobalTime = int64((timeStat.User + timeStat.Nice + timeStat.System) * cpu.ClocksPerSec) - stats.GlobalWait = int64((timeStat.Iowait) * cpu.ClocksPerSec) + stats.GlobalTime = timeStat.User + timeStat.Nice + timeStat.System + stats.GlobalWait = timeStat.Iowait stats.LocalTime = getProcessCPUTime() } diff --git a/metrics/cputime_nop.go b/metrics/cputime_nop.go index 0188735a7833..465d88c4d232 100644 --- a/metrics/cputime_nop.go +++ b/metrics/cputime_nop.go @@ -21,6 +21,6 @@ package metrics // getProcessCPUTime returns 0 on Windows as there is no system call to resolve // the actual process' CPU time. -func getProcessCPUTime() int64 { +func getProcessCPUTime() float64 { return 0 } diff --git a/metrics/cputime_unix.go b/metrics/cputime_unix.go index 50bb95b31e93..a5b50ebaa27f 100644 --- a/metrics/cputime_unix.go +++ b/metrics/cputime_unix.go @@ -26,11 +26,11 @@ import ( ) // getProcessCPUTime retrieves the process' CPU time since program startup. -func getProcessCPUTime() int64 { +func getProcessCPUTime() float64 { var usage syscall.Rusage if err := syscall.Getrusage(syscall.RUSAGE_SELF, &usage); err != nil { log.Warn("Failed to retrieve CPU time", "err", err) return 0 } - return int64(usage.Utime.Sec+usage.Stime.Sec)*100 + int64(usage.Utime.Usec+usage.Stime.Usec)/10000 //nolint:unconvert + return float64(usage.Utime.Sec+usage.Stime.Sec) + float64(usage.Utime.Usec+usage.Stime.Usec)/1000000 //nolint:unconvert } diff --git a/metrics/metrics.go b/metrics/metrics.go index 2b8bad8bee36..00f46e2a7c0a 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -127,8 +127,6 @@ func CollectProcessMetrics(refresh time.Duration) { return } - refreshFreq := int64(refresh / time.Second) - // Create the various data collectors var ( cpustats = make([]CPUStats, 2) @@ -163,14 +161,25 @@ func CollectProcessMetrics(refresh time.Duration) { diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) ) + var lastCollectTime time.Time + // Iterate loading the different stats and updating the meters. now, prev := 0, 1 for ; ; now, prev = prev, now { - // CPU + // Gather CPU times. 
ReadCPUStats(&cpustats[now]) - cpuSysLoad.Update((cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / refreshFreq) - cpuSysWait.Update((cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / refreshFreq) - cpuProcLoad.Update((cpustats[now].LocalTime - cpustats[prev].LocalTime) / refreshFreq) + collectTime := time.Now() + secondsSinceLastCollect := collectTime.Sub(lastCollectTime).Seconds() + lastCollectTime = collectTime + if secondsSinceLastCollect > 0 { + sysLoad := (cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / secondsSinceLastCollect + sysWait := (cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / secondsSinceLastCollect + procLoad := (cpustats[now].LocalTime - cpustats[prev].LocalTime) / secondsSinceLastCollect + // Convert to integer percentage. + cpuSysLoad.Update(int64(sysLoad * 100)) + cpuSysWait.Update(int64(sysWait * 100)) + cpuProcLoad.Update(int64(procLoad * 100)) + } // Threads cpuThreads.Update(int64(threadCreateProfile.Count())) From 9eae1243cd3d215a71b1e1e58ba26719b8100972 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 47/74] metrics: add cpu counters (#26796) This PR adds counter metrics for the CPU system and the Geth process. Currently the only metrics available for these items are gauges. Gauges are fine when the consumer scrapes metrics data at the same interval as Geth produces new values (every 3 seconds), but it is likely that most consumers will not scrape that often. Intervals of 10, 15, or maybe even 30 seconds are probably more common. So the problem is, how does the consumer estimate what the CPU was doing in between scrapes. With a counter, it's easy ... you just subtract two successive values and divide by the time to get a nice, accurate average. But with a gauge, you can't do that. A gauge reading is an instantaneous picture of what was happening at that moment, but it gives you no idea about what was going on between scrapes. Taking an average of values is meaningless. --- metrics/counter_float64.go | 153 +++++++++++++++++++++++++++ metrics/counter_float_64_test.go | 77 ++++++++++++++ metrics/exp/exp.go | 7 ++ metrics/graphite.go | 2 + metrics/influxdb/influxdb.go | 10 ++ metrics/influxdb/influxdbv2.go | 17 +-- metrics/librato/librato.go | 11 ++ metrics/log.go | 3 + metrics/metrics.go | 19 ++-- metrics/metrics_test.go | 2 + metrics/opentsdb.go | 2 + metrics/prometheus/collector.go | 4 + metrics/prometheus/collector_test.go | 7 ++ metrics/prometheus/prometheus.go | 2 + metrics/registry.go | 4 +- metrics/syslog.go | 2 + metrics/writer.go | 3 + 17 files changed, 312 insertions(+), 13 deletions(-) create mode 100644 metrics/counter_float64.go create mode 100644 metrics/counter_float_64_test.go diff --git a/metrics/counter_float64.go b/metrics/counter_float64.go new file mode 100644 index 000000000000..f05f62fed632 --- /dev/null +++ b/metrics/counter_float64.go @@ -0,0 +1,153 @@ +package metrics + +import ( + "sync" +) + +// CounterFloat64 holds a float64 value that can be incremented and decremented. +type CounterFloat64 interface { + Clear() + Count() float64 + Dec(float64) + Inc(float64) + Snapshot() CounterFloat64 +} + +// GetOrRegisterCounterFloat64 returns an existing CounterFloat64 or constructs and registers +// a new StandardCounterFloat64. 
+func GetOrRegisterCounterFloat64(name string, r Registry) CounterFloat64 { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewCounterFloat64).(CounterFloat64) +} + +// GetOrRegisterCounterFloat64Forced returns an existing CounterFloat64 or constructs and registers a +// new CounterFloat64 no matter the global switch is enabled or not. +// Be sure to unregister the counter from the registry once it is of no use to +// allow for garbage collection. +func GetOrRegisterCounterFloat64Forced(name string, r Registry) CounterFloat64 { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewCounterFloat64Forced).(CounterFloat64) +} + +// NewCounterFloat64 constructs a new StandardCounterFloat64. +func NewCounterFloat64() CounterFloat64 { + if !Enabled { + return NilCounterFloat64{} + } + return &StandardCounterFloat64{count: 0.0} +} + +// NewCounterFloat64Forced constructs a new StandardCounterFloat64 and returns it no matter if +// the global switch is enabled or not. +func NewCounterFloat64Forced() CounterFloat64 { + return &StandardCounterFloat64{count: 0.0} +} + +// NewRegisteredCounterFloat64 constructs and registers a new StandardCounterFloat64. +func NewRegisteredCounterFloat64(name string, r Registry) CounterFloat64 { + c := NewCounterFloat64() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewRegisteredCounterFloat64Forced constructs and registers a new StandardCounterFloat64 +// and launches a goroutine no matter the global switch is enabled or not. +// Be sure to unregister the counter from the registry once it is of no use to +// allow for garbage collection. +func NewRegisteredCounterFloat64Forced(name string, r Registry) CounterFloat64 { + c := NewCounterFloat64Forced() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// CounterFloat64Snapshot is a read-only copy of another CounterFloat64. +type CounterFloat64Snapshot float64 + +// Clear panics. +func (CounterFloat64Snapshot) Clear() { + panic("Clear called on a CounterFloat64Snapshot") +} + +// Count returns the value at the time the snapshot was taken. +func (c CounterFloat64Snapshot) Count() float64 { return float64(c) } + +// Dec panics. +func (CounterFloat64Snapshot) Dec(float64) { + panic("Dec called on a CounterFloat64Snapshot") +} + +// Inc panics. +func (CounterFloat64Snapshot) Inc(float64) { + panic("Inc called on a CounterFloat64Snapshot") +} + +// Snapshot returns the snapshot. +func (c CounterFloat64Snapshot) Snapshot() CounterFloat64 { return c } + +// NilCounterFloat64 is a no-op CounterFloat64. +type NilCounterFloat64 struct{} + +// Clear is a no-op. +func (NilCounterFloat64) Clear() {} + +// Count is a no-op. +func (NilCounterFloat64) Count() float64 { return 0.0 } + +// Dec is a no-op. +func (NilCounterFloat64) Dec(i float64) {} + +// Inc is a no-op. +func (NilCounterFloat64) Inc(i float64) {} + +// Snapshot is a no-op. +func (NilCounterFloat64) Snapshot() CounterFloat64 { return NilCounterFloat64{} } + +// StandardCounterFloat64 is the standard implementation of a CounterFloat64 and uses the +// sync.Mutex package to manage a single float64 value. +type StandardCounterFloat64 struct { + mutex sync.Mutex + count float64 +} + +// Clear sets the counter to zero. +func (c *StandardCounterFloat64) Clear() { + c.mutex.Lock() + defer c.mutex.Unlock() + c.count = 0.0 +} + +// Count returns the current value. 
+func (c *StandardCounterFloat64) Count() float64 { + c.mutex.Lock() + defer c.mutex.Unlock() + return c.count +} + +// Dec decrements the counter by the given amount. +func (c *StandardCounterFloat64) Dec(v float64) { + c.mutex.Lock() + defer c.mutex.Unlock() + c.count -= v +} + +// Inc increments the counter by the given amount. +func (c *StandardCounterFloat64) Inc(v float64) { + c.mutex.Lock() + defer c.mutex.Unlock() + c.count += v +} + +// Snapshot returns a read-only copy of the counter. +func (c *StandardCounterFloat64) Snapshot() CounterFloat64 { + return CounterFloat64Snapshot(c.Count()) +} diff --git a/metrics/counter_float_64_test.go b/metrics/counter_float_64_test.go new file mode 100644 index 000000000000..44d9b4c20c85 --- /dev/null +++ b/metrics/counter_float_64_test.go @@ -0,0 +1,77 @@ +package metrics + +import "testing" + +func BenchmarkCounterFloat64(b *testing.B) { + c := NewCounterFloat64() + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Inc(1.0) + } +} + +func TestCounterFloat64Clear(t *testing.T) { + c := NewCounterFloat64() + c.Inc(1.0) + c.Clear() + if count := c.Count(); count != 0 { + t.Errorf("c.Count(): 0 != %v\n", count) + } +} + +func TestCounterFloat64Dec1(t *testing.T) { + c := NewCounterFloat64() + c.Dec(1.0) + if count := c.Count(); count != -1.0 { + t.Errorf("c.Count(): -1.0 != %v\n", count) + } +} + +func TestCounterFloat64Dec2(t *testing.T) { + c := NewCounterFloat64() + c.Dec(2.0) + if count := c.Count(); count != -2.0 { + t.Errorf("c.Count(): -2.0 != %v\n", count) + } +} + +func TestCounterFloat64Inc1(t *testing.T) { + c := NewCounterFloat64() + c.Inc(1.0) + if count := c.Count(); count != 1.0 { + t.Errorf("c.Count(): 1.0 != %v\n", count) + } +} + +func TestCounterFloat64Inc2(t *testing.T) { + c := NewCounterFloat64() + c.Inc(2.0) + if count := c.Count(); count != 2.0 { + t.Errorf("c.Count(): 2.0 != %v\n", count) + } +} + +func TestCounterFloat64Snapshot(t *testing.T) { + c := NewCounterFloat64() + c.Inc(1.0) + snapshot := c.Snapshot() + c.Inc(1.0) + if count := snapshot.Count(); count != 1.0 { + t.Errorf("c.Count(): 1.0 != %v\n", count) + } +} + +func TestCounterFloat64Zero(t *testing.T) { + c := NewCounterFloat64() + if count := c.Count(); count != 0 { + t.Errorf("c.Count(): 0 != %v\n", count) + } +} + +func TestGetOrRegisterCounterFloat64(t *testing.T) { + r := NewRegistry() + NewRegisteredCounterFloat64("foo", r).Inc(47.0) + if c := GetOrRegisterCounterFloat64("foo", r); c.Count() != 47.0 { + t.Fatal(c) + } +} diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 622c2b5289af..77b198e610fd 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -100,6 +100,11 @@ func (exp *exp) publishCounter(name string, metric metrics.Counter) { v.Set(metric.Count()) } +func (exp *exp) publishCounterFloat64(name string, metric metrics.CounterFloat64) { + v := exp.getFloat(name) + v.Set(metric.Count()) +} + func (exp *exp) publishGauge(name string, metric metrics.Gauge) { v := exp.getInt(name) v.Set(metric.Value()) @@ -167,6 +172,8 @@ func (exp *exp) syncToExpvar() { switch i := i.(type) { case metrics.Counter: exp.publishCounter(name, i) + case metrics.CounterFloat64: + exp.publishCounterFloat64(name, i) case metrics.Gauge: exp.publishGauge(name, i) case metrics.GaugeFloat64: diff --git a/metrics/graphite.go b/metrics/graphite.go index 142eec86beb4..29f72b0c4181 100644 --- a/metrics/graphite.go +++ b/metrics/graphite.go @@ -67,6 +67,8 @@ func graphite(c *GraphiteConfig) error { switch metric := i.(type) { case Counter: fmt.Fprintf(w, "%s.%s.count %d 
%d\n", c.Prefix, name, metric.Count(), now) + case CounterFloat64: + fmt.Fprintf(w, "%s.%s.count %f %d\n", c.Prefix, name, metric.Count(), now) case Gauge: fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now) case GaugeFloat64: diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 56f19e944abf..9f331183f413 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -113,6 +113,16 @@ func (r *reporter) send() error { }, Time: now, }) + case metrics.CounterFloat64: + count := metric.Count() + pts = append(pts, client.Point{ + Measurement: fmt.Sprintf("%s%s.count", namespace, name), + Tags: r.tags, + Fields: map[string]interface{}{ + "value": count, + }, + Time: now, + }) case metrics.Gauge: ms := metric.Snapshot() pts = append(pts, client.Point{ diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go index 4fa0d02bcdca..8a4dbab3c3ca 100644 --- a/metrics/influxdb/influxdbv2.go +++ b/metrics/influxdb/influxdbv2.go @@ -24,8 +24,6 @@ type v2Reporter struct { client influxdb2.Client write api.WriteAPI - - cache map[string]int64 } // InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags @@ -39,7 +37,6 @@ func InfluxDBV2WithTags(r metrics.Registry, d time.Duration, endpoint string, to organization: organization, namespace: namespace, tags: tags, - cache: make(map[string]int64), } rep.client = influxdb2.NewClient(rep.endpoint, rep.token) @@ -87,17 +84,25 @@ func (r *v2Reporter) send() { case metrics.Counter: v := metric.Count() - l := r.cache[name] measurement := fmt.Sprintf("%s%s.count", namespace, name) fields := map[string]interface{}{ - "value": v - l, + "value": v, } pt := influxdb2.NewPoint(measurement, r.tags, fields, now) r.write.WritePoint(pt) - r.cache[name] = v + case metrics.CounterFloat64: + v := metric.Count() + + measurement := fmt.Sprintf("%s%s.count", namespace, name) + fields := map[string]interface{}{ + "value": v, + } + + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) case metrics.Gauge: ms := metric.Snapshot() diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go index 9323290bc51b..7fa4c56b5db3 100644 --- a/metrics/librato/librato.go +++ b/metrics/librato/librato.go @@ -107,6 +107,17 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba } snapshot.Counters = append(snapshot.Counters, measurement) } + case metrics.CounterFloat64: + if m.Count() > 0 { + measurement[Name] = fmt.Sprintf("%s.%s", name, "count") + measurement[Value] = m.Count() + measurement[Attributes] = map[string]interface{}{ + DisplayUnitsLong: Operations, + DisplayUnitsShort: OperationsShort, + DisplayMin: "0", + } + snapshot.Counters = append(snapshot.Counters, measurement) + } case metrics.Gauge: measurement[Name] = name measurement[Value] = float64(m.Value()) diff --git a/metrics/log.go b/metrics/log.go index 0c8ea7c97123..d1ce627a8378 100644 --- a/metrics/log.go +++ b/metrics/log.go @@ -24,6 +24,9 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { case Counter: l.Printf("counter %s\n", name) l.Printf(" count: %9d\n", metric.Count()) + case CounterFloat64: + l.Printf("counter %s\n", name) + l.Printf(" count: %f\n", metric.Count()) case Gauge: l.Printf("gauge %s\n", name) l.Printf(" value: %9d\n", metric.Value()) diff --git a/metrics/metrics.go b/metrics/metrics.go index 00f46e2a7c0a..3468cfe8e407 100644 --- 
a/metrics/metrics.go +++ b/metrics/metrics.go @@ -144,6 +144,9 @@ func CollectProcessMetrics(refresh time.Duration) { cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) + cpuSysLoadTotal = GetOrRegisterCounterFloat64("system/cpu/sysload/total", DefaultRegistry) + cpuSysWaitTotal = GetOrRegisterCounterFloat64("system/cpu/syswait/total", DefaultRegistry) + cpuProcLoadTotal = GetOrRegisterCounterFloat64("system/cpu/procload/total", DefaultRegistry) cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil) @@ -172,13 +175,17 @@ func CollectProcessMetrics(refresh time.Duration) { secondsSinceLastCollect := collectTime.Sub(lastCollectTime).Seconds() lastCollectTime = collectTime if secondsSinceLastCollect > 0 { - sysLoad := (cpustats[now].GlobalTime - cpustats[prev].GlobalTime) / secondsSinceLastCollect - sysWait := (cpustats[now].GlobalWait - cpustats[prev].GlobalWait) / secondsSinceLastCollect - procLoad := (cpustats[now].LocalTime - cpustats[prev].LocalTime) / secondsSinceLastCollect + sysLoad := cpustats[now].GlobalTime - cpustats[prev].GlobalTime + sysWait := cpustats[now].GlobalWait - cpustats[prev].GlobalWait + procLoad := cpustats[now].LocalTime - cpustats[prev].LocalTime // Convert to integer percentage. - cpuSysLoad.Update(int64(sysLoad * 100)) - cpuSysWait.Update(int64(sysWait * 100)) - cpuProcLoad.Update(int64(procLoad * 100)) + cpuSysLoad.Update(int64(sysLoad / secondsSinceLastCollect * 100)) + cpuSysWait.Update(int64(sysWait / secondsSinceLastCollect * 100)) + cpuProcLoad.Update(int64(procLoad / secondsSinceLastCollect * 100)) + // increment counters (ms) + cpuSysLoadTotal.Inc(sysLoad) + cpuSysWaitTotal.Inc(sysWait) + cpuProcLoadTotal.Inc(procLoad) } // Threads diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index e3fde1ea62ce..534c44139b36 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -18,6 +18,7 @@ func TestReadRuntimeValues(t *testing.T) { func BenchmarkMetrics(b *testing.B) { r := NewRegistry() c := NewRegisteredCounter("counter", r) + cf := NewRegisteredCounterFloat64("counterfloat64", r) g := NewRegisteredGauge("gauge", r) gf := NewRegisteredGaugeFloat64("gaugefloat64", r) h := NewRegisteredHistogram("histogram", r, NewUniformSample(100)) @@ -71,6 +72,7 @@ func BenchmarkMetrics(b *testing.B) { //log.Println("go", i) for i := 0; i < b.N; i++ { c.Inc(1) + cf.Inc(1.0) g.Update(int64(i)) gf.Update(float64(i)) h.Update(int64(i)) diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go index 3fde55454ba9..c9fd2e75d5e5 100644 --- a/metrics/opentsdb.go +++ b/metrics/opentsdb.go @@ -71,6 +71,8 @@ func openTSDB(c *OpenTSDBConfig) error { switch metric := i.(type) { case Counter: fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Count(), shortHostname) + case CounterFloat64: + fmt.Fprintf(w, "put %s.%s.count %d %f host=%s\n", c.Prefix, name, now, metric.Count(), shortHostname) case Gauge: fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) case GaugeFloat64: diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index 23934b212c23..b7c238f601d7 100644 --- a/metrics/prometheus/collector.go +++ 
b/metrics/prometheus/collector.go @@ -50,6 +50,10 @@ func (c *collector) addCounter(name string, m metrics.Counter) { c.writeGaugeCounter(name, m.Count()) } +func (c *collector) addCounterFloat64(name string, m metrics.CounterFloat64) { + c.writeGaugeCounter(name, m.Count()) +} + func (c *collector) addGauge(name string, m metrics.Gauge) { c.writeGaugeCounter(name, m.Value()) } diff --git a/metrics/prometheus/collector_test.go b/metrics/prometheus/collector_test.go index 4c2ffcb1fed6..eb6dbe603f99 100644 --- a/metrics/prometheus/collector_test.go +++ b/metrics/prometheus/collector_test.go @@ -20,6 +20,10 @@ func TestCollector(t *testing.T) { counter.Inc(12345) c.addCounter("test/counter", counter) + counterfloat64 := metrics.NewCounterFloat64() + counterfloat64.Inc(54321.98) + c.addCounterFloat64("test/counter_float64", counterfloat64) + gauge := metrics.NewGauge() gauge.Update(23456) c.addGauge("test/gauge", gauge) @@ -61,6 +65,9 @@ func TestCollector(t *testing.T) { const expectedOutput = `# TYPE test_counter gauge test_counter 12345 +# TYPE test_counter_float64 gauge +test_counter_float64 54321.98 + # TYPE test_gauge gauge test_gauge 23456 diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go index b367e6c3aa59..2d999e493dc8 100644 --- a/metrics/prometheus/prometheus.go +++ b/metrics/prometheus/prometheus.go @@ -45,6 +45,8 @@ func Handler(reg metrics.Registry) http.Handler { switch m := i.(type) { case metrics.Counter: c.addCounter(name, m.Snapshot()) + case metrics.CounterFloat64: + c.addCounterFloat64(name, m.Snapshot()) case metrics.Gauge: c.addGauge(name, m.Snapshot()) case metrics.GaugeFloat64: diff --git a/metrics/registry.go b/metrics/registry.go index cc34c9dfd2c8..c550b363d0a1 100644 --- a/metrics/registry.go +++ b/metrics/registry.go @@ -120,6 +120,8 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { switch metric := i.(type) { case Counter: values["count"] = metric.Count() + case CounterFloat64: + values["count"] = metric.Count() case Gauge: values["value"] = metric.Value() case GaugeFloat64: @@ -196,7 +198,7 @@ func (r *StandardRegistry) register(name string, i interface{}) error { return DuplicateMetric(name) } switch i.(type) { - case Counter, Gauge, GaugeFloat64, Healthcheck, Histogram, Meter, Timer, ResettingTimer: + case Counter, CounterFloat64, Gauge, GaugeFloat64, Healthcheck, Histogram, Meter, Timer, ResettingTimer: r.metrics[name] = i } return nil diff --git a/metrics/syslog.go b/metrics/syslog.go index 551a2bd0f072..f23b07e199f3 100644 --- a/metrics/syslog.go +++ b/metrics/syslog.go @@ -17,6 +17,8 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { switch metric := i.(type) { case Counter: w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Count())) + case CounterFloat64: + w.Info(fmt.Sprintf("counter %s: count: %f", name, metric.Count())) case Gauge: w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Value())) case GaugeFloat64: diff --git a/metrics/writer.go b/metrics/writer.go index 88521a80d9d7..256fbd14c9b9 100644 --- a/metrics/writer.go +++ b/metrics/writer.go @@ -29,6 +29,9 @@ func WriteOnce(r Registry, w io.Writer) { case Counter: fmt.Fprintf(w, "counter %s\n", namedMetric.name) fmt.Fprintf(w, " count: %9d\n", metric.Count()) + case CounterFloat64: + fmt.Fprintf(w, "counter %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %f\n", metric.Count()) case Gauge: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) fmt.Fprintf(w, " value: %9d\n", metric.Value()) From 
6e055a601d38f5788c43f04921786d7059c106fb Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 48/74] metrics/influxdb: reuse code between v1 and v2 reporters (#26963) --- go.mod | 2 +- go.sum | 4 +- metrics/influxdb/influxdb.go | 332 ++++++++++----------------------- metrics/influxdb/influxdbv1.go | 145 ++++++++++++++ metrics/influxdb/influxdbv2.go | 143 +------------- 5 files changed, 254 insertions(+), 372 deletions(-) create mode 100644 metrics/influxdb/influxdbv1.go diff --git a/go.mod b/go.mod index 3160395d2a06..28a24f8b6b38 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,6 @@ require ( github.com/hashicorp/golang-lru v0.5.4 github.com/holiman/uint256 v1.2.4 github.com/huin/goupnp v1.3.0 - github.com/influxdata/influxdb v1.7.9 github.com/jackpal/go-nat-pmp v1.0.2 github.com/julienschmidt/httprouter v1.3.0 github.com/karalabe/hid v1.0.0 @@ -51,6 +50,7 @@ require ( github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498 github.com/ethereum/c-kzg-4844 v0.4.0 github.com/influxdata/influxdb-client-go/v2 v2.4.0 + github.com/influxdata/influxdb1-client v0.0.0-20220302092344-a9ab5670611c github.com/kylelemons/godebug v1.1.0 github.com/mattn/go-isatty v0.0.17 github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible diff --git a/go.sum b/go.sum index 84d68e6c41ca..68019c92aba9 100644 --- a/go.sum +++ b/go.sum @@ -99,10 +99,10 @@ github.com/holiman/uint256 v1.2.4/go.mod h1:EOMSn4q6Nyt9P6efbI3bueV4e1b3dGlUCXei github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc= github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= -github.com/influxdata/influxdb v1.7.9 h1:uSeBTNO4rBkbp1Be5FKRsAmglM9nlx25TzVQRQt1An4= -github.com/influxdata/influxdb v1.7.9/go.mod h1:qZna6X/4elxqT3yI9iZYdZrWWdeFOOprn86kgg4+IzY= github.com/influxdata/influxdb-client-go/v2 v2.4.0 h1:HGBfZYStlx3Kqvsv1h2pJixbCl/jhnFtxpKFAv9Tu5k= github.com/influxdata/influxdb-client-go/v2 v2.4.0/go.mod h1:vLNHdxTJkIf2mSLvGrpj8TCcISApPoXkaxP8g9uRlW8= +github.com/influxdata/influxdb1-client v0.0.0-20220302092344-a9ab5670611c h1:qSHzRbhzK8RdXOsAdfDgO49TtqC1oZ+acxPrkfTxcCs= +github.com/influxdata/influxdb1-client v0.0.0-20220302092344-a9ab5670611c/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU= github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus= diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 9f331183f413..1e7fb9e11dd4 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -2,242 +2,112 @@ package influxdb import ( "fmt" - uurl "net/url" - "time" - "github.com/XinFinOrg/XDPoSChain/log" "github.com/XinFinOrg/XDPoSChain/metrics" - "github.com/influxdata/influxdb/client" ) -type reporter struct { - reg metrics.Registry - interval time.Duration - - url uurl.URL - database string - username string - password string - namespace string - tags map[string]string - - client *client.Client - - cache map[string]int64 -} - -// InfluxDB starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval. 
-func InfluxDB(r metrics.Registry, d time.Duration, url, database, username, password, namespace string) { - InfluxDBWithTags(r, d, url, database, username, password, namespace, nil) -} - -// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags -func InfluxDBWithTags(r metrics.Registry, d time.Duration, url, database, username, password, namespace string, tags map[string]string) { - u, err := uurl.Parse(url) - if err != nil { - log.Warn("unable to parse InfluxDB url %s. err=%v", url, err) - return - } - - rep := &reporter{ - reg: r, - interval: d, - url: *u, - database: database, - username: username, - password: password, - namespace: namespace, - tags: tags, - cache: make(map[string]int64), - } - if err := rep.makeClient(); err != nil { - log.Warn("unable to make InfluxDB client. err=%v", err) - return - } - - rep.run() -} - -func (r *reporter) makeClient() (err error) { - r.client, err = client.NewClient(client.Config{ - URL: r.url, - Username: r.username, - Password: r.password, - Timeout: 10 * time.Second, - }) - - return -} - -func (r *reporter) run() { - intervalTicker := time.NewTicker(r.interval) - pingTicker := time.NewTicker(time.Second * 5) - - defer intervalTicker.Stop() - defer pingTicker.Stop() - - for { - select { - case <-intervalTicker.C: - if err := r.send(); err != nil { - log.Warn("unable to send to InfluxDB. err=%v", err) - } - case <-pingTicker.C: - _, _, err := r.client.Ping() - if err != nil { - log.Warn("got error while sending a ping to InfluxDB, trying to recreate client. err=%v", err) - - if err = r.makeClient(); err != nil { - log.Warn("unable to make InfluxDB client. err=%v", err) - } - } +func readMeter(namespace, name string, i interface{}) (string, map[string]interface{}) { + switch metric := i.(type) { + case metrics.Counter: + measurement := fmt.Sprintf("%s%s.count", namespace, name) + fields := map[string]interface{}{ + "value": metric.Count(), } - } -} - -func (r *reporter) send() error { - var pts []client.Point - - r.reg.Each(func(name string, i interface{}) { - now := time.Now() - namespace := r.namespace - - switch metric := i.(type) { - case metrics.Counter: - count := metric.Count() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.count", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "value": count, - }, - Time: now, - }) - case metrics.CounterFloat64: - count := metric.Count() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.count", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "value": count, - }, - Time: now, - }) - case metrics.Gauge: - ms := metric.Snapshot() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.gauge", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "value": ms.Value(), - }, - Time: now, - }) - case metrics.GaugeFloat64: - ms := metric.Snapshot() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.gauge", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "value": ms.Value(), - }, - Time: now, - }) - case metrics.Histogram: - ms := metric.Snapshot() - if ms.Count() > 0 { - ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) - fields := map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p25": ps[0], - "p50": ps[1], - "p75": ps[2], - "p95": ps[3], - "p99": 
ps[4], - "p999": ps[5], - "p9999": ps[6], - } - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), - Tags: r.tags, - Fields: fields, - Time: now, - }) - } - case metrics.Meter: - ms := metric.Snapshot() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.meter", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "m1": ms.Rate1(), - "m5": ms.Rate5(), - "m15": ms.Rate15(), - "mean": ms.RateMean(), - }, - Time: now, - }) - case metrics.Timer: - ms := metric.Snapshot() - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.timer", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - "m1": ms.Rate1(), - "m5": ms.Rate5(), - "m15": ms.Rate15(), - "meanrate": ms.RateMean(), - }, - Time: now, - }) - case metrics.ResettingTimer: - t := metric.Snapshot() - - if len(t.Values()) > 0 { - ps := t.Percentiles([]float64{50, 95, 99}) - val := t.Values() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.span", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": len(val), - "max": val[len(val)-1], - "mean": t.Mean(), - "min": val[0], - "p50": ps[0], - "p95": ps[1], - "p99": ps[2], - }, - Time: now, - }) - } + return measurement, fields + case metrics.CounterFloat64: + measurement := fmt.Sprintf("%s%s.count", namespace, name) + fields := map[string]interface{}{ + "value": metric.Count(), } - }) - - bps := client.BatchPoints{ - Points: pts, - Database: r.database, + return measurement, fields + case metrics.Gauge: + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + "value": metric.Snapshot().Value(), + } + return measurement, fields + case metrics.GaugeFloat64: + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + "value": metric.Snapshot().Value(), + } + return measurement, fields + case metrics.Histogram: + ms := metric.Snapshot() + if ms.Count() <= 0 { + break + } + ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + measurement := fmt.Sprintf("%s%s.histogram", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p25": ps[0], + "p50": ps[1], + "p75": ps[2], + "p95": ps[3], + "p99": ps[4], + "p999": ps[5], + "p9999": ps[6], + } + return measurement, fields + case metrics.Meter: + ms := metric.Snapshot() + measurement := fmt.Sprintf("%s%s.meter", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "mean": ms.RateMean(), + } + return measurement, fields + case metrics.Timer: + ms := metric.Snapshot() + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + + measurement := fmt.Sprintf("%s%s.timer", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + 
"m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "meanrate": ms.RateMean(), + } + return measurement, fields + case metrics.ResettingTimer: + t := metric.Snapshot() + if len(t.Values()) == 0 { + break + } + ps := t.Percentiles([]float64{50, 95, 99}) + val := t.Values() + measurement := fmt.Sprintf("%s%s.span", namespace, name) + fields := map[string]interface{}{ + "count": len(val), + "max": val[len(val)-1], + "mean": t.Mean(), + "min": val[0], + "p50": ps[0], + "p95": ps[1], + "p99": ps[2], + } + return measurement, fields } - - _, err := r.client.Write(bps) - return err + return "", nil } diff --git a/metrics/influxdb/influxdbv1.go b/metrics/influxdb/influxdbv1.go new file mode 100644 index 000000000000..40ae9a3c10b7 --- /dev/null +++ b/metrics/influxdb/influxdbv1.go @@ -0,0 +1,145 @@ +package influxdb + +import ( + "fmt" + uurl "net/url" + "time" + + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" + client "github.com/influxdata/influxdb1-client/v2" +) + +type reporter struct { + reg metrics.Registry + interval time.Duration + + url uurl.URL + database string + username string + password string + namespace string + tags map[string]string + + client client.Client + + cache map[string]int64 +} + +// InfluxDB starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval. +func InfluxDB(r metrics.Registry, d time.Duration, url, database, username, password, namespace string) { + InfluxDBWithTags(r, d, url, database, username, password, namespace, nil) +} + +// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags +func InfluxDBWithTags(r metrics.Registry, d time.Duration, url, database, username, password, namespace string, tags map[string]string) { + u, err := uurl.Parse(url) + if err != nil { + log.Warn("Unable to parse InfluxDB", "url", url, "err", err) + return + } + + rep := &reporter{ + reg: r, + interval: d, + url: *u, + database: database, + username: username, + password: password, + namespace: namespace, + tags: tags, + cache: make(map[string]int64), + } + if err := rep.makeClient(); err != nil { + log.Warn("Unable to make InfluxDB client", "err", err) + return + } + + rep.run() +} + +// InfluxDBWithTagsOnce runs once an InfluxDB reporter and post the given metrics.Registry with the specified tags +func InfluxDBWithTagsOnce(r metrics.Registry, url, database, username, password, namespace string, tags map[string]string) error { + u, err := uurl.Parse(url) + if err != nil { + return fmt.Errorf("unable to parse InfluxDB. url: %s, err: %v", url, err) + } + + rep := &reporter{ + reg: r, + url: *u, + database: database, + username: username, + password: password, + namespace: namespace, + tags: tags, + cache: make(map[string]int64), + } + if err := rep.makeClient(); err != nil { + return fmt.Errorf("unable to make InfluxDB client. err: %v", err) + } + + if err := rep.send(); err != nil { + return fmt.Errorf("unable to send to InfluxDB. 
err: %v", err) + } + + return nil +} + +func (r *reporter) makeClient() (err error) { + r.client, err = client.NewHTTPClient(client.HTTPConfig{ + Addr: r.url.String(), + Username: r.username, + Password: r.password, + Timeout: 10 * time.Second, + }) + + return +} + +func (r *reporter) run() { + intervalTicker := time.NewTicker(r.interval) + pingTicker := time.NewTicker(time.Second * 5) + + defer intervalTicker.Stop() + defer pingTicker.Stop() + + for { + select { + case <-intervalTicker.C: + if err := r.send(); err != nil { + log.Warn("Unable to send to InfluxDB", "err", err) + } + case <-pingTicker.C: + _, _, err := r.client.Ping(0) + if err != nil { + log.Warn("Got error while sending a ping to InfluxDB, trying to recreate client", "err", err) + + if err = r.makeClient(); err != nil { + log.Warn("Unable to make InfluxDB client", "err", err) + } + } + } + } +} + +func (r *reporter) send() error { + bps, err := client.NewBatchPoints( + client.BatchPointsConfig{ + Database: r.database, + }) + if err != nil { + return err + } + r.reg.Each(func(name string, i interface{}) { + now := time.Now() + measurement, fields := readMeter(r.namespace, name, i) + if fields == nil { + return + } + if p, err := client.NewPoint(measurement, r.tags, fields, now); err == nil { + bps.AddPoint(p) + } + }) + return r.client.Write(bps) +} diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go index 8a4dbab3c3ca..15ea660d865c 100644 --- a/metrics/influxdb/influxdbv2.go +++ b/metrics/influxdb/influxdbv2.go @@ -2,7 +2,6 @@ package influxdb import ( "context" - "fmt" "time" "github.com/XinFinOrg/XDPoSChain/log" @@ -78,145 +77,13 @@ func (r *v2Reporter) run() { func (r *v2Reporter) send() { r.reg.Each(func(name string, i interface{}) { now := time.Now() - namespace := r.namespace - - switch metric := i.(type) { - - case metrics.Counter: - v := metric.Count() - - measurement := fmt.Sprintf("%s%s.count", namespace, name) - fields := map[string]interface{}{ - "value": v, - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - - case metrics.CounterFloat64: - v := metric.Count() - - measurement := fmt.Sprintf("%s%s.count", namespace, name) - fields := map[string]interface{}{ - "value": v, - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - - case metrics.Gauge: - ms := metric.Snapshot() - - measurement := fmt.Sprintf("%s%s.gauge", namespace, name) - fields := map[string]interface{}{ - "value": ms.Value(), - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - - case metrics.GaugeFloat64: - ms := metric.Snapshot() - - measurement := fmt.Sprintf("%s%s.gauge", namespace, name) - fields := map[string]interface{}{ - "value": ms.Value(), - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - - case metrics.Histogram: - ms := metric.Snapshot() - - if ms.Count() > 0 { - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) - measurement := fmt.Sprintf("%s%s.histogram", namespace, name) - fields := map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - } - - case metrics.Meter: - ms := metric.Snapshot() - - measurement := 
fmt.Sprintf("%s%s.meter", namespace, name) - fields := map[string]interface{}{ - "count": ms.Count(), - "m1": ms.Rate1(), - "m5": ms.Rate5(), - "m15": ms.Rate15(), - "mean": ms.RateMean(), - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - - case metrics.Timer: - ms := metric.Snapshot() - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) - - measurement := fmt.Sprintf("%s%s.timer", namespace, name) - fields := map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - "m1": ms.Rate1(), - "m5": ms.Rate5(), - "m15": ms.Rate15(), - "meanrate": ms.RateMean(), - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - - case metrics.ResettingTimer: - t := metric.Snapshot() - - if len(t.Values()) > 0 { - ps := t.Percentiles([]float64{50, 95, 99}) - val := t.Values() - - measurement := fmt.Sprintf("%s%s.span", namespace, name) - fields := map[string]interface{}{ - "count": len(val), - "max": val[len(val)-1], - "mean": t.Mean(), - "min": val[0], - "p50": ps[0], - "p95": ps[1], - "p99": ps[2], - } - - pt := influxdb2.NewPoint(measurement, r.tags, fields, now) - r.write.WritePoint(pt) - } + measurement, fields := readMeter(r.namespace, name, i) + if fields == nil { + return } + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) }) - // Force all unwritten data to be sent r.write.Flush() } From 54570300cc7734f12d0d6bf96d366bd9de9e618a Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 49/74] all: ensure resp.body closed (#26969) --- metrics/librato/client.go | 4 +++- node/node_test.go | 1 + p2p/simulations/mocker_test.go | 5 ++++- rpc/http_test.go | 1 + 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/metrics/librato/client.go b/metrics/librato/client.go index 89ecb9ec64da..053e1811d4ed 100644 --- a/metrics/librato/client.go +++ b/metrics/librato/client.go @@ -87,9 +87,11 @@ func (lc *LibratoClient) PostMetrics(batch Batch) (err error) { req.Header.Set("Content-Type", "application/json") req.SetBasicAuth(lc.Email, lc.Token) - if resp, err = http.DefaultClient.Do(req); err != nil { + resp, err = http.DefaultClient.Do(req) + if err != nil { return } + defer resp.Body.Close() if resp.StatusCode != http.StatusOK { var body []byte diff --git a/node/node_test.go b/node/node_test.go index df97c66c71ca..b733ea517492 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -651,5 +651,6 @@ func doHTTPRequest(t *testing.T, req *http.Request) *http.Response { if err != nil { t.Error("could not issue a GET request to the given endpoint", err) } + t.Cleanup(func() { resp.Body.Close() }) return resp } diff --git a/p2p/simulations/mocker_test.go b/p2p/simulations/mocker_test.go index 32b566ecee7e..8cc84f8e83b8 100644 --- a/p2p/simulations/mocker_test.go +++ b/p2p/simulations/mocker_test.go @@ -127,6 +127,7 @@ func TestMocker(t *testing.T) { if err != nil { t.Fatalf("Could not start mocker: %s", err) } + resp.Body.Close() if resp.StatusCode != 200 { t.Fatalf("Invalid Status Code received for starting mocker, expected 200, got %d", resp.StatusCode) } @@ -148,15 +149,17 @@ func TestMocker(t *testing.T) { if err != nil { t.Fatalf("Could not stop mocker: %s", err) } + resp.Body.Close() if resp.StatusCode != 200 { t.Fatalf("Invalid 
Status Code received for stopping mocker, expected 200, got %d", resp.StatusCode) } //reset the network - _, err = http.Post(s.URL+"/reset", "", nil) + resp, err = http.Post(s.URL+"/reset", "", nil) if err != nil { t.Fatalf("Could not reset network: %s", err) } + resp.Body.Close() //now the number of nodes in the network should be zero nodes_info, err = client.GetNodes() diff --git a/rpc/http_test.go b/rpc/http_test.go index 73920b44f09a..73124fb3508a 100644 --- a/rpc/http_test.go +++ b/rpc/http_test.go @@ -92,6 +92,7 @@ func confirmHTTPRequestYieldsStatusCode(t *testing.T, method, contentType, body if err != nil { t.Fatalf("request failed: %v", err) } + resp.Body.Close() confirmStatusCode(t, resp.StatusCode, expectedStatusCode) } From aa5e3e2c0886e52112ee6f18c8bfe106ca0a0cd9 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 50/74] metrics: make gauge_float64 and counter_float64 lock free (#27025) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes the float-gauges lock-free name old time/op new time/op delta CounterFloat64Parallel-8 1.45µs ±10% 0.85µs ± 6% -41.65% (p=0.008 n=5+5) --------- Co-authored-by: Exca-DK Co-authored-by: Martin Holst Swende --- metrics/counter_float64.go | 38 +++++++++++++++++--------------- metrics/counter_float_64_test.go | 24 +++++++++++++++++++- metrics/gauge_float64.go | 22 ++++++++---------- metrics/gauge_float64_test.go | 23 ++++++++++++++++++- 4 files changed, 74 insertions(+), 33 deletions(-) diff --git a/metrics/counter_float64.go b/metrics/counter_float64.go index f05f62fed632..d1197bb8e0ae 100644 --- a/metrics/counter_float64.go +++ b/metrics/counter_float64.go @@ -1,7 +1,8 @@ package metrics import ( - "sync" + "math" + "sync/atomic" ) // CounterFloat64 holds a float64 value that can be incremented and decremented. @@ -38,13 +39,13 @@ func NewCounterFloat64() CounterFloat64 { if !Enabled { return NilCounterFloat64{} } - return &StandardCounterFloat64{count: 0.0} + return &StandardCounterFloat64{} } // NewCounterFloat64Forced constructs a new StandardCounterFloat64 and returns it no matter if // the global switch is enabled or not. func NewCounterFloat64Forced() CounterFloat64 { - return &StandardCounterFloat64{count: 0.0} + return &StandardCounterFloat64{} } // NewRegisteredCounterFloat64 constructs and registers a new StandardCounterFloat64. @@ -113,41 +114,42 @@ func (NilCounterFloat64) Inc(i float64) {} func (NilCounterFloat64) Snapshot() CounterFloat64 { return NilCounterFloat64{} } // StandardCounterFloat64 is the standard implementation of a CounterFloat64 and uses the -// sync.Mutex package to manage a single float64 value. +// atomic to manage a single float64 value. type StandardCounterFloat64 struct { - mutex sync.Mutex - count float64 + floatBits atomic.Uint64 } // Clear sets the counter to zero. func (c *StandardCounterFloat64) Clear() { - c.mutex.Lock() - defer c.mutex.Unlock() - c.count = 0.0 + c.floatBits.Store(0) } // Count returns the current value. func (c *StandardCounterFloat64) Count() float64 { - c.mutex.Lock() - defer c.mutex.Unlock() - return c.count + return math.Float64frombits(c.floatBits.Load()) } // Dec decrements the counter by the given amount. func (c *StandardCounterFloat64) Dec(v float64) { - c.mutex.Lock() - defer c.mutex.Unlock() - c.count -= v + atomicAddFloat(&c.floatBits, -v) } // Inc increments the counter by the given amount. 
func (c *StandardCounterFloat64) Inc(v float64) { - c.mutex.Lock() - defer c.mutex.Unlock() - c.count += v + atomicAddFloat(&c.floatBits, v) } // Snapshot returns a read-only copy of the counter. func (c *StandardCounterFloat64) Snapshot() CounterFloat64 { return CounterFloat64Snapshot(c.Count()) } + +func atomicAddFloat(fbits *atomic.Uint64, v float64) { + for { + loadedBits := fbits.Load() + newBits := math.Float64bits(math.Float64frombits(loadedBits) + v) + if fbits.CompareAndSwap(loadedBits, newBits) { + break + } + } +} diff --git a/metrics/counter_float_64_test.go b/metrics/counter_float_64_test.go index 44d9b4c20c85..f17aca330cbe 100644 --- a/metrics/counter_float_64_test.go +++ b/metrics/counter_float_64_test.go @@ -1,6 +1,9 @@ package metrics -import "testing" +import ( + "sync" + "testing" +) func BenchmarkCounterFloat64(b *testing.B) { c := NewCounterFloat64() @@ -10,6 +13,25 @@ func BenchmarkCounterFloat64(b *testing.B) { } } +func BenchmarkCounterFloat64Parallel(b *testing.B) { + c := NewCounterFloat64() + b.ResetTimer() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + for i := 0; i < b.N; i++ { + c.Inc(1.0) + } + wg.Done() + }() + } + wg.Wait() + if have, want := c.Count(), 10.0*float64(b.N); have != want { + b.Fatalf("have %f want %f", have, want) + } +} + func TestCounterFloat64Clear(t *testing.T) { c := NewCounterFloat64() c.Inc(1.0) diff --git a/metrics/gauge_float64.go b/metrics/gauge_float64.go index 66819c957774..237ff8036e01 100644 --- a/metrics/gauge_float64.go +++ b/metrics/gauge_float64.go @@ -1,6 +1,9 @@ package metrics -import "sync" +import ( + "math" + "sync/atomic" +) // GaugeFloat64s hold a float64 value that can be set arbitrarily. type GaugeFloat64 interface { @@ -23,9 +26,7 @@ func NewGaugeFloat64() GaugeFloat64 { if !Enabled { return NilGaugeFloat64{} } - return &StandardGaugeFloat64{ - value: 0.0, - } + return &StandardGaugeFloat64{} } // NewRegisteredGaugeFloat64 constructs and registers a new StandardGaugeFloat64. @@ -83,10 +84,9 @@ func (NilGaugeFloat64) Update(v float64) {} func (NilGaugeFloat64) Value() float64 { return 0.0 } // StandardGaugeFloat64 is the standard implementation of a GaugeFloat64 and uses -// sync.Mutex to manage a single float64 value. +// atomic to manage a single float64 value. type StandardGaugeFloat64 struct { - mutex sync.Mutex - value float64 + floatBits atomic.Uint64 } // Snapshot returns a read-only copy of the gauge. @@ -96,16 +96,12 @@ func (g *StandardGaugeFloat64) Snapshot() GaugeFloat64 { // Update updates the gauge's value. func (g *StandardGaugeFloat64) Update(v float64) { - g.mutex.Lock() - defer g.mutex.Unlock() - g.value = v + g.floatBits.Store(math.Float64bits(v)) } // Value returns the gauge's current value. 
func (g *StandardGaugeFloat64) Value() float64 { - g.mutex.Lock() - defer g.mutex.Unlock() - return g.value + return math.Float64frombits(g.floatBits.Load()) } // FunctionalGaugeFloat64 returns value from given function diff --git a/metrics/gauge_float64_test.go b/metrics/gauge_float64_test.go index 1ae598049229..0e11d803c4f6 100644 --- a/metrics/gauge_float64_test.go +++ b/metrics/gauge_float64_test.go @@ -1,6 +1,9 @@ package metrics -import "testing" +import ( + "sync" + "testing" +) func BenchmarkGaugeFloat64(b *testing.B) { g := NewGaugeFloat64() @@ -10,6 +13,24 @@ func BenchmarkGaugeFloat64(b *testing.B) { } } +func BenchmarkGaugeFloat64Parallel(b *testing.B) { + c := NewGaugeFloat64() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + for i := 0; i < b.N; i++ { + c.Update(float64(i)) + } + wg.Done() + }() + } + wg.Wait() + if have, want := c.Value(), float64(b.N-1); have != want { + b.Fatalf("have %f want %f", have, want) + } +} + func TestGaugeFloat64(t *testing.T) { g := NewGaugeFloat64() g.Update(47.0) From 7015a9e1ccec0cb23b6eeb7cffb5a8e5f31e2ca0 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 51/74] metrics: use atomic type (#27121) --- metrics/counter.go | 14 +++++++------- metrics/ewma.go | 8 ++++---- metrics/gauge.go | 12 ++++++------ metrics/meter.go | 25 ++++++++++++++----------- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/metrics/counter.go b/metrics/counter.go index 2f78c90d5c64..55e1c59540f6 100644 --- a/metrics/counter.go +++ b/metrics/counter.go @@ -38,13 +38,13 @@ func NewCounter() Counter { if !Enabled { return NilCounter{} } - return &StandardCounter{0} + return &StandardCounter{} } // NewCounterForced constructs a new StandardCounter and returns it no matter if // the global switch is enabled or not. func NewCounterForced() Counter { - return &StandardCounter{0} + return &StandardCounter{} } // NewRegisteredCounter constructs and registers a new StandardCounter. @@ -115,27 +115,27 @@ func (NilCounter) Snapshot() Counter { return NilCounter{} } // StandardCounter is the standard implementation of a Counter and uses the // sync/atomic package to manage a single int64 value. type StandardCounter struct { - count int64 + count atomic.Int64 } // Clear sets the counter to zero. func (c *StandardCounter) Clear() { - atomic.StoreInt64(&c.count, 0) + c.count.Store(0) } // Count returns the current count. func (c *StandardCounter) Count() int64 { - return atomic.LoadInt64(&c.count) + return c.count.Load() } // Dec decrements the counter by the given amount. func (c *StandardCounter) Dec(i int64) { - atomic.AddInt64(&c.count, -i) + c.count.Add(-i) } // Inc increments the counter by the given amount. func (c *StandardCounter) Inc(i int64) { - atomic.AddInt64(&c.count, i) + c.count.Add(i) } // Snapshot returns a read-only copy of the counter. diff --git a/metrics/ewma.go b/metrics/ewma.go index 039286493ebc..ed95cba19b4f 100644 --- a/metrics/ewma.go +++ b/metrics/ewma.go @@ -75,7 +75,7 @@ func (NilEWMA) Update(n int64) {} // of uncounted events and processes them on each tick. It uses the // sync/atomic package to manage uncounted events. type StandardEWMA struct { - uncounted int64 // /!\ this should be the first member to ensure 64-bit alignment + uncounted atomic.Int64 alpha float64 rate float64 init bool @@ -97,8 +97,8 @@ func (a *StandardEWMA) Snapshot() EWMA { // Tick ticks the clock to update the moving average. It assumes it is called // every five seconds. 
func (a *StandardEWMA) Tick() { - count := atomic.LoadInt64(&a.uncounted) - atomic.AddInt64(&a.uncounted, -count) + count := a.uncounted.Load() + a.uncounted.Add(-count) instantRate := float64(count) / float64(5*time.Second) a.mutex.Lock() defer a.mutex.Unlock() @@ -112,5 +112,5 @@ func (a *StandardEWMA) Tick() { // Update adds n uncounted events. func (a *StandardEWMA) Update(n int64) { - atomic.AddInt64(&a.uncounted, n) + a.uncounted.Add(n) } diff --git a/metrics/gauge.go b/metrics/gauge.go index b6b2758b0d13..81137d7f7c5e 100644 --- a/metrics/gauge.go +++ b/metrics/gauge.go @@ -25,7 +25,7 @@ func NewGauge() Gauge { if !Enabled { return NilGauge{} } - return &StandardGauge{0} + return &StandardGauge{} } // NewRegisteredGauge constructs and registers a new StandardGauge. @@ -101,7 +101,7 @@ func (NilGauge) Value() int64 { return 0 } // StandardGauge is the standard implementation of a Gauge and uses the // sync/atomic package to manage a single int64 value. type StandardGauge struct { - value int64 + value atomic.Int64 } // Snapshot returns a read-only copy of the gauge. @@ -111,22 +111,22 @@ func (g *StandardGauge) Snapshot() Gauge { // Update updates the gauge's value. func (g *StandardGauge) Update(v int64) { - atomic.StoreInt64(&g.value, v) + g.value.Store(v) } // Value returns the gauge's current value. func (g *StandardGauge) Value() int64 { - return atomic.LoadInt64(&g.value) + return g.value.Load() } // Dec decrements the gauge's current value by the given amount. func (g *StandardGauge) Dec(i int64) { - atomic.AddInt64(&g.value, -i) + g.value.Add(-i) } // Inc increments the gauge's current value by the given amount. func (g *StandardGauge) Inc(i int64) { - atomic.AddInt64(&g.value, i) + g.value.Add(i) } // FunctionalGauge returns value from given function diff --git a/metrics/meter.go b/metrics/meter.go index 60ae919d04db..e8564d6a5e76 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -101,11 +101,7 @@ func NewRegisteredMeterForced(name string, r Registry) Meter { // MeterSnapshot is a read-only copy of another Meter. type MeterSnapshot struct { - // WARNING: The `temp` field is accessed atomically. - // On 32 bit platforms, only 64-bit aligned fields can be atomic. The struct is - // guaranteed to be so aligned, so take advantage of that. For more information, - // see https://golang.org/pkg/sync/atomic/#pkg-note-BUG. - temp int64 + temp atomic.Int64 count int64 rate1, rate5, rate15, rateMean float64 } @@ -173,7 +169,7 @@ type StandardMeter struct { snapshot *MeterSnapshot a1, a5, a15 EWMA startTime time.Time - stopped uint32 + stopped atomic.Bool } func newStandardMeter() *StandardMeter { @@ -188,8 +184,8 @@ func newStandardMeter() *StandardMeter { // Stop stops the meter, Mark() will be a no-op if you use it after being stopped. func (m *StandardMeter) Stop() { - stopped := atomic.SwapUint32(&m.stopped, 1) - if stopped != 1 { + stopped := m.stopped.Swap(true) + if !stopped { arbiter.Lock() delete(arbiter.meters, m) arbiter.Unlock() @@ -207,7 +203,7 @@ func (m *StandardMeter) Count() int64 { // Mark records the occurrence of n events. func (m *StandardMeter) Mark(n int64) { - atomic.AddInt64(&m.snapshot.temp, n) + m.snapshot.temp.Add(n) } // Rate1 returns the one-minute moving average rate of events per second. @@ -241,7 +237,14 @@ func (m *StandardMeter) RateMean() float64 { // Snapshot returns a read-only copy of the meter. 
func (m *StandardMeter) Snapshot() Meter { m.lock.RLock() - snapshot := *m.snapshot + snapshot := MeterSnapshot{ + count: m.snapshot.count, + rate1: m.snapshot.rate1, + rate5: m.snapshot.rate5, + rate15: m.snapshot.rate15, + rateMean: m.snapshot.rateMean, + } + snapshot.temp.Store(m.snapshot.temp.Load()) m.lock.RUnlock() return &snapshot } @@ -257,7 +260,7 @@ func (m *StandardMeter) updateSnapshot() { func (m *StandardMeter) updateMeter() { // should only run with write lock held on m.lock - n := atomic.SwapInt64(&m.snapshot.temp, 0) + n := m.snapshot.temp.Swap(0) m.snapshot.count += n m.a1.Update(n) m.a5.Update(n) From 7d40ca6f3526fda11c39d4bdb067a3699b75cf28 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 52/74] metrics: use sync.map in registry (#27159) --- metrics/registry.go | 103 ++++++++++++++++++--------------------- metrics/registry_test.go | 25 ++++++++++ 2 files changed, 72 insertions(+), 56 deletions(-) diff --git a/metrics/registry.go b/metrics/registry.go index c550b363d0a1..88e192c0d196 100644 --- a/metrics/registry.go +++ b/metrics/registry.go @@ -45,21 +45,17 @@ type Registry interface { // Unregister the metric with the given name. Unregister(string) - - // Unregister all metrics. (Mostly for testing.) - UnregisterAll() } -// The standard implementation of a Registry is a mutex-protected map +// The standard implementation of a Registry uses sync.map // of names to metrics. type StandardRegistry struct { - metrics map[string]interface{} - mutex sync.Mutex + metrics sync.Map } // Create a new registry. func NewRegistry() Registry { - return &StandardRegistry{metrics: make(map[string]interface{})} + return &StandardRegistry{} } // Call the given function for each registered metric. @@ -71,9 +67,8 @@ func (r *StandardRegistry) Each(f func(string, interface{})) { // Get the metric by the given name or nil if none is registered. func (r *StandardRegistry) Get(name string) interface{} { - r.mutex.Lock() - defer r.mutex.Unlock() - return r.metrics[name] + item, _ := r.metrics.Load(name) + return item } // Gets an existing metric or creates and registers a new one. Threadsafe @@ -81,35 +76,48 @@ func (r *StandardRegistry) Get(name string) interface{} { // The interface can be the metric to register if not found in registry, // or a function returning the metric for lazy instantiation. func (r *StandardRegistry) GetOrRegister(name string, i interface{}) interface{} { - r.mutex.Lock() - defer r.mutex.Unlock() - if metric, ok := r.metrics[name]; ok { - return metric + // fast path + cached, ok := r.metrics.Load(name) + if ok { + return cached } if v := reflect.ValueOf(i); v.Kind() == reflect.Func { i = v.Call(nil)[0].Interface() } - r.register(name, i) - return i + item, _, ok := r.loadOrRegister(name, i) + if !ok { + return i + } + return item } // Register the given metric under the given name. Returns a DuplicateMetric // if a metric by the given name is already registered. func (r *StandardRegistry) Register(name string, i interface{}) error { - r.mutex.Lock() - defer r.mutex.Unlock() - return r.register(name, i) + // fast path + _, ok := r.metrics.Load(name) + if ok { + return DuplicateMetric(name) + } + + if v := reflect.ValueOf(i); v.Kind() == reflect.Func { + i = v.Call(nil)[0].Interface() + } + _, loaded, _ := r.loadOrRegister(name, i) + if loaded { + return DuplicateMetric(name) + } + return nil } // Run all registered healthchecks. 
func (r *StandardRegistry) RunHealthchecks() { - r.mutex.Lock() - defer r.mutex.Unlock() - for _, i := range r.metrics { - if h, ok := i.(Healthcheck); ok { + r.metrics.Range(func(key, value any) bool { + if h, ok := value.(Healthcheck); ok { h.Check() } - } + return true + }) } // GetAll metrics in the Registry @@ -177,45 +185,31 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { // Unregister the metric with the given name. func (r *StandardRegistry) Unregister(name string) { - r.mutex.Lock() - defer r.mutex.Unlock() r.stop(name) - delete(r.metrics, name) -} - -// Unregister all metrics. (Mostly for testing.) -func (r *StandardRegistry) UnregisterAll() { - r.mutex.Lock() - defer r.mutex.Unlock() - for name := range r.metrics { - r.stop(name) - delete(r.metrics, name) - } + r.metrics.LoadAndDelete(name) } -func (r *StandardRegistry) register(name string, i interface{}) error { - if _, ok := r.metrics[name]; ok { - return DuplicateMetric(name) - } +func (r *StandardRegistry) loadOrRegister(name string, i interface{}) (interface{}, bool, bool) { switch i.(type) { case Counter, CounterFloat64, Gauge, GaugeFloat64, Healthcheck, Histogram, Meter, Timer, ResettingTimer: - r.metrics[name] = i + default: + return nil, false, false } - return nil + item, loaded := r.metrics.LoadOrStore(name, i) + return item, loaded, true } func (r *StandardRegistry) registered() map[string]interface{} { - r.mutex.Lock() - defer r.mutex.Unlock() - metrics := make(map[string]interface{}, len(r.metrics)) - for name, i := range r.metrics { - metrics[name] = i - } + metrics := make(map[string]interface{}) + r.metrics.Range(func(key, value any) bool { + metrics[key.(string)] = value + return true + }) return metrics } func (r *StandardRegistry) stop(name string) { - if i, ok := r.metrics[name]; ok { + if i, ok := r.metrics.Load(name); ok { if s, ok := i.(Stoppable); ok { s.Stop() } @@ -308,12 +302,9 @@ func (r *PrefixedRegistry) Unregister(name string) { r.underlying.Unregister(realName) } -// Unregister all metrics. (Mostly for testing.) -func (r *PrefixedRegistry) UnregisterAll() { - r.underlying.UnregisterAll() -} - -var DefaultRegistry Registry = NewRegistry() +var ( + DefaultRegistry = NewRegistry() +) // Call the given function for each registered metric. 
func Each(f func(string, interface{})) { diff --git a/metrics/registry_test.go b/metrics/registry_test.go index d277ae5c3e47..7cc5cf14fe55 100644 --- a/metrics/registry_test.go +++ b/metrics/registry_test.go @@ -1,6 +1,7 @@ package metrics import ( + "sync" "testing" ) @@ -13,6 +14,30 @@ func BenchmarkRegistry(b *testing.B) { } } +func BenchmarkRegistryGetOrRegisterParallel_8(b *testing.B) { + benchmarkRegistryGetOrRegisterParallel(b, 8) +} + +func BenchmarkRegistryGetOrRegisterParallel_32(b *testing.B) { + benchmarkRegistryGetOrRegisterParallel(b, 32) +} + +func benchmarkRegistryGetOrRegisterParallel(b *testing.B, amount int) { + r := NewRegistry() + b.ResetTimer() + var wg sync.WaitGroup + for i := 0; i < amount; i++ { + wg.Add(1) + go func() { + for i := 0; i < b.N; i++ { + r.GetOrRegister("foo", NewMeter) + } + wg.Done() + }() + } + wg.Wait() +} + func TestRegistry(t *testing.T) { r := NewRegistry() r.Register("foo", NewCounter()) From e524043a3b5897ef51292d3e0775e8a6113d67d0 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 53/74] metrics: use slices package for sorting (#27493 #27909) Co-authored-by: Felix Lange --- go.mod | 1 + go.sum | 2 ++ metrics/resetting_timer.go | 12 +++--------- metrics/sample.go | 15 +++++---------- metrics/writer.go | 20 +++++++------------- metrics/writer_test.go | 7 ++++--- 6 files changed, 22 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index 28a24f8b6b38..de84356c85ad 100644 --- a/go.mod +++ b/go.mod @@ -55,6 +55,7 @@ require ( github.com/mattn/go-isatty v0.0.17 github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible github.com/urfave/cli/v2 v2.27.5 + golang.org/x/exp v0.0.0-20231006140011-7918f672742d gopkg.in/natefinch/lumberjack.v2 v2.2.1 ) diff --git a/go.sum b/go.sum index 68019c92aba9..f1e8dd27aa3f 100644 --- a/go.sum +++ b/go.sum @@ -218,6 +218,8 @@ golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= +golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go index 57bcb31343fd..228963dc9416 100644 --- a/metrics/resetting_timer.go +++ b/metrics/resetting_timer.go @@ -2,9 +2,10 @@ package metrics import ( "math" - "sort" "sync" "time" + + "golang.org/x/exp/slices" ) // Initial slice capacity for the values stored in a ResettingTimer @@ -182,7 +183,7 @@ func (t *ResettingTimerSnapshot) Mean() float64 { } func (t *ResettingTimerSnapshot) calc(percentiles []float64) { - sort.Sort(Int64Slice(t.values)) + slices.Sort(t.values) count := len(t.values) if count > 0 { @@ -228,10 +229,3 @@ func (t *ResettingTimerSnapshot) calc(percentiles []float64) { t.calculated = true } - -// Int64Slice attaches the methods of sort.Interface to []int64, sorting in increasing order. 
-type Int64Slice []int64 - -func (s Int64Slice) Len() int { return len(s) } -func (s Int64Slice) Less(i, j int) bool { return s[i] < s[j] } -func (s Int64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } diff --git a/metrics/sample.go b/metrics/sample.go index afcaa2118426..252a878f581b 100644 --- a/metrics/sample.go +++ b/metrics/sample.go @@ -3,9 +3,10 @@ package metrics import ( "math" "math/rand" - "sort" "sync" "time" + + "golang.org/x/exp/slices" ) const rescaleThreshold = time.Hour @@ -282,17 +283,17 @@ func SampleMin(values []int64) int64 { } // SamplePercentiles returns an arbitrary percentile of the slice of int64. -func SamplePercentile(values int64Slice, p float64) float64 { +func SamplePercentile(values []int64, p float64) float64 { return SamplePercentiles(values, []float64{p})[0] } // SamplePercentiles returns a slice of arbitrary percentiles of the slice of // int64. -func SamplePercentiles(values int64Slice, ps []float64) []float64 { +func SamplePercentiles(values []int64, ps []float64) []float64 { scores := make([]float64, len(ps)) size := len(values) if size > 0 { - sort.Sort(values) + slices.Sort(values) for i, p := range ps { pos := p * float64(size+1) if pos < 1.0 { @@ -633,9 +634,3 @@ func (h *expDecaySampleHeap) down(i, n int) { i = j } } - -type int64Slice []int64 - -func (p int64Slice) Len() int { return len(p) } -func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] } -func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } diff --git a/metrics/writer.go b/metrics/writer.go index 256fbd14c9b9..82434e9d1d62 100644 --- a/metrics/writer.go +++ b/metrics/writer.go @@ -3,8 +3,10 @@ package metrics import ( "fmt" "io" - "sort" + "strings" "time" + + "golang.org/x/exp/slices" ) // Write sorts writes each metric in the given registry periodically to the @@ -18,12 +20,11 @@ func Write(r Registry, d time.Duration, w io.Writer) { // WriteOnce sorts and writes metrics in the given registry to the given // io.Writer. func WriteOnce(r Registry, w io.Writer) { - var namedMetrics namedMetricSlice + var namedMetrics []namedMetric r.Each(func(name string, i interface{}) { namedMetrics = append(namedMetrics, namedMetric{name, i}) }) - - sort.Sort(namedMetrics) + slices.SortFunc(namedMetrics, namedMetric.cmp) for _, namedMetric := range namedMetrics { switch metric := namedMetric.m.(type) { case Counter: @@ -91,13 +92,6 @@ type namedMetric struct { m interface{} } -// namedMetricSlice is a slice of namedMetrics that implements sort.Interface. 
-type namedMetricSlice []namedMetric - -func (nms namedMetricSlice) Len() int { return len(nms) } - -func (nms namedMetricSlice) Swap(i, j int) { nms[i], nms[j] = nms[j], nms[i] } - -func (nms namedMetricSlice) Less(i, j int) bool { - return nms[i].name < nms[j].name +func (m namedMetric) cmp(other namedMetric) int { + return strings.Compare(m.name, other.name) } diff --git a/metrics/writer_test.go b/metrics/writer_test.go index 1aacc287121b..8376bf8975c8 100644 --- a/metrics/writer_test.go +++ b/metrics/writer_test.go @@ -1,19 +1,20 @@ package metrics import ( - "sort" "testing" + + "golang.org/x/exp/slices" ) func TestMetricsSorting(t *testing.T) { - var namedMetrics = namedMetricSlice{ + var namedMetrics = []namedMetric{ {name: "zzz"}, {name: "bbb"}, {name: "fff"}, {name: "ggg"}, } - sort.Sort(namedMetrics) + slices.SortFunc(namedMetrics, namedMetric.cmp) for i, name := range []string{"bbb", "fff", "ggg", "zzz"} { if namedMetrics[i].name != name { t.Fail() From 51e49a869dee6cd408e13b0fa6d95c98b32da3f9 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 54/74] metrics: NilTimer should still run the function to be timed (#27723) --- metrics/timer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/timer.go b/metrics/timer.go index a63c9dfb6c2d..2e1a9be47295 100644 --- a/metrics/timer.go +++ b/metrics/timer.go @@ -123,7 +123,7 @@ func (NilTimer) Stop() {} func (NilTimer) Sum() int64 { return 0 } // Time is a no-op. -func (NilTimer) Time(func()) {} +func (NilTimer) Time(f func()) { f() } // Update is a no-op. func (NilTimer) Update(time.Duration) {} From d43f6c0cb0dc82a5f62ea3eab8e83f45cc178051 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 55/74] metrics: NilResettingTimer.Time should execute the timed function (#27724) --- metrics/resetting_timer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go index 228963dc9416..858984a32cc4 100644 --- a/metrics/resetting_timer.go +++ b/metrics/resetting_timer.go @@ -62,7 +62,7 @@ func (NilResettingTimer) Values() []int64 { return nil } func (NilResettingTimer) Snapshot() ResettingTimer { return NilResettingTimer{} } // Time is a no-op. -func (NilResettingTimer) Time(func()) {} +func (NilResettingTimer) Time(f func()) { f() } // Update is a no-op. func (NilResettingTimer) Update(time.Duration) {} From 35380508fce2f04c87202f103b4cb865a3f30715 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 56/74] metrics, cmd/geth: informational metrics (prometheus, influxdb, opentsb) (#24877) This chang creates a GaugeInfo metrics type for registering informational (textual) metrics, e.g. geth version number. It also improves the testing for backend-exporters, and uses a shared subpackage in 'internal' to provide sample datasets and ordered registry. 
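As a minimal sketch of the pattern this patch introduces (assuming only the NewRegisteredGaugeInfo constructor and the GaugeInfoValue string map that appear in the cmd/utils and core/blockchain hunks below; the metric name and field values here are illustrative, not part of the change), an informational gauge is registered once and then updated with textual key/value pairs:

package main

import "github.com/XinFinOrg/XDPoSChain/metrics"

func main() {
	// Register a textual (informational) gauge on the default registry,
	// mirroring the "xdc/info" and "chain/info" gauges added below.
	// The name "example/info" is made up for this sketch.
	info := metrics.NewRegisteredGaugeInfo("example/info", nil)

	// GaugeInfoValue is a map of string keys to string values; the exporters
	// touched by this patch publish these entries as textual data rather than
	// numeric samples. The values below are placeholders.
	info.Update(metrics.GaugeInfoValue{
		"version": "0.1.0",
		"os":      "linux",
	})
}
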
Implements #21783 --------- Co-authored-by: Martin Holst Swende --- cmd/XDC/config.go | 2 +- cmd/utils/utils.go | 22 ++- core/blockchain.go | 6 + metrics/exp/exp.go | 20 +++ metrics/gauge_info.go | 144 +++++++++++++++++++ metrics/gauge_info_test.go | 75 ++++++++++ metrics/graphite.go | 2 + metrics/influxdb/influxdb.go | 7 + metrics/influxdb/influxdb_test.go | 114 +++++++++++++++ metrics/influxdb/influxdbv1.go | 15 +- metrics/influxdb/influxdbv2.go | 13 +- metrics/influxdb/testdata/influxdbv1.want | 9 ++ metrics/influxdb/testdata/influxdbv2.want | 9 ++ metrics/internal/sampledata.go | 64 +++++++++ metrics/librato/librato.go | 4 + metrics/log.go | 3 + metrics/meter.go | 10 ++ metrics/opentsdb.go | 27 ++-- metrics/opentsdb_test.go | 30 ++++ metrics/prometheus/collector.go | 46 ++++++ metrics/prometheus/collector_test.go | 146 +++++++------------- metrics/prometheus/prometheus.go | 20 +-- metrics/prometheus/testdata/prometheus.want | 48 +++++++ metrics/registry.go | 35 ++++- metrics/syslog.go | 2 + metrics/testdata/opentsb.want | 23 +++ metrics/writer.go | 3 + 27 files changed, 756 insertions(+), 143 deletions(-) create mode 100644 metrics/gauge_info.go create mode 100644 metrics/gauge_info_test.go create mode 100644 metrics/influxdb/influxdb_test.go create mode 100644 metrics/influxdb/testdata/influxdbv1.want create mode 100644 metrics/influxdb/testdata/influxdbv2.want create mode 100644 metrics/internal/sampledata.go create mode 100644 metrics/prometheus/testdata/prometheus.want create mode 100644 metrics/testdata/opentsb.want diff --git a/cmd/XDC/config.go b/cmd/XDC/config.go index 511ba21715a6..7dbc8774b895 100644 --- a/cmd/XDC/config.go +++ b/cmd/XDC/config.go @@ -237,7 +237,7 @@ func makeFullNode(ctx *cli.Context) (*node.Node, XDCConfig) { // Register XDCX's OrderBook service if requested. // enable in default utils.RegisterXDCXService(stack, &cfg.XDCX) - utils.RegisterEthService(stack, &cfg.Eth) + utils.RegisterEthService(stack, &cfg.Eth, cfg.Node.Version) // Add the Ethereum Stats daemon if requested. if cfg.Ethstats.URL != "" { diff --git a/cmd/utils/utils.go b/cmd/utils/utils.go index bc676c7b08ff..5afa50c32bbe 100644 --- a/cmd/utils/utils.go +++ b/cmd/utils/utils.go @@ -1,6 +1,10 @@ package utils import ( + "fmt" + "runtime" + "strings" + "github.com/XinFinOrg/XDPoSChain/XDCx" "github.com/XinFinOrg/XDPoSChain/XDCxlending" "github.com/XinFinOrg/XDPoSChain/eth" @@ -8,11 +12,12 @@ import ( "github.com/XinFinOrg/XDPoSChain/eth/ethconfig" "github.com/XinFinOrg/XDPoSChain/ethstats" "github.com/XinFinOrg/XDPoSChain/les" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/node" ) // RegisterEthService adds an Ethereum client to the stack. 
-func RegisterEthService(stack *node.Node, cfg *ethconfig.Config) { +func RegisterEthService(stack *node.Node, cfg *ethconfig.Config, version string) { var err error if cfg.SyncMode == downloader.LightSync { err = stack.Register(func(ctx *node.ServiceContext) (node.Service, error) { @@ -29,6 +34,21 @@ func RegisterEthService(stack *node.Node, cfg *ethconfig.Config) { ls, _ := les.NewLesServer(fullNode, cfg) fullNode.AddLesServer(ls) } + + // TODO: move the following code to function makeFullNode + // Ref: #21105, #22641, #23761, #24877 + // Create gauge with geth system and build information + var protos []string + for _, p := range fullNode.Protocols() { + protos = append(protos, fmt.Sprintf("%v/%d", p.Name, p.Version)) + } + metrics.NewRegisteredGaugeInfo("xdc/info", nil).Update(metrics.GaugeInfoValue{ + "arch": runtime.GOARCH, + "os": runtime.GOOS, + "version": version, // cfg.Node.Version + "eth_protocols": strings.Join(protos, ","), + }) + return fullNode, err }) } diff --git a/core/blockchain.go b/core/blockchain.go index d11a421978ca..f09fb31d670e 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -60,6 +60,8 @@ var ( headHeaderGauge = metrics.NewRegisteredGauge("chain/head/header", nil) headFastBlockGauge = metrics.NewRegisteredGauge("chain/head/receipt", nil) + chainInfoGauge = metrics.NewRegisteredGaugeInfo("chain/info", nil) + accountReadTimer = metrics.NewRegisteredTimer("chain/account/reads", nil) accountHashTimer = metrics.NewRegisteredTimer("chain/account/hashes", nil) accountUpdateTimer = metrics.NewRegisteredTimer("chain/account/updates", nil) @@ -244,6 +246,10 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par if bc.genesisBlock == nil { return nil, ErrNoGenesis } + + // Update chain info data metrics + chainInfoGauge.Update(metrics.GaugeInfoValue{"chain_id": bc.chainConfig.ChainId.String()}) + if err := bc.loadLastState(); err != nil { return nil, err } diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 77b198e610fd..6091f2f848b5 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -95,6 +95,20 @@ func (exp *exp) getFloat(name string) *expvar.Float { return v } +func (exp *exp) getInfo(name string) *expvar.String { + var v *expvar.String + exp.expvarLock.Lock() + p := expvar.Get(name) + if p != nil { + v = p.(*expvar.String) + } else { + v = new(expvar.String) + expvar.Publish(name, v) + } + exp.expvarLock.Unlock() + return v +} + func (exp *exp) publishCounter(name string, metric metrics.Counter) { v := exp.getInt(name) v.Set(metric.Count()) @@ -113,6 +127,10 @@ func (exp *exp) publishGaugeFloat64(name string, metric metrics.GaugeFloat64) { exp.getFloat(name).Set(metric.Value()) } +func (exp *exp) publishGaugeInfo(name string, metric metrics.GaugeInfo) { + exp.getInfo(name).Set(metric.Value().String()) +} + func (exp *exp) publishHistogram(name string, metric metrics.Histogram) { h := metric.Snapshot() ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) @@ -178,6 +196,8 @@ func (exp *exp) syncToExpvar() { exp.publishGauge(name, i) case metrics.GaugeFloat64: exp.publishGaugeFloat64(name, i) + case metrics.GaugeInfo: + exp.publishGaugeInfo(name, i) case metrics.Histogram: exp.publishHistogram(name, i) case metrics.Meter: diff --git a/metrics/gauge_info.go b/metrics/gauge_info.go new file mode 100644 index 000000000000..f1b2075939e7 --- /dev/null +++ b/metrics/gauge_info.go @@ -0,0 +1,144 @@ +package metrics + +import ( + "encoding/json" + "sync" +) + +// GaugeInfos hold a GaugeInfoValue value that can be 
set arbitrarily. +type GaugeInfo interface { + Snapshot() GaugeInfo + Update(GaugeInfoValue) + Value() GaugeInfoValue +} + +// GaugeInfoValue is a mappng of (string) keys to (string) values +type GaugeInfoValue map[string]string + +func (val GaugeInfoValue) String() string { + data, _ := json.Marshal(val) + return string(data) +} + +// GetOrRegisterGaugeInfo returns an existing GaugeInfo or constructs and registers a +// new StandardGaugeInfo. +func GetOrRegisterGaugeInfo(name string, r Registry) GaugeInfo { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewGaugeInfo()).(GaugeInfo) +} + +// NewGaugeInfo constructs a new StandardGaugeInfo. +func NewGaugeInfo() GaugeInfo { + if !Enabled { + return NilGaugeInfo{} + } + return &StandardGaugeInfo{ + value: GaugeInfoValue{}, + } +} + +// NewRegisteredGaugeInfo constructs and registers a new StandardGaugeInfo. +func NewRegisteredGaugeInfo(name string, r Registry) GaugeInfo { + c := NewGaugeInfo() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// NewFunctionalGauge constructs a new FunctionalGauge. +func NewFunctionalGaugeInfo(f func() GaugeInfoValue) GaugeInfo { + if !Enabled { + return NilGaugeInfo{} + } + return &FunctionalGaugeInfo{value: f} +} + +// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. +func NewRegisteredFunctionalGaugeInfo(name string, r Registry, f func() GaugeInfoValue) GaugeInfo { + c := NewFunctionalGaugeInfo(f) + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// GaugeInfoSnapshot is a read-only copy of another GaugeInfo. +type GaugeInfoSnapshot GaugeInfoValue + +// Snapshot returns the snapshot. +func (g GaugeInfoSnapshot) Snapshot() GaugeInfo { return g } + +// Update panics. +func (GaugeInfoSnapshot) Update(GaugeInfoValue) { + panic("Update called on a GaugeInfoSnapshot") +} + +// Value returns the value at the time the snapshot was taken. +func (g GaugeInfoSnapshot) Value() GaugeInfoValue { return GaugeInfoValue(g) } + +// NilGauge is a no-op Gauge. +type NilGaugeInfo struct{} + +// Snapshot is a no-op. +func (NilGaugeInfo) Snapshot() GaugeInfo { return NilGaugeInfo{} } + +// Update is a no-op. +func (NilGaugeInfo) Update(v GaugeInfoValue) {} + +// Value is a no-op. +func (NilGaugeInfo) Value() GaugeInfoValue { return GaugeInfoValue{} } + +// StandardGaugeInfo is the standard implementation of a GaugeInfo and uses +// sync.Mutex to manage a single string value. +type StandardGaugeInfo struct { + mutex sync.Mutex + value GaugeInfoValue +} + +// Snapshot returns a read-only copy of the gauge. +func (g *StandardGaugeInfo) Snapshot() GaugeInfo { + return GaugeInfoSnapshot(g.Value()) +} + +// Update updates the gauge's value. +func (g *StandardGaugeInfo) Update(v GaugeInfoValue) { + g.mutex.Lock() + defer g.mutex.Unlock() + g.value = v +} + +// Value returns the gauge's current value. +func (g *StandardGaugeInfo) Value() GaugeInfoValue { + g.mutex.Lock() + defer g.mutex.Unlock() + return g.value +} + +// FunctionalGaugeInfo returns value from given function +type FunctionalGaugeInfo struct { + value func() GaugeInfoValue +} + +// Value returns the gauge's current value. +func (g FunctionalGaugeInfo) Value() GaugeInfoValue { + return g.value() +} + +// Value returns the gauge's current value in JSON string format +func (g FunctionalGaugeInfo) ValueJsonString() string { + data, _ := json.Marshal(g.value()) + return string(data) +} + +// Snapshot returns the snapshot. 
+func (g FunctionalGaugeInfo) Snapshot() GaugeInfo { return GaugeInfoSnapshot(g.Value()) } + +// Update panics. +func (FunctionalGaugeInfo) Update(GaugeInfoValue) { + panic("Update called on a FunctionalGaugeInfo") +} diff --git a/metrics/gauge_info_test.go b/metrics/gauge_info_test.go new file mode 100644 index 000000000000..4227a6a85fab --- /dev/null +++ b/metrics/gauge_info_test.go @@ -0,0 +1,75 @@ +package metrics + +import ( + "strconv" + "testing" +) + +func TestGaugeInfoJsonString(t *testing.T) { + g := NewGaugeInfo() + g.Update(GaugeInfoValue{ + "chain_id": "5", + "anotherKey": "any_string_value", + "third_key": "anything", + }, + ) + want := `{"anotherKey":"any_string_value","chain_id":"5","third_key":"anything"}` + if have := g.Value().String(); have != want { + t.Errorf("\nhave: %v\nwant: %v\n", have, want) + } +} + +func TestGaugeInfoSnapshot(t *testing.T) { + g := NewGaugeInfo() + g.Update(GaugeInfoValue{"value": "original"}) + snapshot := g.Snapshot() // Snapshot @chainid 5 + g.Update(GaugeInfoValue{"value": "updated"}) + // The 'g' should be updated + if have, want := g.Value().String(), `{"value":"updated"}`; have != want { + t.Errorf("\nhave: %v\nwant: %v\n", have, want) + } + // Snapshot should be unupdated + if have, want := snapshot.Value().String(), `{"value":"original"}`; have != want { + t.Errorf("\nhave: %v\nwant: %v\n", have, want) + } +} + +func TestGetOrRegisterGaugeInfo(t *testing.T) { + r := NewRegistry() + NewRegisteredGaugeInfo("foo", r).Update( + GaugeInfoValue{"chain_id": "5"}) + g := GetOrRegisterGaugeInfo("foo", r) + if have, want := g.Value().String(), `{"chain_id":"5"}`; have != want { + t.Errorf("have\n%v\nwant\n%v\n", have, want) + } +} + +func TestFunctionalGaugeInfo(t *testing.T) { + info := GaugeInfoValue{"chain_id": "0"} + counter := 1 + // A "functional" gauge invokes the method to obtain the value + fg := NewFunctionalGaugeInfo(func() GaugeInfoValue { + info["chain_id"] = strconv.Itoa(counter) + counter++ + return info + }) + fg.Value() + fg.Value() + if have, want := info["chain_id"], "2"; have != want { + t.Errorf("have %v want %v", have, want) + } +} + +func TestGetOrRegisterFunctionalGaugeInfo(t *testing.T) { + r := NewRegistry() + NewRegisteredFunctionalGaugeInfo("foo", r, func() GaugeInfoValue { + return GaugeInfoValue{ + "chain_id": "5", + } + }) + want := `{"chain_id":"5"}` + have := GetOrRegisterGaugeInfo("foo", r).Value().String() + if have != want { + t.Errorf("have\n%v\nwant\n%v\n", have, want) + } +} diff --git a/metrics/graphite.go b/metrics/graphite.go index 29f72b0c4181..4e3dd3b3b894 100644 --- a/metrics/graphite.go +++ b/metrics/graphite.go @@ -73,6 +73,8 @@ func graphite(c *GraphiteConfig) error { fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now) case GaugeFloat64: fmt.Fprintf(w, "%s.%s.value %f %d\n", c.Prefix, name, metric.Value(), now) + case GaugeInfo: + fmt.Fprintf(w, "%s.%s.value %s %d\n", c.Prefix, name, metric.Value().String(), now) case Histogram: h := metric.Snapshot() ps := h.Percentiles(c.Percentiles) diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 1e7fb9e11dd4..a1a210033ff4 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -32,6 +32,13 @@ func readMeter(namespace, name string, i interface{}) (string, map[string]interf "value": metric.Snapshot().Value(), } return measurement, fields + case metrics.GaugeInfo: + ms := metric.Snapshot() + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + 
"value": ms.Value().String(), + } + return measurement, fields case metrics.Histogram: ms := metric.Snapshot() if ms.Count() <= 0 { diff --git a/metrics/influxdb/influxdb_test.go b/metrics/influxdb/influxdb_test.go new file mode 100644 index 000000000000..dca3d9ab3264 --- /dev/null +++ b/metrics/influxdb/influxdb_test.go @@ -0,0 +1,114 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package influxdb + +import ( + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/url" + "os" + "strings" + "testing" + + "github.com/XinFinOrg/XDPoSChain/metrics" + "github.com/XinFinOrg/XDPoSChain/metrics/internal" + influxdb2 "github.com/influxdata/influxdb-client-go/v2" +) + +func TestMain(m *testing.M) { + metrics.Enabled = true + os.Exit(m.Run()) +} + +func TestExampleV1(t *testing.T) { + r := internal.ExampleMetrics() + var have, want string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + haveB, _ := io.ReadAll(r.Body) + have = string(haveB) + r.Body.Close() + })) + defer ts.Close() + u, _ := url.Parse(ts.URL) + rep := &reporter{ + reg: r, + url: *u, + namespace: "goth.", + } + if err := rep.makeClient(); err != nil { + t.Fatal(err) + } + if err := rep.send(978307200); err != nil { + t.Fatal(err) + } + if wantB, err := os.ReadFile("./testdata/influxdbv1.want"); err != nil { + t.Fatal(err) + } else { + want = string(wantB) + } + if have != want { + t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) + t.Logf("have vs want:\n%v", findFirstDiffPos(have, want)) + } +} + +func TestExampleV2(t *testing.T) { + r := internal.ExampleMetrics() + var have, want string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + haveB, _ := io.ReadAll(r.Body) + have = string(haveB) + r.Body.Close() + })) + defer ts.Close() + + rep := &v2Reporter{ + reg: r, + endpoint: ts.URL, + namespace: "goth.", + } + rep.client = influxdb2.NewClient(rep.endpoint, rep.token) + defer rep.client.Close() + rep.write = rep.client.WriteAPI(rep.organization, rep.bucket) + + rep.send(978307200) + + if wantB, err := os.ReadFile("./testdata/influxdbv2.want"); err != nil { + t.Fatal(err) + } else { + want = string(wantB) + } + if have != want { + t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) + t.Logf("have vs want:\n %v", findFirstDiffPos(have, want)) + } +} + +func findFirstDiffPos(a, b string) string { + yy := strings.Split(b, "\n") + for i, x := range strings.Split(a, "\n") { + if i >= len(yy) { + return fmt.Sprintf("have:%d: %s\nwant:%d: ", i, x, i) + } + if y := yy[i]; x != y { + return fmt.Sprintf("have:%d: %s\nwant:%d: %s", i, x, i, y) + } + } + return "" +} diff --git a/metrics/influxdb/influxdbv1.go b/metrics/influxdb/influxdbv1.go index 40ae9a3c10b7..7b55872fbad3 100644 --- a/metrics/influxdb/influxdbv1.go +++ 
b/metrics/influxdb/influxdbv1.go @@ -79,7 +79,7 @@ func InfluxDBWithTagsOnce(r metrics.Registry, url, database, username, password, return fmt.Errorf("unable to make InfluxDB client. err: %v", err) } - if err := rep.send(); err != nil { + if err := rep.send(0); err != nil { return fmt.Errorf("unable to send to InfluxDB. err: %v", err) } @@ -107,7 +107,7 @@ func (r *reporter) run() { for { select { case <-intervalTicker.C: - if err := r.send(); err != nil { + if err := r.send(0); err != nil { log.Warn("Unable to send to InfluxDB", "err", err) } case <-pingTicker.C: @@ -123,7 +123,9 @@ func (r *reporter) run() { } } -func (r *reporter) send() error { +// send sends the measurements. If provided tstamp is >0, it is used. Otherwise, +// a 'fresh' timestamp is used. +func (r *reporter) send(tstamp int64) error { bps, err := client.NewBatchPoints( client.BatchPointsConfig{ Database: r.database, @@ -132,7 +134,12 @@ func (r *reporter) send() error { return err } r.reg.Each(func(name string, i interface{}) { - now := time.Now() + var now time.Time + if tstamp <= 0 { + now = time.Now() + } else { + now = time.Unix(tstamp, 0) + } measurement, fields := readMeter(r.namespace, name, i) if fields == nil { return diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go index 15ea660d865c..d45e9c4d45be 100644 --- a/metrics/influxdb/influxdbv2.go +++ b/metrics/influxdb/influxdbv2.go @@ -64,7 +64,7 @@ func (r *v2Reporter) run() { for { select { case <-intervalTicker.C: - r.send() + r.send(0) case <-pingTicker.C: _, err := r.client.Health(context.Background()) if err != nil { @@ -74,9 +74,16 @@ func (r *v2Reporter) run() { } } -func (r *v2Reporter) send() { +// send sends the measurements. If provided tstamp is >0, it is used. Otherwise, +// a 'fresh' timestamp is used. 
+func (r *v2Reporter) send(tstamp int64) { r.reg.Each(func(name string, i interface{}) { - now := time.Now() + var now time.Time + if tstamp <= 0 { + now = time.Now() + } else { + now = time.Unix(tstamp, 0) + } measurement, fields := readMeter(r.namespace, name, i) if fields == nil { return diff --git a/metrics/influxdb/testdata/influxdbv1.want b/metrics/influxdb/testdata/influxdbv1.want new file mode 100644 index 000000000000..5efffb959504 --- /dev/null +++ b/metrics/influxdb/testdata/influxdbv1.want @@ -0,0 +1,9 @@ +goth.test/counter.count value=12345 978307200000000000 +goth.test/counter_float64.count value=54321.98 978307200000000000 +goth.test/gauge.gauge value=23456i 978307200000000000 +goth.test/gauge_float64.gauge value=34567.89 978307200000000000 +goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 +goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 +goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 +goth.test/resetting_timer.span count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12000000i,p95=120000000i,p99=120000000i 978307200000000000 +goth.test/timer.timer count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/influxdb/testdata/influxdbv2.want b/metrics/influxdb/testdata/influxdbv2.want new file mode 100644 index 000000000000..5efffb959504 --- /dev/null +++ b/metrics/influxdb/testdata/influxdbv2.want @@ -0,0 +1,9 @@ +goth.test/counter.count value=12345 978307200000000000 +goth.test/counter_float64.count value=54321.98 978307200000000000 +goth.test/gauge.gauge value=23456i 978307200000000000 +goth.test/gauge_float64.gauge value=34567.89 978307200000000000 +goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 +goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 +goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 +goth.test/resetting_timer.span count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12000000i,p95=120000000i,p99=120000000i 978307200000000000 +goth.test/timer.timer count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/internal/sampledata.go b/metrics/internal/sampledata.go new file mode 100644 index 000000000000..6b03af394f70 --- /dev/null +++ b/metrics/internal/sampledata.go @@ -0,0 +1,64 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. 
+// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package internal + +import ( + "time" + + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +// ExampleMetrics returns an ordered registry populated with a sample of metrics. +func ExampleMetrics() metrics.Registry { + var registry = metrics.NewOrderedRegistry() + + metrics.NewRegisteredCounterFloat64("test/counter", registry).Inc(12345) + metrics.NewRegisteredCounterFloat64("test/counter_float64", registry).Inc(54321.98) + metrics.NewRegisteredGauge("test/gauge", registry).Update(23456) + metrics.NewRegisteredGaugeFloat64("test/gauge_float64", registry).Update(34567.89) + metrics.NewRegisteredGaugeInfo("test/gauge_info", registry).Update( + metrics.GaugeInfoValue{ + "version": "1.10.18-unstable", + "arch": "amd64", + "os": "linux", + "commit": "7caa2d8163ae3132c1c2d6978c76610caee2d949", + "protocol_versions": "64 65 66", + }) + metrics.NewRegisteredHistogram("test/histogram", registry, metrics.NewSampleSnapshot(3, []int64{1, 2, 3})) + registry.Register("test/meter", metrics.NewInactiveMeter()) + { + timer := metrics.NewRegisteredResettingTimer("test/resetting_timer", registry) + timer.Update(10 * time.Millisecond) + timer.Update(11 * time.Millisecond) + timer.Update(12 * time.Millisecond) + timer.Update(120 * time.Millisecond) + timer.Update(13 * time.Millisecond) + timer.Update(14 * time.Millisecond) + } + { + timer := metrics.NewRegisteredTimer("test/timer", registry) + timer.Update(20 * time.Millisecond) + timer.Update(21 * time.Millisecond) + timer.Update(22 * time.Millisecond) + timer.Update(120 * time.Millisecond) + timer.Update(23 * time.Millisecond) + timer.Update(24 * time.Millisecond) + timer.Stop() + } + registry.Register("test/empty_resetting_timer", metrics.NewResettingTimer().Snapshot()) + return registry +} diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go index 7fa4c56b5db3..2df2085c6f95 100644 --- a/metrics/librato/librato.go +++ b/metrics/librato/librato.go @@ -126,6 +126,10 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba measurement[Name] = name measurement[Value] = m.Value() snapshot.Gauges = append(snapshot.Gauges, measurement) + case metrics.GaugeInfo: + measurement[Name] = name + measurement[Value] = m.Value() + snapshot.Gauges = append(snapshot.Gauges, measurement) case metrics.Histogram: if m.Count() > 0 { gauges := make([]Measurement, histogramGaugeCount) diff --git a/metrics/log.go b/metrics/log.go index d1ce627a8378..d71a1c3d9c66 100644 --- a/metrics/log.go +++ b/metrics/log.go @@ -33,6 +33,9 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { case GaugeFloat64: l.Printf("gauge %s\n", name) l.Printf(" value: %f\n", metric.Value()) + case GaugeInfo: + l.Printf("gauge %s\n", name) + l.Printf(" value: %s\n", metric.Value()) case Healthcheck: metric.Check() l.Printf("healthcheck %s\n", name) diff --git 
a/metrics/meter.go b/metrics/meter.go index e8564d6a5e76..8a89dc4275f1 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -58,6 +58,16 @@ func NewMeter() Meter { return m } +// NewInactiveMeter returns a meter but does not start any goroutines. This +// method is mainly intended for testing. +func NewInactiveMeter() Meter { + if !Enabled { + return NilMeter{} + } + m := newStandardMeter() + return m +} + // NewMeterForced constructs a new StandardMeter and launches a goroutine no matter // the global switch is enabled or not. // Be sure to call Stop() once the meter is of no use to allow for garbage collection. diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go index c9fd2e75d5e5..4d2e209238fa 100644 --- a/metrics/opentsdb.go +++ b/metrics/opentsdb.go @@ -3,6 +3,7 @@ package metrics import ( "bufio" "fmt" + "io" "log" "net" "os" @@ -57,16 +58,10 @@ func getShortHostname() string { return shortHostName } -func openTSDB(c *OpenTSDBConfig) error { - shortHostname := getShortHostname() - now := time.Now().Unix() +// writeRegistry writes the registry-metrics on the opentsb format. +func (c *OpenTSDBConfig) writeRegistry(w io.Writer, now int64, shortHostname string) { du := float64(c.DurationUnit) - conn, err := net.DialTCP("tcp", nil, c.Addr) - if nil != err { - return err - } - defer conn.Close() - w := bufio.NewWriter(conn) + c.Registry.Each(func(name string, i interface{}) { switch metric := i.(type) { case Counter: @@ -77,6 +72,8 @@ func openTSDB(c *OpenTSDBConfig) error { fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) case GaugeFloat64: fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + case GaugeInfo: + fmt.Fprintf(w, "put %s.%s.value %d %s host=%s\n", c.Prefix, name, now, metric.Value().String(), shortHostname) case Histogram: h := metric.Snapshot() ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) @@ -115,7 +112,17 @@ func openTSDB(c *OpenTSDBConfig) error { fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate15(), shortHostname) fmt.Fprintf(w, "put %s.%s.mean-rate %d %.2f host=%s\n", c.Prefix, name, now, t.RateMean(), shortHostname) } - w.Flush() }) +} + +func openTSDB(c *OpenTSDBConfig) error { + conn, err := net.DialTCP("tcp", nil, c.Addr) + if nil != err { + return err + } + defer conn.Close() + w := bufio.NewWriter(conn) + c.writeRegistry(w, time.Now().Unix(), getShortHostname()) + w.Flush() return nil } diff --git a/metrics/opentsdb_test.go b/metrics/opentsdb_test.go index c43728960ed5..c02b98af061e 100644 --- a/metrics/opentsdb_test.go +++ b/metrics/opentsdb_test.go @@ -2,6 +2,9 @@ package metrics import ( "net" + "os" + "strings" + "testing" "time" ) @@ -19,3 +22,30 @@ func ExampleOpenTSDBWithConfig() { DurationUnit: time.Millisecond, }) } + +func TestExampleOpenTSB(t *testing.T) { + r := NewOrderedRegistry() + NewRegisteredGaugeInfo("foo", r).Update(GaugeInfoValue{"chain_id": "5"}) + NewRegisteredGaugeFloat64("pi", r).Update(3.14) + NewRegisteredCounter("months", r).Inc(12) + NewRegisteredCounterFloat64("tau", r).Inc(1.57) + NewRegisteredMeter("elite", r).Mark(1337) + NewRegisteredTimer("second", r).Update(time.Second) + NewRegisteredCounterFloat64("tau", r).Inc(1.57) + NewRegisteredCounterFloat64("tau", r).Inc(1.57) + + w := new(strings.Builder) + (&OpenTSDBConfig{ + Registry: r, + DurationUnit: time.Millisecond, + Prefix: "pre", + }).writeRegistry(w, 978307200, "hal9000") + + wantB, err := 
os.ReadFile("./testdata/opentsb.want") + if err != nil { + t.Fatal(err) + } + if have, want := w.String(), string(wantB); have != want { + t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) + } +} diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index b7c238f601d7..2187438df8ee 100644 --- a/metrics/prometheus/collector.go +++ b/metrics/prometheus/collector.go @@ -19,6 +19,7 @@ package prometheus import ( "bytes" "fmt" + "sort" "strconv" "strings" @@ -46,6 +47,34 @@ func newCollector() *collector { } } +// Add adds the metric i to the collector. This method returns an error if the +// metric type is not supported/known. +func (c *collector) Add(name string, i any) error { + switch m := i.(type) { + case metrics.Counter: + c.addCounter(name, m.Snapshot()) + case metrics.CounterFloat64: + c.addCounterFloat64(name, m.Snapshot()) + case metrics.Gauge: + c.addGauge(name, m.Snapshot()) + case metrics.GaugeFloat64: + c.addGaugeFloat64(name, m.Snapshot()) + case metrics.GaugeInfo: + c.addGaugeInfo(name, m.Snapshot()) + case metrics.Histogram: + c.addHistogram(name, m.Snapshot()) + case metrics.Meter: + c.addMeter(name, m.Snapshot()) + case metrics.Timer: + c.addTimer(name, m.Snapshot()) + case metrics.ResettingTimer: + c.addResettingTimer(name, m.Snapshot()) + default: + return fmt.Errorf("unknown prometheus metric type %T", i) + } + return nil +} + func (c *collector) addCounter(name string, m metrics.Counter) { c.writeGaugeCounter(name, m.Count()) } @@ -62,6 +91,10 @@ func (c *collector) addGaugeFloat64(name string, m metrics.GaugeFloat64) { c.writeGaugeCounter(name, m.Value()) } +func (c *collector) addGaugeInfo(name string, m metrics.GaugeInfo) { + c.writeGaugeInfo(name, m.Value()) +} + func (c *collector) addHistogram(name string, m metrics.Histogram) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) @@ -102,6 +135,19 @@ func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { c.buff.WriteRune('\n') } +func (c *collector) writeGaugeInfo(name string, value metrics.GaugeInfoValue) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(typeGaugeTpl, name)) + c.buff.WriteString(name) + c.buff.WriteString(" ") + var kvs []string + for k, v := range value { + kvs = append(kvs, fmt.Sprintf("%v=%q", k, v)) + } + sort.Strings(kvs) + c.buff.WriteString(fmt.Sprintf("{%v} 1\n\n", strings.Join(kvs, ", "))) +} + func (c *collector) writeGaugeCounter(name string, value interface{}) { name = mutateKey(name) c.buff.WriteString(fmt.Sprintf(typeGaugeTpl, name)) diff --git a/metrics/prometheus/collector_test.go b/metrics/prometheus/collector_test.go index eb6dbe603f99..b7a17ff1011e 100644 --- a/metrics/prometheus/collector_test.go +++ b/metrics/prometheus/collector_test.go @@ -1,11 +1,29 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + package prometheus import ( + "fmt" "os" + "strings" "testing" - "time" "github.com/XinFinOrg/XDPoSChain/metrics" + "github.com/XinFinOrg/XDPoSChain/metrics/internal" ) func TestMain(m *testing.M) { @@ -14,104 +32,34 @@ func TestMain(m *testing.M) { } func TestCollector(t *testing.T) { - c := newCollector() - - counter := metrics.NewCounter() - counter.Inc(12345) - c.addCounter("test/counter", counter) - - counterfloat64 := metrics.NewCounterFloat64() - counterfloat64.Inc(54321.98) - c.addCounterFloat64("test/counter_float64", counterfloat64) - - gauge := metrics.NewGauge() - gauge.Update(23456) - c.addGauge("test/gauge", gauge) - - gaugeFloat64 := metrics.NewGaugeFloat64() - gaugeFloat64.Update(34567.89) - c.addGaugeFloat64("test/gauge_float64", gaugeFloat64) - - histogram := metrics.NewHistogram(&metrics.NilSample{}) - c.addHistogram("test/histogram", histogram) - - meter := metrics.NewMeter() - defer meter.Stop() - meter.Mark(9999999) - c.addMeter("test/meter", meter) - - timer := metrics.NewTimer() - defer timer.Stop() - timer.Update(20 * time.Millisecond) - timer.Update(21 * time.Millisecond) - timer.Update(22 * time.Millisecond) - timer.Update(120 * time.Millisecond) - timer.Update(23 * time.Millisecond) - timer.Update(24 * time.Millisecond) - c.addTimer("test/timer", timer) - - resettingTimer := metrics.NewResettingTimer() - resettingTimer.Update(10 * time.Millisecond) - resettingTimer.Update(11 * time.Millisecond) - resettingTimer.Update(12 * time.Millisecond) - resettingTimer.Update(120 * time.Millisecond) - resettingTimer.Update(13 * time.Millisecond) - resettingTimer.Update(14 * time.Millisecond) - c.addResettingTimer("test/resetting_timer", resettingTimer.Snapshot()) - - emptyResettingTimer := metrics.NewResettingTimer().Snapshot() - c.addResettingTimer("test/empty_resetting_timer", emptyResettingTimer) - - const expectedOutput = `# TYPE test_counter gauge -test_counter 12345 - -# TYPE test_counter_float64 gauge -test_counter_float64 54321.98 - -# TYPE test_gauge gauge -test_gauge 23456 - -# TYPE test_gauge_float64 gauge -test_gauge_float64 34567.89 - -# TYPE test_histogram_count counter -test_histogram_count 0 - -# TYPE test_histogram summary -test_histogram {quantile="0.5"} 0 -test_histogram {quantile="0.75"} 0 -test_histogram {quantile="0.95"} 0 -test_histogram {quantile="0.99"} 0 -test_histogram {quantile="0.999"} 0 -test_histogram {quantile="0.9999"} 0 - -# TYPE test_meter gauge -test_meter 9999999 - -# TYPE test_timer_count counter -test_timer_count 6 - -# TYPE test_timer summary -test_timer {quantile="0.5"} 2.25e+07 -test_timer {quantile="0.75"} 4.8e+07 -test_timer {quantile="0.95"} 1.2e+08 -test_timer {quantile="0.99"} 1.2e+08 -test_timer {quantile="0.999"} 1.2e+08 -test_timer {quantile="0.9999"} 1.2e+08 - -# TYPE test_resetting_timer_count counter -test_resetting_timer_count 6 - -# TYPE test_resetting_timer summary -test_resetting_timer {quantile="0.50"} 12000000 -test_resetting_timer {quantile="0.95"} 120000000 -test_resetting_timer {quantile="0.99"} 120000000 + var ( + c = newCollector() + want string + ) + internal.ExampleMetrics().Each(func(name string, i interface{}) { + c.Add(name, i) + }) + if wantB, err := os.ReadFile("./testdata/prometheus.want"); err != nil { + t.Fatal(err) + } else { + want = string(wantB) + } + if have := c.buff.String(); have != want { + t.Logf("have\n%v", have) + t.Logf("have vs want:\n%v", 
findFirstDiffPos(have, want)) + t.Fatalf("unexpected collector output") + } +} -` - exp := c.buff.String() - if exp != expectedOutput { - t.Log("Expected Output:\n", expectedOutput) - t.Log("Actual Output:\n", exp) - t.Fatal("unexpected collector output") +func findFirstDiffPos(a, b string) string { + yy := strings.Split(b, "\n") + for i, x := range strings.Split(a, "\n") { + if i >= len(yy) { + return fmt.Sprintf("a:%d: %s\nb:%d: ", i, x, i) + } + if y := yy[i]; x != y { + return fmt.Sprintf("a:%d: %s\nb:%d: %s", i, x, i, y) + } } + return "" } diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go index 2d999e493dc8..d11db3bae3eb 100644 --- a/metrics/prometheus/prometheus.go +++ b/metrics/prometheus/prometheus.go @@ -41,25 +41,7 @@ func Handler(reg metrics.Registry) http.Handler { for _, name := range names { i := reg.Get(name) - - switch m := i.(type) { - case metrics.Counter: - c.addCounter(name, m.Snapshot()) - case metrics.CounterFloat64: - c.addCounterFloat64(name, m.Snapshot()) - case metrics.Gauge: - c.addGauge(name, m.Snapshot()) - case metrics.GaugeFloat64: - c.addGaugeFloat64(name, m.Snapshot()) - case metrics.Histogram: - c.addHistogram(name, m.Snapshot()) - case metrics.Meter: - c.addMeter(name, m.Snapshot()) - case metrics.Timer: - c.addTimer(name, m.Snapshot()) - case metrics.ResettingTimer: - c.addResettingTimer(name, m.Snapshot()) - default: + if err := c.Add(name, i); err != nil { log.Warn("Unknown Prometheus metric type", "type", fmt.Sprintf("%T", i)) } } diff --git a/metrics/prometheus/testdata/prometheus.want b/metrics/prometheus/testdata/prometheus.want new file mode 100644 index 000000000000..f35496e61d31 --- /dev/null +++ b/metrics/prometheus/testdata/prometheus.want @@ -0,0 +1,48 @@ +# TYPE test_counter gauge +test_counter 12345 + +# TYPE test_counter_float64 gauge +test_counter_float64 54321.98 + +# TYPE test_gauge gauge +test_gauge 23456 + +# TYPE test_gauge_float64 gauge +test_gauge_float64 34567.89 + +# TYPE test_gauge_info gauge +test_gauge_info {arch="amd64", commit="7caa2d8163ae3132c1c2d6978c76610caee2d949", os="linux", protocol_versions="64 65 66", version="1.10.18-unstable"} 1 + +# TYPE test_histogram_count counter +test_histogram_count 3 + +# TYPE test_histogram summary +test_histogram {quantile="0.5"} 2 +test_histogram {quantile="0.75"} 3 +test_histogram {quantile="0.95"} 3 +test_histogram {quantile="0.99"} 3 +test_histogram {quantile="0.999"} 3 +test_histogram {quantile="0.9999"} 3 + +# TYPE test_meter gauge +test_meter 0 + +# TYPE test_resetting_timer_count counter +test_resetting_timer_count 6 + +# TYPE test_resetting_timer summary +test_resetting_timer {quantile="0.50"} 12000000 +test_resetting_timer {quantile="0.95"} 120000000 +test_resetting_timer {quantile="0.99"} 120000000 + +# TYPE test_timer_count counter +test_timer_count 6 + +# TYPE test_timer summary +test_timer {quantile="0.5"} 2.25e+07 +test_timer {quantile="0.75"} 4.8e+07 +test_timer {quantile="0.95"} 1.2e+08 +test_timer {quantile="0.99"} 1.2e+08 +test_timer {quantile="0.999"} 1.2e+08 +test_timer {quantile="0.9999"} 1.2e+08 + diff --git a/metrics/registry.go b/metrics/registry.go index 88e192c0d196..0b54f2dc92c8 100644 --- a/metrics/registry.go +++ b/metrics/registry.go @@ -3,6 +3,7 @@ package metrics import ( "fmt" "reflect" + "sort" "strings" "sync" ) @@ -47,17 +48,39 @@ type Registry interface { Unregister(string) } +type orderedRegistry struct { + StandardRegistry +} + +// Call the given function for each registered metric. 
+func (r *orderedRegistry) Each(f func(string, interface{})) { + var names []string + reg := r.registered() + for name := range reg { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + f(name, reg[name]) + } +} + +// NewRegistry creates a new registry. +func NewRegistry() Registry { + return new(StandardRegistry) +} + +// NewOrderedRegistry creates a new ordered registry (for testing). +func NewOrderedRegistry() Registry { + return new(orderedRegistry) +} + // The standard implementation of a Registry uses sync.map // of names to metrics. type StandardRegistry struct { metrics sync.Map } -// Create a new registry. -func NewRegistry() Registry { - return &StandardRegistry{} -} - // Call the given function for each registered metric. func (r *StandardRegistry) Each(f func(string, interface{})) { for name, i := range r.registered() { @@ -191,7 +214,7 @@ func (r *StandardRegistry) Unregister(name string) { func (r *StandardRegistry) loadOrRegister(name string, i interface{}) (interface{}, bool, bool) { switch i.(type) { - case Counter, CounterFloat64, Gauge, GaugeFloat64, Healthcheck, Histogram, Meter, Timer, ResettingTimer: + case Counter, CounterFloat64, Gauge, GaugeFloat64, GaugeInfo, Healthcheck, Histogram, Meter, Timer, ResettingTimer: default: return nil, false, false } diff --git a/metrics/syslog.go b/metrics/syslog.go index f23b07e199f3..76c849056757 100644 --- a/metrics/syslog.go +++ b/metrics/syslog.go @@ -23,6 +23,8 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Value())) case GaugeFloat64: w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Value())) + case GaugeInfo: + w.Info(fmt.Sprintf("gauge %s: value: %s", name, metric.Value())) case Healthcheck: metric.Check() w.Info(fmt.Sprintf("healthcheck %s: error: %v", name, metric.Error())) diff --git a/metrics/testdata/opentsb.want b/metrics/testdata/opentsb.want new file mode 100644 index 000000000000..c8e40a525042 --- /dev/null +++ b/metrics/testdata/opentsb.want @@ -0,0 +1,23 @@ +put pre.elite.count 978307200 0 host=hal9000 +put pre.elite.one-minute 978307200 0.00 host=hal9000 +put pre.elite.five-minute 978307200 0.00 host=hal9000 +put pre.elite.fifteen-minute 978307200 0.00 host=hal9000 +put pre.elite.mean 978307200 0.00 host=hal9000 +put pre.foo.value 978307200 {"chain_id":"5"} host=hal9000 +put pre.months.count 978307200 12 host=hal9000 +put pre.pi.value 978307200 3.140000 host=hal9000 +put pre.second.count 978307200 1 host=hal9000 +put pre.second.min 978307200 1000 host=hal9000 +put pre.second.max 978307200 1000 host=hal9000 +put pre.second.mean 978307200 1000.00 host=hal9000 +put pre.second.std-dev 978307200 0.00 host=hal9000 +put pre.second.50-percentile 978307200 1000.00 host=hal9000 +put pre.second.75-percentile 978307200 1000.00 host=hal9000 +put pre.second.95-percentile 978307200 1000.00 host=hal9000 +put pre.second.99-percentile 978307200 1000.00 host=hal9000 +put pre.second.999-percentile 978307200 1000.00 host=hal9000 +put pre.second.one-minute 978307200 0.00 host=hal9000 +put pre.second.five-minute 978307200 0.00 host=hal9000 +put pre.second.fifteen-minute 978307200 0.00 host=hal9000 +put pre.second.mean-rate 978307200 0.00 host=hal9000 +put pre.tau.count 978307200 1.570000 host=hal9000 diff --git a/metrics/writer.go b/metrics/writer.go index 82434e9d1d62..ec2e4f8c6a60 100644 --- a/metrics/writer.go +++ b/metrics/writer.go @@ -39,6 +39,9 @@ func WriteOnce(r Registry, w io.Writer) { case GaugeFloat64: 
fmt.Fprintf(w, "gauge %s\n", namedMetric.name) fmt.Fprintf(w, " value: %f\n", metric.Value()) + case GaugeInfo: + fmt.Fprintf(w, "gauge %s\n", namedMetric.name) + fmt.Fprintf(w, " value: %s\n", metric.Value().String()) case Healthcheck: metric.Check() fmt.Fprintf(w, "healthcheck %s\n", namedMetric.name) From 1fe46b8d3f3bd212d3446aa90dae32d5067649ed Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 57/74] metrics/librato: rename receiver re to rep --- metrics/librato/librato.go | 46 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go index 2df2085c6f95..87c3ab6a9088 100644 --- a/metrics/librato/librato.go +++ b/metrics/librato/librato.go @@ -40,15 +40,15 @@ func Librato(r metrics.Registry, d time.Duration, e string, t string, s string, NewReporter(r, d, e, t, s, p, u).Run() } -func (re *Reporter) Run() { +func (rep *Reporter) Run() { log.Printf("WARNING: This client has been DEPRECATED! It has been moved to https://github.com/mihasya/go-metrics-librato and will be removed from rcrowley/go-metrics on August 5th 2015") - ticker := time.NewTicker(re.Interval) + ticker := time.NewTicker(rep.Interval) defer ticker.Stop() - metricsApi := &LibratoClient{re.Email, re.Token} + metricsApi := &LibratoClient{rep.Email, rep.Token} for now := range ticker.C { var metrics Batch var err error - if metrics, err = re.BuildRequest(now, re.Registry); err != nil { + if metrics, err = rep.BuildRequest(now, rep.Registry); err != nil { log.Printf("ERROR constructing librato request body %s", err) continue } @@ -80,21 +80,21 @@ func sumSquaresTimer(t metrics.Timer) float64 { return sumSquares } -func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Batch, err error) { +func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Batch, err error) { snapshot = Batch{ // coerce timestamps to a stepping fn so that they line up in Librato graphs - MeasureTime: (now.Unix() / re.intervalSec) * re.intervalSec, - Source: re.Source, + MeasureTime: (now.Unix() / rep.intervalSec) * rep.intervalSec, + Source: rep.Source, } snapshot.Gauges = make([]Measurement, 0) snapshot.Counters = make([]Measurement, 0) - histogramGaugeCount := 1 + len(re.Percentiles) + histogramGaugeCount := 1 + len(rep.Percentiles) r.Each(func(name string, metric interface{}) { - if re.Namespace != "" { - name = fmt.Sprintf("%s.%s", re.Namespace, name) + if rep.Namespace != "" { + name = fmt.Sprintf("%s.%s", rep.Namespace, name) } measurement := Measurement{} - measurement[Period] = re.Interval.Seconds() + measurement[Period] = rep.Interval.Seconds() switch m := metric.(type) { case metrics.Counter: if m.Count() > 0 { @@ -141,7 +141,7 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba measurement[Sum] = float64(s.Sum()) measurement[SumSquares] = sumSquares(s) gauges[0] = measurement - for i, p := range re.Percentiles { + for i, p := range rep.Percentiles { gauges[i+1] = Measurement{ Name: fmt.Sprintf("%s.%.2f", measurement[Name], p), Value: s.Percentile(p), @@ -158,7 +158,7 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba Measurement{ Name: fmt.Sprintf("%s.%s", name, "1min"), Value: m.Rate1(), - Period: int64(re.Interval.Seconds()), + Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, @@ -168,7 +168,7 @@ func 
(re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba Measurement{ Name: fmt.Sprintf("%s.%s", name, "5min"), Value: m.Rate5(), - Period: int64(re.Interval.Seconds()), + Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, @@ -178,7 +178,7 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba Measurement{ Name: fmt.Sprintf("%s.%s", name, "15min"), Value: m.Rate15(), - Period: int64(re.Interval.Seconds()), + Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, @@ -200,15 +200,15 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba Max: float64(m.Max()), Min: float64(m.Min()), SumSquares: sumSquaresTimer(m), - Period: int64(re.Interval.Seconds()), - Attributes: re.TimerAttributes, + Period: int64(rep.Interval.Seconds()), + Attributes: rep.TimerAttributes, } - for i, p := range re.Percentiles { + for i, p := range rep.Percentiles { gauges[i+1] = Measurement{ Name: fmt.Sprintf("%s.timer.%2.0f", name, p*100), Value: m.Percentile(p), - Period: int64(re.Interval.Seconds()), - Attributes: re.TimerAttributes, + Period: int64(rep.Interval.Seconds()), + Attributes: rep.TimerAttributes, } } snapshot.Gauges = append(snapshot.Gauges, gauges...) @@ -216,7 +216,7 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba Measurement{ Name: fmt.Sprintf("%s.%s", name, "rate.1min"), Value: m.Rate1(), - Period: int64(re.Interval.Seconds()), + Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, @@ -226,7 +226,7 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba Measurement{ Name: fmt.Sprintf("%s.%s", name, "rate.5min"), Value: m.Rate5(), - Period: int64(re.Interval.Seconds()), + Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, @@ -236,7 +236,7 @@ func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Ba Measurement{ Name: fmt.Sprintf("%s.%s", name, "rate.15min"), Value: m.Rate15(), - Period: int64(re.Interval.Seconds()), + Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, From cc4ea7a68594c9292a05437895e774a15780931a Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 58/74] metrics: refactor metrics (#28035) This change includes a lot of things, listed below. The interfaces have been split up into one write-interface and one read-interface, with `Snapshot` being the gateway from write to read. This simplifies the semantics _a lot_. Example of splitting up an interface into one readonly 'snapshot' part, and one updatable writeonly part: ```golang type MeterSnapshot interface { Count() int64 Rate1() float64 Rate5() float64 Rate15() float64 RateMean() float64 } // Meters count events to produce exponentially-weighted moving average rates // at one-, five-, and fifteen-minutes and a mean rate. type Meter interface { Mark(int64) Snapshot() MeterSnapshot Stop() } ``` This PR makes the concurrency model clearer. We have actual meters and snapshot of meters. The `meter` is the thing which can be accessed from the registry, and updates can be made to it. 
- For all `meters` (`Gauge`, `Timer` etc), it is assumed that they are accessed by different threads, making updates. Therefore, all `meters` update-methods (`Inc`, `Add`, `Update`, `Clear` etc) need to be concurrency-safe.
- All `meters` have a `Snapshot()` method. This method is _usually_ called from one thread, a backend-exporter. But it's fully possible to have several exporters simultaneously: therefore this method should also be concurrency-safe.

TLDR: `meter`s are accessible via registry, all their methods must be concurrency-safe.

For all `Snapshot`s, it is assumed that an individual exporter-thread has obtained a `meter` from the registry, and called the `Snapshot` method to obtain a readonly snapshot. This snapshot is _not_ guaranteed to be concurrency-safe. There's no need for a snapshot to be concurrency-safe, since exporters should not share snapshots. Note, though, that by happenstance a lot of the snapshots _are_ concurrency-safe, being immutable minimal representations of a value. Only the more complex ones are _not_ threadsafe, those that lazily calculate things like `Variance()`, `Mean()`.

Example of how a background exporter typically works, obtaining the snapshot and sequentially accessing the non-threadsafe methods in it:
```golang
ms := metric.Snapshot()
...
fields := map[string]interface{}{
	"count":    ms.Count(),
	"max":      ms.Max(),
	"mean":     ms.Mean(),
	"min":      ms.Min(),
	"stddev":   ms.StdDev(),
	"variance": ms.Variance(),
```
TLDR: `snapshots` are not guaranteed to be concurrency-safe (but often are).

I also changed the `Sample` type: previously, it iterated the samples fully every time `Mean()`, `Sum()`, `Min()` or `Max()` was invoked. Since we now have readonly base data, we can just iterate it once, in the constructor, and set all four values at once. The same thing has been done for runtimehistogram.

Back when ResettingTimer was implemented, as part of https://github.com/ethereum/go-ethereum/pull/15910, Anton implemented a `Percentiles` on the new type. However, the method did not conform to the other existing types which also had a `Percentiles`.

1. The existing ones, on input, took `0.5` to mean `50%`. Anton used `50` to mean `50%`.
2. The existing ones returned `float64` outputs, thus interpolating between values. A value-set of `0, 10`, at `50%` would return `5`, whereas Anton's would return either `0` or `10`.

This PR removes the 'new' version, and uses only the 'legacy' percentiles, also for the ResettingTimer type. The resetting timer snapshot was also defined so that it would expose the internal values. This has been removed, and getters for `Max, Min, Mean` have been added instead.

A lot of types were exported, but do not need to be. This PR unexports quite a lot of them.
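As a further illustration of the split (a sketch, not part of this change; the metric name "test/requests" is hypothetical), application code keeps updating the writable meter while an exporter works off a read-only snapshot:

```golang
package main

import (
	"fmt"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	metrics.Enabled = true

	// Writer side: any goroutine may update the counter; update methods are concurrency-safe.
	c := metrics.GetOrRegisterCounter("test/requests", nil)
	c.Inc(3)

	// Reader side (e.g. an exporter): take a snapshot, then read from it.
	// The snapshot exposes only read methods (Count), no Inc/Dec/Clear.
	snap := c.Snapshot()
	fmt.Println(snap.Count()) // 3
}
```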
metrics: refactor metrics (28035) --- consensus/ethash/ethash.go | 2 +- metrics/counter.go | 72 +--- metrics/counter_float64.go | 61 +-- metrics/counter_float_64_test.go | 16 +- metrics/counter_test.go | 14 +- metrics/doc.go | 4 - metrics/ewma.go | 91 ++--- metrics/ewma_test.go | 233 +++-------- metrics/exp/exp.go | 32 +- metrics/gauge.go | 116 ++---- metrics/gauge_float64.go | 80 +--- metrics/gauge_float64_test.go | 33 +- metrics/gauge_info.go | 88 +--- metrics/gauge_info_test.go | 49 +-- metrics/gauge_test.go | 39 +- metrics/graphite.go | 10 +- metrics/histogram.go | 158 +------ metrics/histogram_test.go | 8 +- metrics/inactive.go | 48 +++ metrics/influxdb/influxdb.go | 21 +- metrics/influxdb/influxdb_test.go | 2 +- metrics/influxdb/testdata/influxdbv1.want | 4 +- metrics/influxdb/testdata/influxdbv2.want | 4 +- metrics/internal/sampledata.go | 33 +- metrics/internal/sampledata_test.go | 27 ++ metrics/librato/librato.go | 73 ++-- metrics/log.go | 10 +- metrics/meter.go | 207 ++-------- metrics/meter_test.go | 30 +- metrics/metrics.go | 6 + metrics/metrics_test.go | 4 +- metrics/opentsdb.go | 10 +- metrics/opentsdb_test.go | 15 + metrics/prometheus/collector.go | 25 +- metrics/prometheus/collector_test.go | 4 +- metrics/prometheus/testdata/prometheus.want | 28 +- metrics/registry.go | 8 +- metrics/registry_test.go | 4 +- metrics/resetting_sample.go | 2 +- metrics/resetting_timer.go | 200 ++++----- metrics/resetting_timer_test.go | 141 +++++-- metrics/runtimehistogram.go | 218 +++++----- metrics/runtimehistogram_test.go | 37 +- metrics/sample.go | 432 ++++++-------------- metrics/sample_test.go | 150 +++---- metrics/syslog.go | 10 +- metrics/testdata/opentsb.want | 2 +- metrics/timer.go | 214 ++-------- metrics/timer_test.go | 10 +- metrics/writer.go | 10 +- 50 files changed, 1107 insertions(+), 1988 deletions(-) delete mode 100644 metrics/doc.go create mode 100644 metrics/inactive.go create mode 100644 metrics/internal/sampledata_test.go diff --git a/consensus/ethash/ethash.go b/consensus/ethash/ethash.go index a65468f57103..98141b29393c 100644 --- a/consensus/ethash/ethash.go +++ b/consensus/ethash/ethash.go @@ -562,7 +562,7 @@ func (ethash *Ethash) SetThreads(threads int) { // Hashrate implements PoW, returning the measured rate of the search invocations // per second over the last minute. func (ethash *Ethash) Hashrate() float64 { - return ethash.hashrate.Rate1() + return ethash.hashrate.Snapshot().Rate1() } // APIs implements consensus.Engine, returning the user facing RPC APIs. Currently diff --git a/metrics/counter.go b/metrics/counter.go index 55e1c59540f6..cb81599c215a 100644 --- a/metrics/counter.go +++ b/metrics/counter.go @@ -4,13 +4,16 @@ import ( "sync/atomic" ) +type CounterSnapshot interface { + Count() int64 +} + // Counters hold an int64 value that can be incremented and decremented. type Counter interface { Clear() - Count() int64 Dec(int64) Inc(int64) - Snapshot() Counter + Snapshot() CounterSnapshot } // GetOrRegisterCounter returns an existing Counter or constructs and registers @@ -38,13 +41,13 @@ func NewCounter() Counter { if !Enabled { return NilCounter{} } - return &StandardCounter{} + return new(StandardCounter) } // NewCounterForced constructs a new StandardCounter and returns it no matter if // the global switch is enabled or not. func NewCounterForced() Counter { - return &StandardCounter{} + return new(StandardCounter) } // NewRegisteredCounter constructs and registers a new StandardCounter. 
@@ -70,75 +73,40 @@ func NewRegisteredCounterForced(name string, r Registry) Counter { return c } -// CounterSnapshot is a read-only copy of another Counter. -type CounterSnapshot int64 - -// Clear panics. -func (CounterSnapshot) Clear() { - panic("Clear called on a CounterSnapshot") -} +// counterSnapshot is a read-only copy of another Counter. +type counterSnapshot int64 // Count returns the count at the time the snapshot was taken. -func (c CounterSnapshot) Count() int64 { return int64(c) } - -// Dec panics. -func (CounterSnapshot) Dec(int64) { - panic("Dec called on a CounterSnapshot") -} - -// Inc panics. -func (CounterSnapshot) Inc(int64) { - panic("Inc called on a CounterSnapshot") -} - -// Snapshot returns the snapshot. -func (c CounterSnapshot) Snapshot() Counter { return c } +func (c counterSnapshot) Count() int64 { return int64(c) } // NilCounter is a no-op Counter. type NilCounter struct{} -// Clear is a no-op. -func (NilCounter) Clear() {} - -// Count is a no-op. -func (NilCounter) Count() int64 { return 0 } - -// Dec is a no-op. -func (NilCounter) Dec(i int64) {} - -// Inc is a no-op. -func (NilCounter) Inc(i int64) {} - -// Snapshot is a no-op. -func (NilCounter) Snapshot() Counter { return NilCounter{} } +func (NilCounter) Clear() {} +func (NilCounter) Dec(i int64) {} +func (NilCounter) Inc(i int64) {} +func (NilCounter) Snapshot() CounterSnapshot { return (*emptySnapshot)(nil) } // StandardCounter is the standard implementation of a Counter and uses the // sync/atomic package to manage a single int64 value. -type StandardCounter struct { - count atomic.Int64 -} +type StandardCounter atomic.Int64 // Clear sets the counter to zero. func (c *StandardCounter) Clear() { - c.count.Store(0) -} - -// Count returns the current count. -func (c *StandardCounter) Count() int64 { - return c.count.Load() + (*atomic.Int64)(c).Store(0) } // Dec decrements the counter by the given amount. func (c *StandardCounter) Dec(i int64) { - c.count.Add(-i) + (*atomic.Int64)(c).Add(-i) } // Inc increments the counter by the given amount. func (c *StandardCounter) Inc(i int64) { - c.count.Add(i) + (*atomic.Int64)(c).Add(i) } // Snapshot returns a read-only copy of the counter. -func (c *StandardCounter) Snapshot() Counter { - return CounterSnapshot(c.Count()) +func (c *StandardCounter) Snapshot() CounterSnapshot { + return counterSnapshot((*atomic.Int64)(c).Load()) } diff --git a/metrics/counter_float64.go b/metrics/counter_float64.go index d1197bb8e0ae..15c81494efb8 100644 --- a/metrics/counter_float64.go +++ b/metrics/counter_float64.go @@ -5,13 +5,16 @@ import ( "sync/atomic" ) +type CounterFloat64Snapshot interface { + Count() float64 +} + // CounterFloat64 holds a float64 value that can be incremented and decremented. type CounterFloat64 interface { Clear() - Count() float64 Dec(float64) Inc(float64) - Snapshot() CounterFloat64 + Snapshot() CounterFloat64Snapshot } // GetOrRegisterCounterFloat64 returns an existing CounterFloat64 or constructs and registers @@ -71,47 +74,19 @@ func NewRegisteredCounterFloat64Forced(name string, r Registry) CounterFloat64 { return c } -// CounterFloat64Snapshot is a read-only copy of another CounterFloat64. -type CounterFloat64Snapshot float64 - -// Clear panics. -func (CounterFloat64Snapshot) Clear() { - panic("Clear called on a CounterFloat64Snapshot") -} +// counterFloat64Snapshot is a read-only copy of another CounterFloat64. +type counterFloat64Snapshot float64 // Count returns the value at the time the snapshot was taken. 
-func (c CounterFloat64Snapshot) Count() float64 { return float64(c) } - -// Dec panics. -func (CounterFloat64Snapshot) Dec(float64) { - panic("Dec called on a CounterFloat64Snapshot") -} +func (c counterFloat64Snapshot) Count() float64 { return float64(c) } -// Inc panics. -func (CounterFloat64Snapshot) Inc(float64) { - panic("Inc called on a CounterFloat64Snapshot") -} - -// Snapshot returns the snapshot. -func (c CounterFloat64Snapshot) Snapshot() CounterFloat64 { return c } - -// NilCounterFloat64 is a no-op CounterFloat64. type NilCounterFloat64 struct{} -// Clear is a no-op. -func (NilCounterFloat64) Clear() {} - -// Count is a no-op. -func (NilCounterFloat64) Count() float64 { return 0.0 } - -// Dec is a no-op. -func (NilCounterFloat64) Dec(i float64) {} - -// Inc is a no-op. -func (NilCounterFloat64) Inc(i float64) {} - -// Snapshot is a no-op. -func (NilCounterFloat64) Snapshot() CounterFloat64 { return NilCounterFloat64{} } +func (NilCounterFloat64) Clear() {} +func (NilCounterFloat64) Count() float64 { return 0.0 } +func (NilCounterFloat64) Dec(i float64) {} +func (NilCounterFloat64) Inc(i float64) {} +func (NilCounterFloat64) Snapshot() CounterFloat64Snapshot { return NilCounterFloat64{} } // StandardCounterFloat64 is the standard implementation of a CounterFloat64 and uses the // atomic to manage a single float64 value. @@ -124,11 +99,6 @@ func (c *StandardCounterFloat64) Clear() { c.floatBits.Store(0) } -// Count returns the current value. -func (c *StandardCounterFloat64) Count() float64 { - return math.Float64frombits(c.floatBits.Load()) -} - // Dec decrements the counter by the given amount. func (c *StandardCounterFloat64) Dec(v float64) { atomicAddFloat(&c.floatBits, -v) @@ -140,8 +110,9 @@ func (c *StandardCounterFloat64) Inc(v float64) { } // Snapshot returns a read-only copy of the counter. 
-func (c *StandardCounterFloat64) Snapshot() CounterFloat64 { - return CounterFloat64Snapshot(c.Count()) +func (c *StandardCounterFloat64) Snapshot() CounterFloat64Snapshot { + v := math.Float64frombits(c.floatBits.Load()) + return counterFloat64Snapshot(v) } func atomicAddFloat(fbits *atomic.Uint64, v float64) { diff --git a/metrics/counter_float_64_test.go b/metrics/counter_float_64_test.go index f17aca330cbe..c21bd3307fa1 100644 --- a/metrics/counter_float_64_test.go +++ b/metrics/counter_float_64_test.go @@ -27,7 +27,7 @@ func BenchmarkCounterFloat64Parallel(b *testing.B) { }() } wg.Wait() - if have, want := c.Count(), 10.0*float64(b.N); have != want { + if have, want := c.Snapshot().Count(), 10.0*float64(b.N); have != want { b.Fatalf("have %f want %f", have, want) } } @@ -36,7 +36,7 @@ func TestCounterFloat64Clear(t *testing.T) { c := NewCounterFloat64() c.Inc(1.0) c.Clear() - if count := c.Count(); count != 0 { + if count := c.Snapshot().Count(); count != 0 { t.Errorf("c.Count(): 0 != %v\n", count) } } @@ -44,7 +44,7 @@ func TestCounterFloat64Clear(t *testing.T) { func TestCounterFloat64Dec1(t *testing.T) { c := NewCounterFloat64() c.Dec(1.0) - if count := c.Count(); count != -1.0 { + if count := c.Snapshot().Count(); count != -1.0 { t.Errorf("c.Count(): -1.0 != %v\n", count) } } @@ -52,7 +52,7 @@ func TestCounterFloat64Dec1(t *testing.T) { func TestCounterFloat64Dec2(t *testing.T) { c := NewCounterFloat64() c.Dec(2.0) - if count := c.Count(); count != -2.0 { + if count := c.Snapshot().Count(); count != -2.0 { t.Errorf("c.Count(): -2.0 != %v\n", count) } } @@ -60,7 +60,7 @@ func TestCounterFloat64Dec2(t *testing.T) { func TestCounterFloat64Inc1(t *testing.T) { c := NewCounterFloat64() c.Inc(1.0) - if count := c.Count(); count != 1.0 { + if count := c.Snapshot().Count(); count != 1.0 { t.Errorf("c.Count(): 1.0 != %v\n", count) } } @@ -68,7 +68,7 @@ func TestCounterFloat64Inc1(t *testing.T) { func TestCounterFloat64Inc2(t *testing.T) { c := NewCounterFloat64() c.Inc(2.0) - if count := c.Count(); count != 2.0 { + if count := c.Snapshot().Count(); count != 2.0 { t.Errorf("c.Count(): 2.0 != %v\n", count) } } @@ -85,7 +85,7 @@ func TestCounterFloat64Snapshot(t *testing.T) { func TestCounterFloat64Zero(t *testing.T) { c := NewCounterFloat64() - if count := c.Count(); count != 0 { + if count := c.Snapshot().Count(); count != 0 { t.Errorf("c.Count(): 0 != %v\n", count) } } @@ -93,7 +93,7 @@ func TestCounterFloat64Zero(t *testing.T) { func TestGetOrRegisterCounterFloat64(t *testing.T) { r := NewRegistry() NewRegisteredCounterFloat64("foo", r).Inc(47.0) - if c := GetOrRegisterCounterFloat64("foo", r); c.Count() != 47.0 { + if c := GetOrRegisterCounterFloat64("foo", r).Snapshot(); c.Count() != 47.0 { t.Fatal(c) } } diff --git a/metrics/counter_test.go b/metrics/counter_test.go index af26ef1548fe..1b15b23f215f 100644 --- a/metrics/counter_test.go +++ b/metrics/counter_test.go @@ -14,7 +14,7 @@ func TestCounterClear(t *testing.T) { c := NewCounter() c.Inc(1) c.Clear() - if count := c.Count(); count != 0 { + if count := c.Snapshot().Count(); count != 0 { t.Errorf("c.Count(): 0 != %v\n", count) } } @@ -22,7 +22,7 @@ func TestCounterClear(t *testing.T) { func TestCounterDec1(t *testing.T) { c := NewCounter() c.Dec(1) - if count := c.Count(); count != -1 { + if count := c.Snapshot().Count(); count != -1 { t.Errorf("c.Count(): -1 != %v\n", count) } } @@ -30,7 +30,7 @@ func TestCounterDec1(t *testing.T) { func TestCounterDec2(t *testing.T) { c := NewCounter() c.Dec(2) - if count := c.Count(); count != 
-2 { + if count := c.Snapshot().Count(); count != -2 { t.Errorf("c.Count(): -2 != %v\n", count) } } @@ -38,7 +38,7 @@ func TestCounterDec2(t *testing.T) { func TestCounterInc1(t *testing.T) { c := NewCounter() c.Inc(1) - if count := c.Count(); count != 1 { + if count := c.Snapshot().Count(); count != 1 { t.Errorf("c.Count(): 1 != %v\n", count) } } @@ -46,7 +46,7 @@ func TestCounterInc1(t *testing.T) { func TestCounterInc2(t *testing.T) { c := NewCounter() c.Inc(2) - if count := c.Count(); count != 2 { + if count := c.Snapshot().Count(); count != 2 { t.Errorf("c.Count(): 2 != %v\n", count) } } @@ -63,7 +63,7 @@ func TestCounterSnapshot(t *testing.T) { func TestCounterZero(t *testing.T) { c := NewCounter() - if count := c.Count(); count != 0 { + if count := c.Snapshot().Count(); count != 0 { t.Errorf("c.Count(): 0 != %v\n", count) } } @@ -71,7 +71,7 @@ func TestCounterZero(t *testing.T) { func TestGetOrRegisterCounter(t *testing.T) { r := NewRegistry() NewRegisteredCounter("foo", r).Inc(47) - if c := GetOrRegisterCounter("foo", r); c.Count() != 47 { + if c := GetOrRegisterCounter("foo", r).Snapshot(); c.Count() != 47 { t.Fatal(c) } } diff --git a/metrics/doc.go b/metrics/doc.go deleted file mode 100644 index 13f429c1689d..000000000000 --- a/metrics/doc.go +++ /dev/null @@ -1,4 +0,0 @@ -package metrics - -const epsilon = 0.0000000000000001 -const epsilonPercentile = .00000000001 diff --git a/metrics/ewma.go b/metrics/ewma.go index ed95cba19b4f..1d7a4f00cf45 100644 --- a/metrics/ewma.go +++ b/metrics/ewma.go @@ -7,11 +7,14 @@ import ( "time" ) +type EWMASnapshot interface { + Rate() float64 +} + // EWMAs continuously calculate an exponentially-weighted moving average // based on an outside source of clock ticks. type EWMA interface { - Rate() float64 - Snapshot() EWMA + Snapshot() EWMASnapshot Tick() Update(int64) } @@ -36,40 +39,19 @@ func NewEWMA15() EWMA { return NewEWMA(1 - math.Exp(-5.0/60.0/15)) } -// EWMASnapshot is a read-only copy of another EWMA. -type EWMASnapshot float64 +// ewmaSnapshot is a read-only copy of another EWMA. +type ewmaSnapshot float64 // Rate returns the rate of events per second at the time the snapshot was // taken. -func (a EWMASnapshot) Rate() float64 { return float64(a) } - -// Snapshot returns the snapshot. -func (a EWMASnapshot) Snapshot() EWMA { return a } - -// Tick panics. -func (EWMASnapshot) Tick() { - panic("Tick called on an EWMASnapshot") -} - -// Update panics. -func (EWMASnapshot) Update(int64) { - panic("Update called on an EWMASnapshot") -} +func (a ewmaSnapshot) Rate() float64 { return float64(a) } // NilEWMA is a no-op EWMA. type NilEWMA struct{} -// Rate is a no-op. -func (NilEWMA) Rate() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilEWMA) Snapshot() EWMA { return NilEWMA{} } - -// Tick is a no-op. -func (NilEWMA) Tick() {} - -// Update is a no-op. -func (NilEWMA) Update(n int64) {} +func (NilEWMA) Snapshot() EWMASnapshot { return (*emptySnapshot)(nil) } +func (NilEWMA) Tick() {} +func (NilEWMA) Update(n int64) {} // StandardEWMA is the standard implementation of an EWMA and tracks the number // of uncounted events and processes them on each tick. It uses the @@ -77,37 +59,50 @@ func (NilEWMA) Update(n int64) {} type StandardEWMA struct { uncounted atomic.Int64 alpha float64 - rate float64 - init bool + rate atomic.Uint64 + init atomic.Bool mutex sync.Mutex } -// Rate returns the moving average rate of events per second. 
-func (a *StandardEWMA) Rate() float64 { - a.mutex.Lock() - defer a.mutex.Unlock() - return a.rate * float64(time.Second) -} - // Snapshot returns a read-only copy of the EWMA. -func (a *StandardEWMA) Snapshot() EWMA { - return EWMASnapshot(a.Rate()) +func (a *StandardEWMA) Snapshot() EWMASnapshot { + r := math.Float64frombits(a.rate.Load()) * float64(time.Second) + return ewmaSnapshot(r) } // Tick ticks the clock to update the moving average. It assumes it is called // every five seconds. func (a *StandardEWMA) Tick() { - count := a.uncounted.Load() - a.uncounted.Add(-count) - instantRate := float64(count) / float64(5*time.Second) + // Optimization to avoid mutex locking in the hot-path. + if a.init.Load() { + a.updateRate(a.fetchInstantRate()) + return + } + // Slow-path: this is only needed on the first Tick() and preserves transactional updating + // of init and rate in the else block. The first conditional is needed below because + // a different thread could have set a.init = 1 between the time of the first atomic load and when + // the lock was acquired. a.mutex.Lock() - defer a.mutex.Unlock() - if a.init { - a.rate += a.alpha * (instantRate - a.rate) + if a.init.Load() { + // The fetchInstantRate() uses atomic loading, which is unnecessary in this critical section + // but again, this section is only invoked on the first successful Tick() operation. + a.updateRate(a.fetchInstantRate()) } else { - a.init = true - a.rate = instantRate + a.init.Store(true) + a.rate.Store(math.Float64bits(a.fetchInstantRate())) } + a.mutex.Unlock() +} + +func (a *StandardEWMA) fetchInstantRate() float64 { + count := a.uncounted.Swap(0) + return float64(count) / float64(5*time.Second) +} + +func (a *StandardEWMA) updateRate(instantRate float64) { + currentRate := math.Float64frombits(a.rate.Load()) + currentRate += a.alpha * (instantRate - currentRate) + a.rate.Store(math.Float64bits(currentRate)) } // Update adds n uncounted events. 
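The common thread of this refactor is that the read accessors (Count, Value, Rate, and friends) move off the live metric interfaces onto read-only snapshot types, so call sites now go through Snapshot() first. A minimal usage sketch against the refactored API (illustrative only, not part of the patch; the metric names are arbitrary, and it assumes metrics.Enabled is set so the standard implementations rather than the nil ones are returned):

package main

import (
	"fmt"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	// A nil registry falls back to metrics.DefaultRegistry.
	c := metrics.NewRegisteredCounter("example/counter", nil)
	c.Inc(3)
	// Before this refactor reads were c.Count(); now they go through the snapshot.
	fmt.Println(c.Snapshot().Count()) // 3 (0 if metrics.Enabled is false)

	g := metrics.NewRegisteredGauge("example/gauge", nil)
	g.Update(47)
	fmt.Println(g.Snapshot().Value()) // 47 (0 if metrics.Enabled is false)
}
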
diff --git a/metrics/ewma_test.go b/metrics/ewma_test.go index 5b244191616e..9a91b43db81a 100644 --- a/metrics/ewma_test.go +++ b/metrics/ewma_test.go @@ -5,6 +5,8 @@ import ( "testing" ) +const epsilon = 0.0000000000000001 + func BenchmarkEWMA(b *testing.B) { a := NewEWMA1() b.ResetTimer() @@ -14,72 +16,33 @@ func BenchmarkEWMA(b *testing.B) { } } +func BenchmarkEWMAParallel(b *testing.B) { + a := NewEWMA1() + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + a.Update(1) + a.Tick() + } + }) +} + func TestEWMA1(t *testing.T) { a := NewEWMA1() a.Update(3) a.Tick() - if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { - t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.22072766470286553-rate) > epsilon { - t.Errorf("1 minute a.Rate(): 0.22072766470286553 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.08120116994196772-rate) > epsilon { - t.Errorf("2 minute a.Rate(): 0.08120116994196772 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.029872241020718428-rate) > epsilon { - t.Errorf("3 minute a.Rate(): 0.029872241020718428 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.01098938333324054-rate) > epsilon { - t.Errorf("4 minute a.Rate(): 0.01098938333324054 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.004042768199451294-rate) > epsilon { - t.Errorf("5 minute a.Rate(): 0.004042768199451294 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.0014872513059998212-rate) > epsilon { - t.Errorf("6 minute a.Rate(): 0.0014872513059998212 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.0005471291793327122-rate) > epsilon { - t.Errorf("7 minute a.Rate(): 0.0005471291793327122 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.00020127757674150815-rate) > epsilon { - t.Errorf("8 minute a.Rate(): 0.00020127757674150815 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(7.404588245200814e-05-rate) > epsilon { - t.Errorf("9 minute a.Rate(): 7.404588245200814e-05 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(2.7239957857491083e-05-rate) > epsilon { - t.Errorf("10 minute a.Rate(): 2.7239957857491083e-05 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(1.0021020474147462e-05-rate) > epsilon { - t.Errorf("11 minute a.Rate(): 1.0021020474147462e-05 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(3.6865274119969525e-06-rate) > epsilon { - t.Errorf("12 minute a.Rate(): 3.6865274119969525e-06 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(1.3561976441886433e-06-rate) > epsilon { - t.Errorf("13 minute a.Rate(): 1.3561976441886433e-06 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(4.989172314621449e-07-rate) > epsilon { - t.Errorf("14 minute a.Rate(): 4.989172314621449e-07 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(1.8354139230109722e-07-rate) > epsilon { - t.Errorf("15 minute a.Rate(): 1.8354139230109722e-07 != %v\n", rate) + for i, want := range []float64{0.6, + 0.22072766470286553, 0.08120116994196772, 0.029872241020718428, + 0.01098938333324054, 0.004042768199451294, 0.0014872513059998212, + 0.0005471291793327122, 0.00020127757674150815, 7.404588245200814e-05, + 2.7239957857491083e-05, 1.0021020474147462e-05, 3.6865274119969525e-06, + 1.3561976441886433e-06, 4.989172314621449e-07, 1.8354139230109722e-07, + } { + if rate 
:= a.Snapshot().Rate(); math.Abs(want-rate) > epsilon { + t.Errorf("%d minute a.Snapshot().Rate(): %f != %v\n", i, want, rate) + } + elapseMinute(a) } } @@ -87,68 +50,17 @@ func TestEWMA5(t *testing.T) { a := NewEWMA5() a.Update(3) a.Tick() - if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { - t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.49123845184678905-rate) > epsilon { - t.Errorf("1 minute a.Rate(): 0.49123845184678905 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.4021920276213837-rate) > epsilon { - t.Errorf("2 minute a.Rate(): 0.4021920276213837 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.32928698165641596-rate) > epsilon { - t.Errorf("3 minute a.Rate(): 0.32928698165641596 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.269597378470333-rate) > epsilon { - t.Errorf("4 minute a.Rate(): 0.269597378470333 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.2207276647028654-rate) > epsilon { - t.Errorf("5 minute a.Rate(): 0.2207276647028654 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.18071652714732128-rate) > epsilon { - t.Errorf("6 minute a.Rate(): 0.18071652714732128 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.14795817836496392-rate) > epsilon { - t.Errorf("7 minute a.Rate(): 0.14795817836496392 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.12113791079679326-rate) > epsilon { - t.Errorf("8 minute a.Rate(): 0.12113791079679326 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.09917933293295193-rate) > epsilon { - t.Errorf("9 minute a.Rate(): 0.09917933293295193 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.08120116994196763-rate) > epsilon { - t.Errorf("10 minute a.Rate(): 0.08120116994196763 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.06648189501740036-rate) > epsilon { - t.Errorf("11 minute a.Rate(): 0.06648189501740036 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.05443077197364752-rate) > epsilon { - t.Errorf("12 minute a.Rate(): 0.05443077197364752 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.04456414692860035-rate) > epsilon { - t.Errorf("13 minute a.Rate(): 0.04456414692860035 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.03648603757513079-rate) > epsilon { - t.Errorf("14 minute a.Rate(): 0.03648603757513079 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.0298722410207183831020718428-rate) > epsilon { - t.Errorf("15 minute a.Rate(): 0.0298722410207183831020718428 != %v\n", rate) + for i, want := range []float64{ + 0.6, 0.49123845184678905, 0.4021920276213837, 0.32928698165641596, + 0.269597378470333, 0.2207276647028654, 0.18071652714732128, + 0.14795817836496392, 0.12113791079679326, 0.09917933293295193, + 0.08120116994196763, 0.06648189501740036, 0.05443077197364752, + 0.04456414692860035, 0.03648603757513079, 0.0298722410207183831020718428, + } { + if rate := a.Snapshot().Rate(); math.Abs(want-rate) > epsilon { + t.Errorf("%d minute a.Snapshot().Rate(): %f != %v\n", i, want, rate) + } + elapseMinute(a) } } @@ -156,68 +68,17 @@ func TestEWMA15(t *testing.T) { a := NewEWMA15() a.Update(3) a.Tick() - if rate := a.Rate(); math.Abs(0.6-rate) > epsilon { - t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); 
math.Abs(0.5613041910189706-rate) > epsilon { - t.Errorf("1 minute a.Rate(): 0.5613041910189706 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.5251039914257684-rate) > epsilon { - t.Errorf("2 minute a.Rate(): 0.5251039914257684 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.4912384518467888184678905-rate) > epsilon { - t.Errorf("3 minute a.Rate(): 0.4912384518467888184678905 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.459557003018789-rate) > epsilon { - t.Errorf("4 minute a.Rate(): 0.459557003018789 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.4299187863442732-rate) > epsilon { - t.Errorf("5 minute a.Rate(): 0.4299187863442732 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.4021920276213831-rate) > epsilon { - t.Errorf("6 minute a.Rate(): 0.4021920276213831 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.37625345116383313-rate) > epsilon { - t.Errorf("7 minute a.Rate(): 0.37625345116383313 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.3519877317060185-rate) > epsilon { - t.Errorf("8 minute a.Rate(): 0.3519877317060185 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.3292869816564153165641596-rate) > epsilon { - t.Errorf("9 minute a.Rate(): 0.3292869816564153165641596 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.3080502714195546-rate) > epsilon { - t.Errorf("10 minute a.Rate(): 0.3080502714195546 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.2881831806538789-rate) > epsilon { - t.Errorf("11 minute a.Rate(): 0.2881831806538789 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.26959737847033216-rate) > epsilon { - t.Errorf("12 minute a.Rate(): 0.26959737847033216 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.2522102307052083-rate) > epsilon { - t.Errorf("13 minute a.Rate(): 0.2522102307052083 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.23594443252115815-rate) > epsilon { - t.Errorf("14 minute a.Rate(): 0.23594443252115815 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); math.Abs(0.2207276647028646247028654470286553-rate) > epsilon { - t.Errorf("15 minute a.Rate(): 0.2207276647028646247028654470286553 != %v\n", rate) + for i, want := range []float64{ + 0.6, 0.5613041910189706, 0.5251039914257684, 0.4912384518467888184678905, + 0.459557003018789, 0.4299187863442732, 0.4021920276213831, + 0.37625345116383313, 0.3519877317060185, 0.3292869816564153165641596, + 0.3080502714195546, 0.2881831806538789, 0.26959737847033216, + 0.2522102307052083, 0.23594443252115815, 0.2207276647028646247028654470286553, + } { + if rate := a.Snapshot().Rate(); math.Abs(want-rate) > epsilon { + t.Errorf("%d minute a.Snapshot().Rate(): %f != %v\n", i, want, rate) + } + elapseMinute(a) } } diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 6091f2f848b5..e7f54d1320fa 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -109,25 +109,25 @@ func (exp *exp) getInfo(name string) *expvar.String { return v } -func (exp *exp) publishCounter(name string, metric metrics.Counter) { +func (exp *exp) publishCounter(name string, metric metrics.CounterSnapshot) { v := exp.getInt(name) v.Set(metric.Count()) } -func (exp *exp) publishCounterFloat64(name string, metric metrics.CounterFloat64) { +func (exp *exp) publishCounterFloat64(name string, metric metrics.CounterFloat64Snapshot) { v 
:= exp.getFloat(name) v.Set(metric.Count()) } -func (exp *exp) publishGauge(name string, metric metrics.Gauge) { +func (exp *exp) publishGauge(name string, metric metrics.GaugeSnapshot) { v := exp.getInt(name) v.Set(metric.Value()) } -func (exp *exp) publishGaugeFloat64(name string, metric metrics.GaugeFloat64) { +func (exp *exp) publishGaugeFloat64(name string, metric metrics.GaugeFloat64Snapshot) { exp.getFloat(name).Set(metric.Value()) } -func (exp *exp) publishGaugeInfo(name string, metric metrics.GaugeInfo) { +func (exp *exp) publishGaugeInfo(name string, metric metrics.GaugeInfoSnapshot) { exp.getInfo(name).Set(metric.Value().String()) } @@ -176,28 +176,28 @@ func (exp *exp) publishTimer(name string, metric metrics.Timer) { func (exp *exp) publishResettingTimer(name string, metric metrics.ResettingTimer) { t := metric.Snapshot() - ps := t.Percentiles([]float64{50, 75, 95, 99}) - exp.getInt(name + ".count").Set(int64(len(t.Values()))) + ps := t.Percentiles([]float64{0.50, 0.75, 0.95, 0.99}) + exp.getInt(name + ".count").Set(int64(t.Count())) exp.getFloat(name + ".mean").Set(t.Mean()) - exp.getInt(name + ".50-percentile").Set(ps[0]) - exp.getInt(name + ".75-percentile").Set(ps[1]) - exp.getInt(name + ".95-percentile").Set(ps[2]) - exp.getInt(name + ".99-percentile").Set(ps[3]) + exp.getFloat(name + ".50-percentile").Set(ps[0]) + exp.getFloat(name + ".75-percentile").Set(ps[1]) + exp.getFloat(name + ".95-percentile").Set(ps[2]) + exp.getFloat(name + ".99-percentile").Set(ps[3]) } func (exp *exp) syncToExpvar() { exp.registry.Each(func(name string, i interface{}) { switch i := i.(type) { case metrics.Counter: - exp.publishCounter(name, i) + exp.publishCounter(name, i.Snapshot()) case metrics.CounterFloat64: - exp.publishCounterFloat64(name, i) + exp.publishCounterFloat64(name, i.Snapshot()) case metrics.Gauge: - exp.publishGauge(name, i) + exp.publishGauge(name, i.Snapshot()) case metrics.GaugeFloat64: - exp.publishGaugeFloat64(name, i) + exp.publishGaugeFloat64(name, i.Snapshot()) case metrics.GaugeInfo: - exp.publishGaugeInfo(name, i) + exp.publishGaugeInfo(name, i.Snapshot()) case metrics.Histogram: exp.publishHistogram(name, i) case metrics.Meter: diff --git a/metrics/gauge.go b/metrics/gauge.go index 81137d7f7c5e..00b59873848c 100644 --- a/metrics/gauge.go +++ b/metrics/gauge.go @@ -2,13 +2,18 @@ package metrics import "sync/atomic" +// gaugeSnapshot contains a readonly int64. +type GaugeSnapshot interface { + Value() int64 +} + // Gauges hold an int64 value that can be set arbitrarily. type Gauge interface { - Snapshot() Gauge + Snapshot() GaugeSnapshot Update(int64) + UpdateIfGt(int64) Dec(int64) Inc(int64) - Value() int64 } // GetOrRegisterGauge returns an existing Gauge or constructs and registers a @@ -38,65 +43,20 @@ func NewRegisteredGauge(name string, r Registry) Gauge { return c } -// NewFunctionalGauge constructs a new FunctionalGauge. -func NewFunctionalGauge(f func() int64) Gauge { - if !Enabled { - return NilGauge{} - } - return &FunctionalGauge{value: f} -} - -// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. -func NewRegisteredFunctionalGauge(name string, r Registry, f func() int64) Gauge { - c := NewFunctionalGauge(f) - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} - -// GaugeSnapshot is a read-only copy of another Gauge. -type GaugeSnapshot int64 - -// Snapshot returns the snapshot. -func (g GaugeSnapshot) Snapshot() Gauge { return g } - -// Update panics. 
-func (GaugeSnapshot) Update(int64) { - panic("Update called on a GaugeSnapshot") -} - -// Dec panics. -func (GaugeSnapshot) Dec(int64) { - panic("Dec called on a GaugeSnapshot") -} - -// Inc panics. -func (GaugeSnapshot) Inc(int64) { - panic("Inc called on a GaugeSnapshot") -} +// gaugeSnapshot is a read-only copy of another Gauge. +type gaugeSnapshot int64 // Value returns the value at the time the snapshot was taken. -func (g GaugeSnapshot) Value() int64 { return int64(g) } +func (g gaugeSnapshot) Value() int64 { return int64(g) } // NilGauge is a no-op Gauge. type NilGauge struct{} -// Snapshot is a no-op. -func (NilGauge) Snapshot() Gauge { return NilGauge{} } - -// Update is a no-op. -func (NilGauge) Update(v int64) {} - -// Dec is a no-op. -func (NilGauge) Dec(i int64) {} - -// Inc is a no-op. -func (NilGauge) Inc(i int64) {} - -// Value is a no-op. -func (NilGauge) Value() int64 { return 0 } +func (NilGauge) Snapshot() GaugeSnapshot { return (*emptySnapshot)(nil) } +func (NilGauge) Update(v int64) {} +func (NilGauge) UpdateIfGt(v int64) {} +func (NilGauge) Dec(i int64) {} +func (NilGauge) Inc(i int64) {} // StandardGauge is the standard implementation of a Gauge and uses the // sync/atomic package to manage a single int64 value. @@ -105,8 +65,8 @@ type StandardGauge struct { } // Snapshot returns a read-only copy of the gauge. -func (g *StandardGauge) Snapshot() Gauge { - return GaugeSnapshot(g.Value()) +func (g *StandardGauge) Snapshot() GaugeSnapshot { + return gaugeSnapshot(g.value.Load()) } // Update updates the gauge's value. @@ -114,9 +74,17 @@ func (g *StandardGauge) Update(v int64) { g.value.Store(v) } -// Value returns the gauge's current value. -func (g *StandardGauge) Value() int64 { - return g.value.Load() +// Update updates the gauge's value if v is larger then the current value. +func (g *StandardGauge) UpdateIfGt(v int64) { + for { + exist := g.value.Load() + if exist >= v { + break + } + if g.value.CompareAndSwap(exist, v) { + break + } + } } // Dec decrements the gauge's current value by the given amount. @@ -128,31 +96,3 @@ func (g *StandardGauge) Dec(i int64) { func (g *StandardGauge) Inc(i int64) { g.value.Add(i) } - -// FunctionalGauge returns value from given function -type FunctionalGauge struct { - value func() int64 -} - -// Value returns the gauge's current value. -func (g FunctionalGauge) Value() int64 { - return g.value() -} - -// Snapshot returns the snapshot. -func (g FunctionalGauge) Snapshot() Gauge { return GaugeSnapshot(g.Value()) } - -// Update panics. -func (FunctionalGauge) Update(int64) { - panic("Update called on a FunctionalGauge") -} - -// Dec panics. -func (FunctionalGauge) Dec(int64) { - panic("Dec called on a FunctionalGauge") -} - -// Inc panics. -func (FunctionalGauge) Inc(int64) { - panic("Inc called on a FunctionalGauge") -} diff --git a/metrics/gauge_float64.go b/metrics/gauge_float64.go index 237ff8036e01..967f2bc60e5c 100644 --- a/metrics/gauge_float64.go +++ b/metrics/gauge_float64.go @@ -5,11 +5,14 @@ import ( "sync/atomic" ) -// GaugeFloat64s hold a float64 value that can be set arbitrarily. +type GaugeFloat64Snapshot interface { + Value() float64 +} + +// GaugeFloat64 hold a float64 value that can be set arbitrarily. 
type GaugeFloat64 interface { - Snapshot() GaugeFloat64 + Snapshot() GaugeFloat64Snapshot Update(float64) - Value() float64 } // GetOrRegisterGaugeFloat64 returns an existing GaugeFloat64 or constructs and registers a @@ -39,49 +42,18 @@ func NewRegisteredGaugeFloat64(name string, r Registry) GaugeFloat64 { return c } -// NewFunctionalGauge constructs a new FunctionalGauge. -func NewFunctionalGaugeFloat64(f func() float64) GaugeFloat64 { - if !Enabled { - return NilGaugeFloat64{} - } - return &FunctionalGaugeFloat64{value: f} -} - -// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. -func NewRegisteredFunctionalGaugeFloat64(name string, r Registry, f func() float64) GaugeFloat64 { - c := NewFunctionalGaugeFloat64(f) - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} - -// GaugeFloat64Snapshot is a read-only copy of another GaugeFloat64. -type GaugeFloat64Snapshot float64 - -// Snapshot returns the snapshot. -func (g GaugeFloat64Snapshot) Snapshot() GaugeFloat64 { return g } - -// Update panics. -func (GaugeFloat64Snapshot) Update(float64) { - panic("Update called on a GaugeFloat64Snapshot") -} +// gaugeFloat64Snapshot is a read-only copy of another GaugeFloat64. +type gaugeFloat64Snapshot float64 // Value returns the value at the time the snapshot was taken. -func (g GaugeFloat64Snapshot) Value() float64 { return float64(g) } +func (g gaugeFloat64Snapshot) Value() float64 { return float64(g) } // NilGauge is a no-op Gauge. type NilGaugeFloat64 struct{} -// Snapshot is a no-op. -func (NilGaugeFloat64) Snapshot() GaugeFloat64 { return NilGaugeFloat64{} } - -// Update is a no-op. -func (NilGaugeFloat64) Update(v float64) {} - -// Value is a no-op. -func (NilGaugeFloat64) Value() float64 { return 0.0 } +func (NilGaugeFloat64) Snapshot() GaugeFloat64Snapshot { return NilGaugeFloat64{} } +func (NilGaugeFloat64) Update(v float64) {} +func (NilGaugeFloat64) Value() float64 { return 0.0 } // StandardGaugeFloat64 is the standard implementation of a GaugeFloat64 and uses // atomic to manage a single float64 value. @@ -90,34 +62,12 @@ type StandardGaugeFloat64 struct { } // Snapshot returns a read-only copy of the gauge. -func (g *StandardGaugeFloat64) Snapshot() GaugeFloat64 { - return GaugeFloat64Snapshot(g.Value()) +func (g *StandardGaugeFloat64) Snapshot() GaugeFloat64Snapshot { + v := math.Float64frombits(g.floatBits.Load()) + return gaugeFloat64Snapshot(v) } // Update updates the gauge's value. func (g *StandardGaugeFloat64) Update(v float64) { g.floatBits.Store(math.Float64bits(v)) } - -// Value returns the gauge's current value. -func (g *StandardGaugeFloat64) Value() float64 { - return math.Float64frombits(g.floatBits.Load()) -} - -// FunctionalGaugeFloat64 returns value from given function -type FunctionalGaugeFloat64 struct { - value func() float64 -} - -// Value returns the gauge's current value. -func (g FunctionalGaugeFloat64) Value() float64 { - return g.value() -} - -// Snapshot returns the snapshot. -func (g FunctionalGaugeFloat64) Snapshot() GaugeFloat64 { return GaugeFloat64Snapshot(g.Value()) } - -// Update panics. 
-func (FunctionalGaugeFloat64) Update(float64) { - panic("Update called on a FunctionalGaugeFloat64") -} diff --git a/metrics/gauge_float64_test.go b/metrics/gauge_float64_test.go index 0e11d803c4f6..194a18821f83 100644 --- a/metrics/gauge_float64_test.go +++ b/metrics/gauge_float64_test.go @@ -26,19 +26,11 @@ func BenchmarkGaugeFloat64Parallel(b *testing.B) { }() } wg.Wait() - if have, want := c.Value(), float64(b.N-1); have != want { + if have, want := c.Snapshot().Value(), float64(b.N-1); have != want { b.Fatalf("have %f want %f", have, want) } } -func TestGaugeFloat64(t *testing.T) { - g := NewGaugeFloat64() - g.Update(47.0) - if v := g.Value(); v != 47.0 { - t.Errorf("g.Value(): 47.0 != %v\n", v) - } -} - func TestGaugeFloat64Snapshot(t *testing.T) { g := NewGaugeFloat64() g.Update(47.0) @@ -53,28 +45,7 @@ func TestGetOrRegisterGaugeFloat64(t *testing.T) { r := NewRegistry() NewRegisteredGaugeFloat64("foo", r).Update(47.0) t.Logf("registry: %v", r) - if g := GetOrRegisterGaugeFloat64("foo", r); g.Value() != 47.0 { - t.Fatal(g) - } -} - -func TestFunctionalGaugeFloat64(t *testing.T) { - var counter float64 - fg := NewFunctionalGaugeFloat64(func() float64 { - counter++ - return counter - }) - fg.Value() - fg.Value() - if counter != 2 { - t.Error("counter != 2") - } -} - -func TestGetOrRegisterFunctionalGaugeFloat64(t *testing.T) { - r := NewRegistry() - NewRegisteredFunctionalGaugeFloat64("foo", r, func() float64 { return 47 }) - if g := GetOrRegisterGaugeFloat64("foo", r); g.Value() != 47 { + if g := GetOrRegisterGaugeFloat64("foo", r).Snapshot(); g.Value() != 47.0 { t.Fatal(g) } } diff --git a/metrics/gauge_info.go b/metrics/gauge_info.go index f1b2075939e7..c44b2d85f3ad 100644 --- a/metrics/gauge_info.go +++ b/metrics/gauge_info.go @@ -5,14 +5,17 @@ import ( "sync" ) +type GaugeInfoSnapshot interface { + Value() GaugeInfoValue +} + // GaugeInfos hold a GaugeInfoValue value that can be set arbitrarily. type GaugeInfo interface { - Snapshot() GaugeInfo Update(GaugeInfoValue) - Value() GaugeInfoValue + Snapshot() GaugeInfoSnapshot } -// GaugeInfoValue is a mappng of (string) keys to (string) values +// GaugeInfoValue is a mapping of keys to values type GaugeInfoValue map[string]string func (val GaugeInfoValue) String() string { @@ -49,49 +52,17 @@ func NewRegisteredGaugeInfo(name string, r Registry) GaugeInfo { return c } -// NewFunctionalGauge constructs a new FunctionalGauge. -func NewFunctionalGaugeInfo(f func() GaugeInfoValue) GaugeInfo { - if !Enabled { - return NilGaugeInfo{} - } - return &FunctionalGaugeInfo{value: f} -} - -// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. -func NewRegisteredFunctionalGaugeInfo(name string, r Registry, f func() GaugeInfoValue) GaugeInfo { - c := NewFunctionalGaugeInfo(f) - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} - -// GaugeInfoSnapshot is a read-only copy of another GaugeInfo. -type GaugeInfoSnapshot GaugeInfoValue - -// Snapshot returns the snapshot. -func (g GaugeInfoSnapshot) Snapshot() GaugeInfo { return g } - -// Update panics. -func (GaugeInfoSnapshot) Update(GaugeInfoValue) { - panic("Update called on a GaugeInfoSnapshot") -} +// gaugeInfoSnapshot is a read-only copy of another GaugeInfo. +type gaugeInfoSnapshot GaugeInfoValue // Value returns the value at the time the snapshot was taken. -func (g GaugeInfoSnapshot) Value() GaugeInfoValue { return GaugeInfoValue(g) } +func (g gaugeInfoSnapshot) Value() GaugeInfoValue { return GaugeInfoValue(g) } -// NilGauge is a no-op Gauge. 
type NilGaugeInfo struct{} -// Snapshot is a no-op. -func (NilGaugeInfo) Snapshot() GaugeInfo { return NilGaugeInfo{} } - -// Update is a no-op. -func (NilGaugeInfo) Update(v GaugeInfoValue) {} - -// Value is a no-op. -func (NilGaugeInfo) Value() GaugeInfoValue { return GaugeInfoValue{} } +func (NilGaugeInfo) Snapshot() GaugeInfoSnapshot { return NilGaugeInfo{} } +func (NilGaugeInfo) Update(v GaugeInfoValue) {} +func (NilGaugeInfo) Value() GaugeInfoValue { return GaugeInfoValue{} } // StandardGaugeInfo is the standard implementation of a GaugeInfo and uses // sync.Mutex to manage a single string value. @@ -101,8 +72,8 @@ type StandardGaugeInfo struct { } // Snapshot returns a read-only copy of the gauge. -func (g *StandardGaugeInfo) Snapshot() GaugeInfo { - return GaugeInfoSnapshot(g.Value()) +func (g *StandardGaugeInfo) Snapshot() GaugeInfoSnapshot { + return gaugeInfoSnapshot(g.value) } // Update updates the gauge's value. @@ -111,34 +82,3 @@ func (g *StandardGaugeInfo) Update(v GaugeInfoValue) { defer g.mutex.Unlock() g.value = v } - -// Value returns the gauge's current value. -func (g *StandardGaugeInfo) Value() GaugeInfoValue { - g.mutex.Lock() - defer g.mutex.Unlock() - return g.value -} - -// FunctionalGaugeInfo returns value from given function -type FunctionalGaugeInfo struct { - value func() GaugeInfoValue -} - -// Value returns the gauge's current value. -func (g FunctionalGaugeInfo) Value() GaugeInfoValue { - return g.value() -} - -// Value returns the gauge's current value in JSON string format -func (g FunctionalGaugeInfo) ValueJsonString() string { - data, _ := json.Marshal(g.value()) - return string(data) -} - -// Snapshot returns the snapshot. -func (g FunctionalGaugeInfo) Snapshot() GaugeInfo { return GaugeInfoSnapshot(g.Value()) } - -// Update panics. 
-func (FunctionalGaugeInfo) Update(GaugeInfoValue) { - panic("Update called on a FunctionalGaugeInfo") -} diff --git a/metrics/gauge_info_test.go b/metrics/gauge_info_test.go index 4227a6a85fab..319afbf92e8f 100644 --- a/metrics/gauge_info_test.go +++ b/metrics/gauge_info_test.go @@ -1,7 +1,6 @@ package metrics import ( - "strconv" "testing" ) @@ -14,22 +13,14 @@ func TestGaugeInfoJsonString(t *testing.T) { }, ) want := `{"anotherKey":"any_string_value","chain_id":"5","third_key":"anything"}` - if have := g.Value().String(); have != want { - t.Errorf("\nhave: %v\nwant: %v\n", have, want) - } -} -func TestGaugeInfoSnapshot(t *testing.T) { - g := NewGaugeInfo() - g.Update(GaugeInfoValue{"value": "original"}) - snapshot := g.Snapshot() // Snapshot @chainid 5 + original := g.Snapshot() g.Update(GaugeInfoValue{"value": "updated"}) - // The 'g' should be updated - if have, want := g.Value().String(), `{"value":"updated"}`; have != want { + + if have := original.Value().String(); have != want { t.Errorf("\nhave: %v\nwant: %v\n", have, want) } - // Snapshot should be unupdated - if have, want := snapshot.Value().String(), `{"value":"original"}`; have != want { + if have, want := g.Snapshot().Value().String(), `{"value":"updated"}`; have != want { t.Errorf("\nhave: %v\nwant: %v\n", have, want) } } @@ -38,38 +29,8 @@ func TestGetOrRegisterGaugeInfo(t *testing.T) { r := NewRegistry() NewRegisteredGaugeInfo("foo", r).Update( GaugeInfoValue{"chain_id": "5"}) - g := GetOrRegisterGaugeInfo("foo", r) + g := GetOrRegisterGaugeInfo("foo", r).Snapshot() if have, want := g.Value().String(), `{"chain_id":"5"}`; have != want { t.Errorf("have\n%v\nwant\n%v\n", have, want) } } - -func TestFunctionalGaugeInfo(t *testing.T) { - info := GaugeInfoValue{"chain_id": "0"} - counter := 1 - // A "functional" gauge invokes the method to obtain the value - fg := NewFunctionalGaugeInfo(func() GaugeInfoValue { - info["chain_id"] = strconv.Itoa(counter) - counter++ - return info - }) - fg.Value() - fg.Value() - if have, want := info["chain_id"], "2"; have != want { - t.Errorf("have %v want %v", have, want) - } -} - -func TestGetOrRegisterFunctionalGaugeInfo(t *testing.T) { - r := NewRegistry() - NewRegisteredFunctionalGaugeInfo("foo", r, func() GaugeInfoValue { - return GaugeInfoValue{ - "chain_id": "5", - } - }) - want := `{"chain_id":"5"}` - have := GetOrRegisterGaugeInfo("foo", r).Value().String() - if have != want { - t.Errorf("have\n%v\nwant\n%v\n", have, want) - } -} diff --git a/metrics/gauge_test.go b/metrics/gauge_test.go index a98fe985d8c2..f2ba930bc465 100644 --- a/metrics/gauge_test.go +++ b/metrics/gauge_test.go @@ -1,7 +1,6 @@ package metrics import ( - "fmt" "testing" ) @@ -13,14 +12,6 @@ func BenchmarkGauge(b *testing.B) { } } -func TestGauge(t *testing.T) { - g := NewGauge() - g.Update(int64(47)) - if v := g.Value(); v != 47 { - t.Errorf("g.Value(): 47 != %v\n", v) - } -} - func TestGaugeSnapshot(t *testing.T) { g := NewGauge() g.Update(int64(47)) @@ -34,35 +25,7 @@ func TestGaugeSnapshot(t *testing.T) { func TestGetOrRegisterGauge(t *testing.T) { r := NewRegistry() NewRegisteredGauge("foo", r).Update(47) - if g := GetOrRegisterGauge("foo", r); g.Value() != 47 { - t.Fatal(g) - } -} - -func TestFunctionalGauge(t *testing.T) { - var counter int64 - fg := NewFunctionalGauge(func() int64 { - counter++ - return counter - }) - fg.Value() - fg.Value() - if counter != 2 { - t.Error("counter != 2") - } -} - -func TestGetOrRegisterFunctionalGauge(t *testing.T) { - r := NewRegistry() - NewRegisteredFunctionalGauge("foo", 
r, func() int64 { return 47 }) - if g := GetOrRegisterGauge("foo", r); g.Value() != 47 { + if g := GetOrRegisterGauge("foo", r); g.Snapshot().Value() != 47 { t.Fatal(g) } } - -func ExampleGetOrRegisterGauge() { - m := "server.bytes_sent" - g := GetOrRegisterGauge(m, nil) - g.Update(47) - fmt.Println(g.Value()) // Output: 47 -} diff --git a/metrics/graphite.go b/metrics/graphite.go index 4e3dd3b3b894..aba752e0ed5e 100644 --- a/metrics/graphite.go +++ b/metrics/graphite.go @@ -66,15 +66,15 @@ func graphite(c *GraphiteConfig) error { c.Registry.Each(func(name string, i interface{}) { switch metric := i.(type) { case Counter: - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Count(), now) + fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Snapshot().Count(), now) case CounterFloat64: - fmt.Fprintf(w, "%s.%s.count %f %d\n", c.Prefix, name, metric.Count(), now) + fmt.Fprintf(w, "%s.%s.count %f %d\n", c.Prefix, name, metric.Snapshot().Count(), now) case Gauge: - fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now) + fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Snapshot().Value(), now) case GaugeFloat64: - fmt.Fprintf(w, "%s.%s.value %f %d\n", c.Prefix, name, metric.Value(), now) + fmt.Fprintf(w, "%s.%s.value %f %d\n", c.Prefix, name, metric.Snapshot().Value(), now) case GaugeInfo: - fmt.Fprintf(w, "%s.%s.value %s %d\n", c.Prefix, name, metric.Value().String(), now) + fmt.Fprintf(w, "%s.%s.value %s %d\n", c.Prefix, name, metric.Snapshot().Value().String(), now) case Histogram: h := metric.Snapshot() ps := h.Percentiles(c.Percentiles) diff --git a/metrics/histogram.go b/metrics/histogram.go index 2c54ce8b4063..44de588bc1dc 100644 --- a/metrics/histogram.go +++ b/metrics/histogram.go @@ -1,20 +1,14 @@ package metrics +type HistogramSnapshot interface { + SampleSnapshot +} + // Histograms calculate distribution statistics from a series of int64 values. type Histogram interface { Clear() - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - Sample() Sample - Snapshot() Histogram - StdDev() float64 - Sum() int64 Update(int64) - Variance() float64 + Snapshot() HistogramSnapshot } // GetOrRegisterHistogram returns an existing Histogram or constructs and @@ -54,108 +48,12 @@ func NewRegisteredHistogram(name string, r Registry, s Sample) Histogram { return c } -// HistogramSnapshot is a read-only copy of another Histogram. -type HistogramSnapshot struct { - sample *SampleSnapshot -} - -// Clear panics. -func (*HistogramSnapshot) Clear() { - panic("Clear called on a HistogramSnapshot") -} - -// Count returns the number of samples recorded at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Count() int64 { return h.sample.Count() } - -// Max returns the maximum value in the sample at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Max() int64 { return h.sample.Max() } - -// Mean returns the mean of the values in the sample at the time the snapshot -// was taken. -func (h *HistogramSnapshot) Mean() float64 { return h.sample.Mean() } - -// Min returns the minimum value in the sample at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Min() int64 { return h.sample.Min() } - -// Percentile returns an arbitrary percentile of values in the sample at the -// time the snapshot was taken. 
-func (h *HistogramSnapshot) Percentile(p float64) float64 { - return h.sample.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the sample -// at the time the snapshot was taken. -func (h *HistogramSnapshot) Percentiles(ps []float64) []float64 { - return h.sample.Percentiles(ps) -} - -// Sample returns the Sample underlying the histogram. -func (h *HistogramSnapshot) Sample() Sample { return h.sample } - -// Snapshot returns the snapshot. -func (h *HistogramSnapshot) Snapshot() Histogram { return h } - -// StdDev returns the standard deviation of the values in the sample at the -// time the snapshot was taken. -func (h *HistogramSnapshot) StdDev() float64 { return h.sample.StdDev() } - -// Sum returns the sum in the sample at the time the snapshot was taken. -func (h *HistogramSnapshot) Sum() int64 { return h.sample.Sum() } - -// Update panics. -func (*HistogramSnapshot) Update(int64) { - panic("Update called on a HistogramSnapshot") -} - -// Variance returns the variance of inputs at the time the snapshot was taken. -func (h *HistogramSnapshot) Variance() float64 { return h.sample.Variance() } - // NilHistogram is a no-op Histogram. type NilHistogram struct{} -// Clear is a no-op. -func (NilHistogram) Clear() {} - -// Count is a no-op. -func (NilHistogram) Count() int64 { return 0 } - -// Max is a no-op. -func (NilHistogram) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilHistogram) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilHistogram) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilHistogram) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilHistogram) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Sample is a no-op. -func (NilHistogram) Sample() Sample { return NilSample{} } - -// Snapshot is a no-op. -func (NilHistogram) Snapshot() Histogram { return NilHistogram{} } - -// StdDev is a no-op. -func (NilHistogram) StdDev() float64 { return 0.0 } - -// Sum is a no-op. -func (NilHistogram) Sum() int64 { return 0 } - -// Update is a no-op. -func (NilHistogram) Update(v int64) {} - -// Variance is a no-op. -func (NilHistogram) Variance() float64 { return 0.0 } +func (NilHistogram) Clear() {} +func (NilHistogram) Snapshot() HistogramSnapshot { return (*emptySnapshot)(nil) } +func (NilHistogram) Update(v int64) {} // StandardHistogram is the standard implementation of a Histogram and uses a // Sample to bound its memory use. @@ -166,46 +64,10 @@ type StandardHistogram struct { // Clear clears the histogram and its sample. func (h *StandardHistogram) Clear() { h.sample.Clear() } -// Count returns the number of samples recorded since the histogram was last -// cleared. -func (h *StandardHistogram) Count() int64 { return h.sample.Count() } - -// Max returns the maximum value in the sample. -func (h *StandardHistogram) Max() int64 { return h.sample.Max() } - -// Mean returns the mean of the values in the sample. -func (h *StandardHistogram) Mean() float64 { return h.sample.Mean() } - -// Min returns the minimum value in the sample. -func (h *StandardHistogram) Min() int64 { return h.sample.Min() } - -// Percentile returns an arbitrary percentile of the values in the sample. -func (h *StandardHistogram) Percentile(p float64) float64 { - return h.sample.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of the values in the -// sample. 
-func (h *StandardHistogram) Percentiles(ps []float64) []float64 { - return h.sample.Percentiles(ps) -} - -// Sample returns the Sample underlying the histogram. -func (h *StandardHistogram) Sample() Sample { return h.sample } - // Snapshot returns a read-only copy of the histogram. -func (h *StandardHistogram) Snapshot() Histogram { - return &HistogramSnapshot{sample: h.sample.Snapshot().(*SampleSnapshot)} +func (h *StandardHistogram) Snapshot() HistogramSnapshot { + return h.sample.Snapshot() } -// StdDev returns the standard deviation of the values in the sample. -func (h *StandardHistogram) StdDev() float64 { return h.sample.StdDev() } - -// Sum returns the sum in the sample. -func (h *StandardHistogram) Sum() int64 { return h.sample.Sum() } - // Update samples a new value. func (h *StandardHistogram) Update(v int64) { h.sample.Update(v) } - -// Variance returns the variance of the values in the sample. -func (h *StandardHistogram) Variance() float64 { return h.sample.Variance() } diff --git a/metrics/histogram_test.go b/metrics/histogram_test.go index 7c9f42fcec96..22fc5468b0b5 100644 --- a/metrics/histogram_test.go +++ b/metrics/histogram_test.go @@ -14,7 +14,7 @@ func TestGetOrRegisterHistogram(t *testing.T) { r := NewRegistry() s := NewUniformSample(100) NewRegisteredHistogram("foo", r, s).Update(47) - if h := GetOrRegisterHistogram("foo", r, s); h.Count() != 1 { + if h := GetOrRegisterHistogram("foo", r, s).Snapshot(); h.Count() != 1 { t.Fatal(h) } } @@ -24,11 +24,11 @@ func TestHistogram10000(t *testing.T) { for i := 1; i <= 10000; i++ { h.Update(int64(i)) } - testHistogram10000(t, h) + testHistogram10000(t, h.Snapshot()) } func TestHistogramEmpty(t *testing.T) { - h := NewHistogram(NewUniformSample(100)) + h := NewHistogram(NewUniformSample(100)).Snapshot() if count := h.Count(); count != 0 { t.Errorf("h.Count(): 0 != %v\n", count) } @@ -66,7 +66,7 @@ func TestHistogramSnapshot(t *testing.T) { testHistogram10000(t, snapshot) } -func testHistogram10000(t *testing.T, h Histogram) { +func testHistogram10000(t *testing.T, h HistogramSnapshot) { if count := h.Count(); count != 10000 { t.Errorf("h.Count(): 10000 != %v\n", count) } diff --git a/metrics/inactive.go b/metrics/inactive.go new file mode 100644 index 000000000000..1f47f0210af3 --- /dev/null +++ b/metrics/inactive.go @@ -0,0 +1,48 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package metrics + +// compile-time checks that interfaces are implemented. 
+var ( + _ SampleSnapshot = (*emptySnapshot)(nil) + _ HistogramSnapshot = (*emptySnapshot)(nil) + _ CounterSnapshot = (*emptySnapshot)(nil) + _ GaugeSnapshot = (*emptySnapshot)(nil) + _ MeterSnapshot = (*emptySnapshot)(nil) + _ EWMASnapshot = (*emptySnapshot)(nil) + _ TimerSnapshot = (*emptySnapshot)(nil) +) + +type emptySnapshot struct{} + +func (*emptySnapshot) Count() int64 { return 0 } +func (*emptySnapshot) Max() int64 { return 0 } +func (*emptySnapshot) Mean() float64 { return 0.0 } +func (*emptySnapshot) Min() int64 { return 0 } +func (*emptySnapshot) Percentile(p float64) float64 { return 0.0 } +func (*emptySnapshot) Percentiles(ps []float64) []float64 { return make([]float64, len(ps)) } +func (*emptySnapshot) Size() int { return 0 } +func (*emptySnapshot) StdDev() float64 { return 0.0 } +func (*emptySnapshot) Sum() int64 { return 0 } +func (*emptySnapshot) Values() []int64 { return []int64{} } +func (*emptySnapshot) Variance() float64 { return 0.0 } +func (*emptySnapshot) Value() int64 { return 0 } +func (*emptySnapshot) Rate() float64 { return 0.0 } +func (*emptySnapshot) Rate1() float64 { return 0.0 } +func (*emptySnapshot) Rate5() float64 { return 0.0 } +func (*emptySnapshot) Rate15() float64 { return 0.0 } +func (*emptySnapshot) RateMean() float64 { return 0.0 } diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index a1a210033ff4..d7c35b671821 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -11,13 +11,13 @@ func readMeter(namespace, name string, i interface{}) (string, map[string]interf case metrics.Counter: measurement := fmt.Sprintf("%s%s.count", namespace, name) fields := map[string]interface{}{ - "value": metric.Count(), + "value": metric.Snapshot().Count(), } return measurement, fields case metrics.CounterFloat64: measurement := fmt.Sprintf("%s%s.count", namespace, name) fields := map[string]interface{}{ - "value": metric.Count(), + "value": metric.Snapshot().Count(), } return measurement, fields case metrics.Gauge: @@ -99,20 +99,19 @@ func readMeter(namespace, name string, i interface{}) (string, map[string]interf return measurement, fields case metrics.ResettingTimer: t := metric.Snapshot() - if len(t.Values()) == 0 { + if t.Count() == 0 { break } - ps := t.Percentiles([]float64{50, 95, 99}) - val := t.Values() + ps := t.Percentiles([]float64{0.50, 0.95, 0.99}) measurement := fmt.Sprintf("%s%s.span", namespace, name) fields := map[string]interface{}{ - "count": len(val), - "max": val[len(val)-1], + "count": t.Count(), + "max": t.Max(), "mean": t.Mean(), - "min": val[0], - "p50": ps[0], - "p95": ps[1], - "p99": ps[2], + "min": t.Min(), + "p50": int(ps[0]), + "p95": int(ps[1]), + "p99": int(ps[2]), } return measurement, fields } diff --git a/metrics/influxdb/influxdb_test.go b/metrics/influxdb/influxdb_test.go index dca3d9ab3264..07af3f90ce21 100644 --- a/metrics/influxdb/influxdb_test.go +++ b/metrics/influxdb/influxdb_test.go @@ -96,7 +96,7 @@ func TestExampleV2(t *testing.T) { } if have != want { t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) - t.Logf("have vs want:\n %v", findFirstDiffPos(have, want)) + t.Logf("have vs want:\n%v", findFirstDiffPos(have, want)) } } diff --git a/metrics/influxdb/testdata/influxdbv1.want b/metrics/influxdb/testdata/influxdbv1.want index 5efffb959504..9443faedc5a2 100644 --- a/metrics/influxdb/testdata/influxdbv1.want +++ b/metrics/influxdb/testdata/influxdbv1.want @@ -1,3 +1,5 @@ +goth.system/cpu/schedlatency.histogram 
count=5645i,max=41943040i,mean=1819544.0410983171,min=0i,p25=0,p50=0,p75=7168,p95=16777216,p99=29360128,p999=33554432,p9999=33554432,stddev=6393570.217198883,variance=40877740122252.57 978307200000000000 +goth.system/memory/pauses.histogram count=14i,max=229376i,mean=50066.28571428572,min=5120i,p25=10240,p50=32768,p75=57344,p95=196608,p99=196608,p999=196608,p9999=196608,stddev=54726.062410783874,variance=2994941906.9890113 978307200000000000 goth.test/counter.count value=12345 978307200000000000 goth.test/counter_float64.count value=54321.98 978307200000000000 goth.test/gauge.gauge value=23456i 978307200000000000 @@ -5,5 +7,5 @@ goth.test/gauge_float64.gauge value=34567.89 978307200000000000 goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 -goth.test/resetting_timer.span count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12000000i,p95=120000000i,p99=120000000i 978307200000000000 +goth.test/resetting_timer.span count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000i,p95=120000000i,p99=120000000i 978307200000000000 goth.test/timer.timer count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/influxdb/testdata/influxdbv2.want b/metrics/influxdb/testdata/influxdbv2.want index 5efffb959504..9443faedc5a2 100644 --- a/metrics/influxdb/testdata/influxdbv2.want +++ b/metrics/influxdb/testdata/influxdbv2.want @@ -1,3 +1,5 @@ +goth.system/cpu/schedlatency.histogram count=5645i,max=41943040i,mean=1819544.0410983171,min=0i,p25=0,p50=0,p75=7168,p95=16777216,p99=29360128,p999=33554432,p9999=33554432,stddev=6393570.217198883,variance=40877740122252.57 978307200000000000 +goth.system/memory/pauses.histogram count=14i,max=229376i,mean=50066.28571428572,min=5120i,p25=10240,p50=32768,p75=57344,p95=196608,p99=196608,p999=196608,p9999=196608,stddev=54726.062410783874,variance=2994941906.9890113 978307200000000000 goth.test/counter.count value=12345 978307200000000000 goth.test/counter_float64.count value=54321.98 978307200000000000 goth.test/gauge.gauge value=23456i 978307200000000000 @@ -5,5 +7,5 @@ goth.test/gauge_float64.gauge value=34567.89 978307200000000000 goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 -goth.test/resetting_timer.span count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12000000i,p95=120000000i,p99=120000000i 978307200000000000 +goth.test/resetting_timer.span count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000i,p95=120000000i,p99=120000000i 978307200000000000 goth.test/timer.timer 
count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/internal/sampledata.go b/metrics/internal/sampledata.go index 6b03af394f70..15b52066f16a 100644 --- a/metrics/internal/sampledata.go +++ b/metrics/internal/sampledata.go @@ -17,6 +17,9 @@ package internal import ( + "bytes" + "encoding/gob" + metrics2 "runtime/metrics" "time" "github.com/XinFinOrg/XDPoSChain/metrics" @@ -38,7 +41,15 @@ func ExampleMetrics() metrics.Registry { "commit": "7caa2d8163ae3132c1c2d6978c76610caee2d949", "protocol_versions": "64 65 66", }) - metrics.NewRegisteredHistogram("test/histogram", registry, metrics.NewSampleSnapshot(3, []int64{1, 2, 3})) + + { + s := metrics.NewUniformSample(3) + s.Update(1) + s.Update(2) + s.Update(3) + //metrics.NewRegisteredHistogram("test/histogram", registry, metrics.NewSampleSnapshot(3, []int64{1, 2, 3})) + metrics.NewRegisteredHistogram("test/histogram", registry, s) + } registry.Register("test/meter", metrics.NewInactiveMeter()) { timer := metrics.NewRegisteredResettingTimer("test/resetting_timer", registry) @@ -60,5 +71,25 @@ func ExampleMetrics() metrics.Registry { timer.Stop() } registry.Register("test/empty_resetting_timer", metrics.NewResettingTimer().Snapshot()) + + { // go runtime metrics + var sLatency = "7\xff\x81\x03\x01\x01\x10Float64Histogram\x01\xff\x82\x00\x01\x02\x01\x06Counts\x01\xff\x84\x00\x01\aBuckets\x01\xff\x86\x00\x00\x00\x16\xff\x83\x02\x01\x01\b[]uint64\x01\xff\x84\x00\x01\x06\x00\x00\x17\xff\x85\x02\x01\x01\t[]float64\x01\xff\x86\x00\x01\b\x00\x00\xfe\x06T\xff\x82\x01\xff\xa2\x00\xfe\r\xef\x00\x01\x02\x02\x04\x05\x04\b\x15\x17 B?6.L;$!2) \x1a? 
\x190aH7FY6#\x190\x1d\x14\x10\x1b\r\t\x04\x03\x01\x01\x00\x03\x02\x00\x03\x05\x05\x02\x02\x06\x04\v\x06\n\x15\x18\x13'&.\x12=H/L&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\xa3\xfe\xf0\xff\x00\xf8\x95\xd6&\xe8\v.q>\xf8\x95\xd6&\xe8\v.\x81>\xf8\xdfA:\xdc\x11ʼn>\xf8\x95\xd6&\xe8\v.\x91>\xf8:\x8c0\xe2\x8ey\x95>\xf8\xdfA:\xdc\x11Å™>\xf8\x84\xf7CÖ”\x10\x9e>\xf8\x95\xd6&\xe8\v.\xa1>\xf8:\x8c0\xe2\x8ey\xa5>\xf8\xdfA:\xdc\x11Å©>\xf8\x84\xf7CÖ”\x10\xae>\xf8\x95\xd6&\xe8\v.\xb1>\xf8:\x8c0\xe2\x8ey\xb5>\xf8\xdfA:\xdc\x11Ź>\xf8\x84\xf7CÖ”\x10\xbe>\xf8\x95\xd6&\xe8\v.\xc1>\xf8:\x8c0\xe2\x8ey\xc5>\xf8\xdfA:\xdc\x11\xc5\xc9>\xf8\x84\xf7CÖ”\x10\xce>\xf8\x95\xd6&\xe8\v.\xd1>\xf8:\x8c0\xe2\x8ey\xd5>\xf8\xdfA:\xdc\x11\xc5\xd9>\xf8\x84\xf7CÖ”\x10\xde>\xf8\x95\xd6&\xe8\v.\xe1>\xf8:\x8c0\xe2\x8ey\xe5>\xf8\xdfA:\xdc\x11\xc5\xe9>\xf8\x84\xf7CÖ”\x10\xee>\xf8\x95\xd6&\xe8\v.\xf1>\xf8:\x8c0\xe2\x8ey\xf5>\xf8\xdfA:\xdc\x11\xc5\xf9>\xf8\x84\xf7CÖ”\x10\xfe>\xf8\x95\xd6&\xe8\v.\x01?\xf8:\x8c0\xe2\x8ey\x05?\xf8\xdfA:\xdc\x11\xc5\t?\xf8\x84\xf7CÖ”\x10\x0e?\xf8\x95\xd6&\xe8\v.\x11?\xf8:\x8c0\xe2\x8ey\x15?\xf8\xdfA:\xdc\x11\xc5\x19?\xf8\x84\xf7CÖ”\x10\x1e?\xf8\x95\xd6&\xe8\v.!?\xf8:\x8c0\xe2\x8ey%?\xf8\xdfA:\xdc\x11\xc5)?\xf8\x84\xf7CÖ”\x10.?\xf8\x95\xd6&\xe8\v.1?\xf8:\x8c0\xe2\x8ey5?\xf8\xdfA:\xdc\x11\xc59?\xf8\x84\xf7CÖ”\x10>?\xf8\x95\xd6&\xe8\v.A?\xf8:\x8c0\xe2\x8eyE?\xf8\xdfA:\xdc\x11\xc5I?\xf8\x84\xf7CÖ”\x10N?\xf8\x95\xd6&\xe8\v.Q?\xf8:\x8c0\xe2\x8eyU?\xf8\xdfA:\xdc\x11\xc5Y?\xf8\x84\xf7CÖ”\x10^?\xf8\x95\xd6&\xe8\v.a?\xf8:\x8c0\xe2\x8eye?\xf8\xdfA:\xdc\x11\xc5i?\xf8\x84\xf7CÖ”\x10n?\xf8\x95\xd6&\xe8\v.q?\xf8:\x8c0\xe2\x8eyu?\xf8\xdfA:\xdc\x11\xc5y?\xf8\x84\xf7CÖ”\x10~?\xf8\x95\xd6&\xe8\v.\x81?\xf8:\x8c0\xe2\x8ey\x85?\xf8\xdfA:\xdc\x11ʼn?\xf8\x84\xf7CÖ”\x10\x8e?\xf8\x95\xd6&\xe8\v.\x91?\xf8:\x8c0\xe2\x8ey\x95?\xf8\xdfA:\xdc\x11Å™?\xf8\x84\xf7CÖ”\x10\x9e?\xf8\x95\xd6&\xe8\v.\xa1?\xf8:\x8c0\xe2\x8ey\xa5?\xf8\xdfA:\xdc\x11Å©?\xf8\x84\xf7CÖ”\x10\xae?\xf8\x95\xd6&\xe8\v.\xb1?\xf8:\x8c0\xe2\x8ey\xb5?\xf8\xdfA:\xdc\x11Ź?\xf8\x84\xf7CÖ”\x10\xbe?\xf8\x95\xd6&\xe8\v.\xc1?\xf8:\x8c0\xe2\x8ey\xc5?\xf8\xdfA:\xdc\x11\xc5\xc9?\xf8\x84\xf7CÖ”\x10\xce?\xf8\x95\xd6&\xe8\v.\xd1?\xf8:\x8c0\xe2\x8ey\xd5?\xf8\xdfA:\xdc\x11\xc5\xd9?\xf8\x84\xf7CÖ”\x10\xde?\xf8\x95\xd6&\xe8\v.\xe1?\xf8:\x8c0\xe2\x8ey\xe5?\xf8\xdfA:\xdc\x11\xc5\xe9?\xf8\x84\xf7CÖ”\x10\xee?\xf8\x95\xd6&\xe8\v.\xf1?\xf8:\x8c0\xe2\x8ey\xf5?\xf8\xdfA:\xdc\x11\xc5\xf9?\xf8\x84\xf7CÖ”\x10\xfe?\xf8\x95\xd6&\xe8\v.\x01@\xf8:\x8c0\xe2\x8ey\x05@\xf8\xdfA:\xdc\x11\xc5\t@\xf8\x84\xf7CÖ”\x10\x0e@\xf8\x95\xd6&\xe8\v.\x11@\xf8:\x8c0\xe2\x8ey\x15@\xf8\xdfA:\xdc\x11\xc5\x19@\xf8\x84\xf7CÖ”\x10\x1e@\xf8\x95\xd6&\xe8\v.!@\xf8:\x8c0\xe2\x8ey%@\xf8\xdfA:\xdc\x11\xc5)@\xf8\x84\xf7CÖ”\x10.@\xf8\x95\xd6&\xe8\v.1@\xf8:\x8c0\xe2\x8ey5@\xf8\xdfA:\xdc\x11\xc59@\xf8\x84\xf7CÖ”\x10>@\xf8\x95\xd6&\xe8\v.A@\xf8:\x8c0\xe2\x8eyE@\xf8\xdfA:\xdc\x11\xc5I@\xf8\x84\xf7CÖ”\x10N@\xf8\x95\xd6&\xe8\v.Q@\xf8:\x8c0\xe2\x8eyU@\xf8\xdfA:\xdc\x11\xc5Y@\xf8\x84\xf7CÖ”\x10^@\xf8\x95\xd6&\xe8\v.a@\xf8:\x8c0\xe2\x8eye@\xf8\xdfA:\xdc\x11\xc5i@\xf8\x84\xf7CÖ”\x10n@\xf8\x95\xd6&\xe8\v.q@\xf8:\x8c0\xe2\x8eyu@\xf8\xdfA:\xdc\x11\xc5y@\xf8\x84\xf7CÖ”\x10~@\xf8\x95\xd6&\xe8\v.\x81@\xf8:\x8c0\xe2\x8ey\x85@\xf8\xdfA:\xdc\x11ʼn@
\xf8\x84\xf7CÖ”\x10\x8e@\xf8\x95\xd6&\xe8\v.\x91@\xf8:\x8c0\xe2\x8ey\x95@\xf8\xdfA:\xdc\x11Å™@\xf8\x84\xf7CÖ”\x10\x9e@\xf8\x95\xd6&\xe8\v.\xa1@\xf8:\x8c0\xe2\x8ey\xa5@\xf8\xdfA:\xdc\x11Å©@\xf8\x84\xf7CÖ”\x10\xae@\xf8\x95\xd6&\xe8\v.\xb1@\xf8:\x8c0\xe2\x8ey\xb5@\xf8\xdfA:\xdc\x11Ź@\xf8\x84\xf7CÖ”\x10\xbe@\xf8\x95\xd6&\xe8\v.\xc1@\xf8:\x8c0\xe2\x8ey\xc5@\xf8\xdfA:\xdc\x11\xc5\xc9@\xf8\x84\xf7CÖ”\x10\xce@\xf8\x95\xd6&\xe8\v.\xd1@\xf8:\x8c0\xe2\x8ey\xd5@\xf8\xdfA:\xdc\x11\xc5\xd9@\xf8\x84\xf7CÖ”\x10\xde@\xf8\x95\xd6&\xe8\v.\xe1@\xf8:\x8c0\xe2\x8ey\xe5@\xf8\xdfA:\xdc\x11\xc5\xe9@\xf8\x84\xf7CÖ”\x10\xee@\xf8\x95\xd6&\xe8\v.\xf1@\xf8:\x8c0\xe2\x8ey\xf5@\xf8\xdfA:\xdc\x11\xc5\xf9@\xf8\x84\xf7CÖ”\x10\xfe@\xf8\x95\xd6&\xe8\v.\x01A\xfe\xf0\x7f\x00" + var gcPauses = "7\xff\x81\x03\x01\x01\x10Float64Histogram\x01\xff\x82\x00\x01\x02\x01\x06Counts\x01\xff\x84\x00\x01\aBuckets\x01\xff\x86\x00\x00\x00\x16\xff\x83\x02\x01\x01\b[]uint64\x01\xff\x84\x00\x01\x06\x00\x00\x17\xff\x85\x02\x01\x01\t[]float64\x01\xff\x86\x00\x01\b\x00\x00\xfe\x06R\xff\x82\x01\xff\xa2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x01\x00\x01\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\xa3\xfe\xf0\xff\x00\xf8\x95\xd6&\xe8\v.q>\xf8\x95\xd6&\xe8\v.\x81>\xf8\xdfA:\xdc\x11ʼn>\xf8\x95\xd6&\xe8\v.\x91>\xf8:\x8c0\xe2\x8ey\x95>\xf8\xdfA:\xdc\x11Å™>\xf8\x84\xf7CÖ”\x10\x9e>\xf8\x95\xd6&\xe8\v.\xa1>\xf8:\x8c0\xe2\x8ey\xa5>\xf8\xdfA:\xdc\x11Å©>\xf8\x84\xf7CÖ”\x10\xae>\xf8\x95\xd6&\xe8\v.\xb1>\xf8:\x8c0\xe2\x8ey\xb5>\xf8\xdfA:\xdc\x11Ź>\xf8\x84\xf7CÖ”\x10\xbe>\xf8\x95\xd6&\xe8\v.\xc1>\xf8:\x8c0\xe2\x8ey\xc5>\xf8\xdfA:\xdc\x11\xc5\xc9>\xf8\x84\xf7CÖ”\x10\xce>\xf8\x95\xd6&\xe8\v.\xd1>\xf8:\x8c0\xe2\x8ey\xd5>\xf8\xdfA:\xdc\x11\xc5\xd9>\xf8\x84\xf7CÖ”\x10\xde>\xf8\x95\xd6&\xe8\v.\xe1>\xf8:\x8c0\xe2\x8ey\xe5>\xf8\xdfA:\xdc\x11\xc5\xe9>\xf8\x84\xf7CÖ”\x10\xee>\xf8\x95\xd6&\xe8\v.\xf1>\xf8:\x8c0\xe2\x8ey\xf5>\xf8\xdfA:\xdc\x11\xc5\xf9>\xf8\x84\xf7CÖ”\x10\xfe>\xf8\x95\xd6&\xe8\v.\x01?\xf8:\x8c0\xe2\x8ey\x05?\xf8\xdfA:\xdc\x11\xc5\t?\xf8\x84\xf7CÖ”\x10\x0e?\xf8\x95\xd6&\xe8\v.\x11?\xf8:\x8c0\xe2\x8ey\x15?\xf8\xdfA:\xdc\x11\xc5\x19?\xf8\x84\xf7CÖ”\x10\x1e?\xf8\x95\xd6&\xe8\v.!?\xf8:\x8c0\xe2\x8ey%?\xf8\xdfA:\xdc\x11\xc5)?\xf8\x84\xf7CÖ”\x10.?\xf8\x95\xd6&\xe8\v.1?\xf8:\x8c0\xe2\x8ey5?\xf8\xdfA:\xdc\x11\xc59?\xf8\x84\xf7CÖ”\x10>?\xf8\x95\xd6&\xe8\v.A?\xf8:\x8c0\xe2\x8eyE?\xf8\xdfA:\xdc\x11\xc5I?\xf8\x84\xf7CÖ”\x10N?\xf8\x95\xd6&\xe8\v.Q?\xf8:\x8c0\xe2\x8eyU?\xf8\xdfA:\xdc\x11\xc5Y?\xf8\x84\xf7CÖ”\x10^?\xf8\x95\xd6&\xe8\v.a?\xf8:\x8c0\xe2\x8eye?\xf8\xdfA:\xdc\x11\xc5i?\xf8\x84\xf7CÖ”\x10n?\xf8\x95\xd6&\xe8\v.q?\xf8:\x8c0\xe2\x8eyu?\xf8\xdfA:\xdc\x11\xc5y?\xf8\x84\xf7CÖ”\x10~?\xf8\x95\xd6&\xe8\v.\x81?\xf8:\x8c0\xe2\x8ey\x85?\xf8\xdfA:\xdc\x11ʼn?\xf8\x84\xf7CÖ”\x10\x8e?\xf8\x95\xd6&\xe8\v.\x91?\xf8:\x8c0\xe2\x8ey\x95?\xf8\xdfA:\xdc\x11Å™?\xf8\x84\xf7CÖ”\x10\x9e?\xf8\x95\xd6&\xe8\v.\xa1?\xf8:\x8c0\xe2\x8ey\xa5?\xf8\xdfA:\xdc\x11Å©?\xf8\x84\xf7CÖ”\x10\xae?\xf8\x95\xd6&\xe8\v.\xb1?\xf8:\x8c0\xe2\x8ey\xb5?\xf8\xdfA:\xdc\
x11Ź?\xf8\x84\xf7CÖ”\x10\xbe?\xf8\x95\xd6&\xe8\v.\xc1?\xf8:\x8c0\xe2\x8ey\xc5?\xf8\xdfA:\xdc\x11\xc5\xc9?\xf8\x84\xf7CÖ”\x10\xce?\xf8\x95\xd6&\xe8\v.\xd1?\xf8:\x8c0\xe2\x8ey\xd5?\xf8\xdfA:\xdc\x11\xc5\xd9?\xf8\x84\xf7CÖ”\x10\xde?\xf8\x95\xd6&\xe8\v.\xe1?\xf8:\x8c0\xe2\x8ey\xe5?\xf8\xdfA:\xdc\x11\xc5\xe9?\xf8\x84\xf7CÖ”\x10\xee?\xf8\x95\xd6&\xe8\v.\xf1?\xf8:\x8c0\xe2\x8ey\xf5?\xf8\xdfA:\xdc\x11\xc5\xf9?\xf8\x84\xf7CÖ”\x10\xfe?\xf8\x95\xd6&\xe8\v.\x01@\xf8:\x8c0\xe2\x8ey\x05@\xf8\xdfA:\xdc\x11\xc5\t@\xf8\x84\xf7CÖ”\x10\x0e@\xf8\x95\xd6&\xe8\v.\x11@\xf8:\x8c0\xe2\x8ey\x15@\xf8\xdfA:\xdc\x11\xc5\x19@\xf8\x84\xf7CÖ”\x10\x1e@\xf8\x95\xd6&\xe8\v.!@\xf8:\x8c0\xe2\x8ey%@\xf8\xdfA:\xdc\x11\xc5)@\xf8\x84\xf7CÖ”\x10.@\xf8\x95\xd6&\xe8\v.1@\xf8:\x8c0\xe2\x8ey5@\xf8\xdfA:\xdc\x11\xc59@\xf8\x84\xf7CÖ”\x10>@\xf8\x95\xd6&\xe8\v.A@\xf8:\x8c0\xe2\x8eyE@\xf8\xdfA:\xdc\x11\xc5I@\xf8\x84\xf7CÖ”\x10N@\xf8\x95\xd6&\xe8\v.Q@\xf8:\x8c0\xe2\x8eyU@\xf8\xdfA:\xdc\x11\xc5Y@\xf8\x84\xf7CÖ”\x10^@\xf8\x95\xd6&\xe8\v.a@\xf8:\x8c0\xe2\x8eye@\xf8\xdfA:\xdc\x11\xc5i@\xf8\x84\xf7CÖ”\x10n@\xf8\x95\xd6&\xe8\v.q@\xf8:\x8c0\xe2\x8eyu@\xf8\xdfA:\xdc\x11\xc5y@\xf8\x84\xf7CÖ”\x10~@\xf8\x95\xd6&\xe8\v.\x81@\xf8:\x8c0\xe2\x8ey\x85@\xf8\xdfA:\xdc\x11ʼn@\xf8\x84\xf7CÖ”\x10\x8e@\xf8\x95\xd6&\xe8\v.\x91@\xf8:\x8c0\xe2\x8ey\x95@\xf8\xdfA:\xdc\x11Å™@\xf8\x84\xf7CÖ”\x10\x9e@\xf8\x95\xd6&\xe8\v.\xa1@\xf8:\x8c0\xe2\x8ey\xa5@\xf8\xdfA:\xdc\x11Å©@\xf8\x84\xf7CÖ”\x10\xae@\xf8\x95\xd6&\xe8\v.\xb1@\xf8:\x8c0\xe2\x8ey\xb5@\xf8\xdfA:\xdc\x11Ź@\xf8\x84\xf7CÖ”\x10\xbe@\xf8\x95\xd6&\xe8\v.\xc1@\xf8:\x8c0\xe2\x8ey\xc5@\xf8\xdfA:\xdc\x11\xc5\xc9@\xf8\x84\xf7CÖ”\x10\xce@\xf8\x95\xd6&\xe8\v.\xd1@\xf8:\x8c0\xe2\x8ey\xd5@\xf8\xdfA:\xdc\x11\xc5\xd9@\xf8\x84\xf7CÖ”\x10\xde@\xf8\x95\xd6&\xe8\v.\xe1@\xf8:\x8c0\xe2\x8ey\xe5@\xf8\xdfA:\xdc\x11\xc5\xe9@\xf8\x84\xf7CÖ”\x10\xee@\xf8\x95\xd6&\xe8\v.\xf1@\xf8:\x8c0\xe2\x8ey\xf5@\xf8\xdfA:\xdc\x11\xc5\xf9@\xf8\x84\xf7CÖ”\x10\xfe@\xf8\x95\xd6&\xe8\v.\x01A\xfe\xf0\x7f\x00" + + var secondsToNs = float64(time.Second) + + dserialize := func(data string) *metrics2.Float64Histogram { + var res metrics2.Float64Histogram + if err := gob.NewDecoder(bytes.NewReader([]byte(data))).Decode(&res); err != nil { + panic(err) + } + return &res + } + cpuSchedLatency := metrics.RuntimeHistogramFromData(secondsToNs, dserialize(sLatency)) + registry.Register("system/cpu/schedlatency", cpuSchedLatency) + + memPauses := metrics.RuntimeHistogramFromData(secondsToNs, dserialize(gcPauses)) + registry.Register("system/memory/pauses", memPauses) + } return registry } diff --git a/metrics/internal/sampledata_test.go b/metrics/internal/sampledata_test.go new file mode 100644 index 000000000000..f0c0ce39b286 --- /dev/null +++ b/metrics/internal/sampledata_test.go @@ -0,0 +1,27 @@ +package internal + +import ( + "bytes" + "encoding/gob" + "fmt" + metrics2 "runtime/metrics" + "testing" + "time" + + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +func TestCollectRuntimeMetrics(t *testing.T) { + t.Skip("Only used for generating testdata") + serialize := func(path string, histogram *metrics2.Float64Histogram) { + var f = new(bytes.Buffer) + if err := gob.NewEncoder(f).Encode(histogram); err != nil { + panic(err) + } + fmt.Printf("var %v = %q\n", path, f.Bytes()) + } + time.Sleep(2 * time.Second) + stats := metrics.ReadRuntimeStats() + serialize("schedlatency", stats.SchedLatency) + serialize("gcpauses", stats.GCPauses) +} diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go index 87c3ab6a9088..fa0123e05abf 100644 --- 
a/metrics/librato/librato.go +++ b/metrics/librato/librato.go @@ -61,16 +61,16 @@ func (rep *Reporter) Run() { // calculate sum of squares from data provided by metrics.Histogram // see http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods -func sumSquares(s metrics.Sample) float64 { - count := float64(s.Count()) - sumSquared := math.Pow(count*s.Mean(), 2) - sumSquares := math.Pow(count*s.StdDev(), 2) + sumSquared/count +func sumSquares(icount int64, mean, stDev float64) float64 { + count := float64(icount) + sumSquared := math.Pow(count*mean, 2) + sumSquares := math.Pow(count*stDev, 2) + sumSquared/count if math.IsNaN(sumSquares) { return 0.0 } return sumSquares } -func sumSquaresTimer(t metrics.Timer) float64 { +func sumSquaresTimer(t metrics.TimerSnapshot) float64 { count := float64(t.Count()) sumSquared := math.Pow(count*t.Mean(), 2) sumSquares := math.Pow(count*t.StdDev(), 2) + sumSquared/count @@ -97,9 +97,10 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B measurement[Period] = rep.Interval.Seconds() switch m := metric.(type) { case metrics.Counter: - if m.Count() > 0 { + ms := m.Snapshot() + if ms.Count() > 0 { measurement[Name] = fmt.Sprintf("%s.%s", name, "count") - measurement[Value] = float64(m.Count()) + measurement[Value] = float64(ms.Count()) measurement[Attributes] = map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, @@ -108,9 +109,9 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B snapshot.Counters = append(snapshot.Counters, measurement) } case metrics.CounterFloat64: - if m.Count() > 0 { + if count := m.Snapshot().Count(); count > 0 { measurement[Name] = fmt.Sprintf("%s.%s", name, "count") - measurement[Value] = m.Count() + measurement[Value] = count measurement[Attributes] = map[string]interface{}{ DisplayUnitsLong: Operations, DisplayUnitsShort: OperationsShort, @@ -120,44 +121,45 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B } case metrics.Gauge: measurement[Name] = name - measurement[Value] = float64(m.Value()) + measurement[Value] = float64(m.Snapshot().Value()) snapshot.Gauges = append(snapshot.Gauges, measurement) case metrics.GaugeFloat64: measurement[Name] = name - measurement[Value] = m.Value() + measurement[Value] = m.Snapshot().Value() snapshot.Gauges = append(snapshot.Gauges, measurement) case metrics.GaugeInfo: measurement[Name] = name - measurement[Value] = m.Value() + measurement[Value] = m.Snapshot().Value() snapshot.Gauges = append(snapshot.Gauges, measurement) case metrics.Histogram: - if m.Count() > 0 { + ms := m.Snapshot() + if ms.Count() > 0 { gauges := make([]Measurement, histogramGaugeCount) - s := m.Sample() measurement[Name] = fmt.Sprintf("%s.%s", name, "hist") - measurement[Count] = uint64(s.Count()) - measurement[Max] = float64(s.Max()) - measurement[Min] = float64(s.Min()) - measurement[Sum] = float64(s.Sum()) - measurement[SumSquares] = sumSquares(s) + measurement[Count] = uint64(ms.Count()) + measurement[Max] = float64(ms.Max()) + measurement[Min] = float64(ms.Min()) + measurement[Sum] = float64(ms.Sum()) + measurement[SumSquares] = sumSquares(ms.Count(), ms.Mean(), ms.StdDev()) gauges[0] = measurement for i, p := range rep.Percentiles { gauges[i+1] = Measurement{ Name: fmt.Sprintf("%s.%.2f", measurement[Name], p), - Value: s.Percentile(p), + Value: ms.Percentile(p), Period: measurement[Period], } } snapshot.Gauges = append(snapshot.Gauges, gauges...) 
} case metrics.Meter: + ms := m.Snapshot() measurement[Name] = name - measurement[Value] = float64(m.Count()) + measurement[Value] = float64(ms.Count()) snapshot.Counters = append(snapshot.Counters, measurement) snapshot.Gauges = append(snapshot.Gauges, Measurement{ Name: fmt.Sprintf("%s.%s", name, "1min"), - Value: m.Rate1(), + Value: ms.Rate1(), Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, @@ -167,7 +169,7 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B }, Measurement{ Name: fmt.Sprintf("%s.%s", name, "5min"), - Value: m.Rate5(), + Value: ms.Rate5(), Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, @@ -177,7 +179,7 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B }, Measurement{ Name: fmt.Sprintf("%s.%s", name, "15min"), - Value: m.Rate15(), + Value: ms.Rate15(), Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, @@ -187,26 +189,27 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B }, ) case metrics.Timer: + ms := m.Snapshot() measurement[Name] = name - measurement[Value] = float64(m.Count()) + measurement[Value] = float64(ms.Count()) snapshot.Counters = append(snapshot.Counters, measurement) - if m.Count() > 0 { + if ms.Count() > 0 { libratoName := fmt.Sprintf("%s.%s", name, "timer.mean") gauges := make([]Measurement, histogramGaugeCount) gauges[0] = Measurement{ Name: libratoName, - Count: uint64(m.Count()), - Sum: m.Mean() * float64(m.Count()), - Max: float64(m.Max()), - Min: float64(m.Min()), - SumSquares: sumSquaresTimer(m), + Count: uint64(ms.Count()), + Sum: ms.Mean() * float64(ms.Count()), + Max: float64(ms.Max()), + Min: float64(ms.Min()), + SumSquares: sumSquaresTimer(ms), Period: int64(rep.Interval.Seconds()), Attributes: rep.TimerAttributes, } for i, p := range rep.Percentiles { gauges[i+1] = Measurement{ Name: fmt.Sprintf("%s.timer.%2.0f", name, p*100), - Value: m.Percentile(p), + Value: ms.Percentile(p), Period: int64(rep.Interval.Seconds()), Attributes: rep.TimerAttributes, } @@ -215,7 +218,7 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B snapshot.Gauges = append(snapshot.Gauges, Measurement{ Name: fmt.Sprintf("%s.%s", name, "rate.1min"), - Value: m.Rate1(), + Value: ms.Rate1(), Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, @@ -225,7 +228,7 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B }, Measurement{ Name: fmt.Sprintf("%s.%s", name, "rate.5min"), - Value: m.Rate5(), + Value: ms.Rate5(), Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, @@ -235,7 +238,7 @@ func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot B }, Measurement{ Name: fmt.Sprintf("%s.%s", name, "rate.15min"), - Value: m.Rate15(), + Value: ms.Rate15(), Period: int64(rep.Interval.Seconds()), Attributes: map[string]interface{}{ DisplayUnitsLong: Operations, diff --git a/metrics/log.go b/metrics/log.go index d71a1c3d9c66..3b9773faa728 100644 --- a/metrics/log.go +++ b/metrics/log.go @@ -23,19 +23,19 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { switch metric := i.(type) { case Counter: l.Printf("counter %s\n", name) - l.Printf(" count: %9d\n", metric.Count()) + l.Printf(" count: %9d\n", 
metric.Snapshot().Count()) case CounterFloat64: l.Printf("counter %s\n", name) - l.Printf(" count: %f\n", metric.Count()) + l.Printf(" count: %f\n", metric.Snapshot().Count()) case Gauge: l.Printf("gauge %s\n", name) - l.Printf(" value: %9d\n", metric.Value()) + l.Printf(" value: %9d\n", metric.Snapshot().Value()) case GaugeFloat64: l.Printf("gauge %s\n", name) - l.Printf(" value: %f\n", metric.Value()) + l.Printf(" value: %f\n", metric.Snapshot().Value()) case GaugeInfo: l.Printf("gauge %s\n", name) - l.Printf(" value: %s\n", metric.Value()) + l.Printf(" value: %s\n", metric.Snapshot().Value()) case Healthcheck: metric.Check() l.Printf("healthcheck %s\n", name) diff --git a/metrics/meter.go b/metrics/meter.go index 8a89dc4275f1..22475ef6ebee 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -1,21 +1,25 @@ package metrics import ( + "math" "sync" "sync/atomic" "time" ) -// Meters count events to produce exponentially-weighted moving average rates -// at one-, five-, and fifteen-minutes and a mean rate. -type Meter interface { +type MeterSnapshot interface { Count() int64 - Mark(int64) Rate1() float64 Rate5() float64 Rate15() float64 RateMean() float64 - Snapshot() Meter +} + +// Meters count events to produce exponentially-weighted moving average rates +// at one-, five-, and fifteen-minutes and a mean rate. +type Meter interface { + Mark(int64) + Snapshot() MeterSnapshot Stop() } @@ -30,17 +34,6 @@ func GetOrRegisterMeter(name string, r Registry) Meter { return r.GetOrRegister(name, NewMeter).(Meter) } -// GetOrRegisterMeterForced returns an existing Meter or constructs and registers a -// new StandardMeter no matter the global switch is enabled or not. -// Be sure to unregister the meter from the registry once it is of no use to -// allow for garbage collection. -func GetOrRegisterMeterForced(name string, r Registry) Meter { - if nil == r { - r = DefaultRegistry - } - return r.GetOrRegister(name, NewMeterForced).(Meter) -} - // NewMeter constructs a new StandardMeter and launches a goroutine. // Be sure to call Stop() once the meter is of no use to allow for garbage collection. func NewMeter() Meter { @@ -68,115 +61,53 @@ func NewInactiveMeter() Meter { return m } -// NewMeterForced constructs a new StandardMeter and launches a goroutine no matter -// the global switch is enabled or not. -// Be sure to call Stop() once the meter is of no use to allow for garbage collection. -func NewMeterForced() Meter { - m := newStandardMeter() - arbiter.Lock() - defer arbiter.Unlock() - arbiter.meters[m] = struct{}{} - if !arbiter.started { - arbiter.started = true - go arbiter.tick() - } - return m -} - // NewRegisteredMeter constructs and registers a new StandardMeter // and launches a goroutine. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. func NewRegisteredMeter(name string, r Registry) Meter { - c := NewMeter() - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c + return GetOrRegisterMeter(name, r) } -// NewRegisteredMeterForced constructs and registers a new StandardMeter -// and launches a goroutine no matter the global switch is enabled or not. -// Be sure to unregister the meter from the registry once it is of no use to -// allow for garbage collection. -func NewRegisteredMeterForced(name string, r Registry) Meter { - c := NewMeterForced() - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} - -// MeterSnapshot is a read-only copy of another Meter. 
-type MeterSnapshot struct { - temp atomic.Int64 +// meterSnapshot is a read-only copy of the meter's internal values. +type meterSnapshot struct { count int64 rate1, rate5, rate15, rateMean float64 } // Count returns the count of events at the time the snapshot was taken. -func (m *MeterSnapshot) Count() int64 { return m.count } - -// Mark panics. -func (*MeterSnapshot) Mark(n int64) { - panic("Mark called on a MeterSnapshot") -} +func (m *meterSnapshot) Count() int64 { return m.count } // Rate1 returns the one-minute moving average rate of events per second at the // time the snapshot was taken. -func (m *MeterSnapshot) Rate1() float64 { return m.rate1 } +func (m *meterSnapshot) Rate1() float64 { return m.rate1 } // Rate5 returns the five-minute moving average rate of events per second at // the time the snapshot was taken. -func (m *MeterSnapshot) Rate5() float64 { return m.rate5 } +func (m *meterSnapshot) Rate5() float64 { return m.rate5 } // Rate15 returns the fifteen-minute moving average rate of events per second // at the time the snapshot was taken. -func (m *MeterSnapshot) Rate15() float64 { return m.rate15 } +func (m *meterSnapshot) Rate15() float64 { return m.rate15 } // RateMean returns the meter's mean rate of events per second at the time the // snapshot was taken. -func (m *MeterSnapshot) RateMean() float64 { return m.rateMean } - -// Snapshot returns the snapshot. -func (m *MeterSnapshot) Snapshot() Meter { return m } - -// Stop is a no-op. -func (m *MeterSnapshot) Stop() {} +func (m *meterSnapshot) RateMean() float64 { return m.rateMean } // NilMeter is a no-op Meter. type NilMeter struct{} -// Count is a no-op. -func (NilMeter) Count() int64 { return 0 } - -// Mark is a no-op. -func (NilMeter) Mark(n int64) {} - -// Rate1 is a no-op. -func (NilMeter) Rate1() float64 { return 0.0 } - -// Rate5 is a no-op. -func (NilMeter) Rate5() float64 { return 0.0 } - -// Rate15 is a no-op. -func (NilMeter) Rate15() float64 { return 0.0 } - -// RateMean is a no-op. -func (NilMeter) RateMean() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilMeter) Snapshot() Meter { return NilMeter{} } - -// Stop is a no-op. -func (NilMeter) Stop() {} +func (NilMeter) Count() int64 { return 0 } +func (NilMeter) Mark(n int64) {} +func (NilMeter) Snapshot() MeterSnapshot { return (*emptySnapshot)(nil) } +func (NilMeter) Stop() {} // StandardMeter is the standard implementation of a Meter. type StandardMeter struct { - lock sync.RWMutex - snapshot *MeterSnapshot + count atomic.Int64 + uncounted atomic.Int64 // not yet added to the EWMAs + rateMean atomic.Uint64 + a1, a5, a15 EWMA startTime time.Time stopped atomic.Bool @@ -184,7 +115,6 @@ type StandardMeter struct { func newStandardMeter() *StandardMeter { return &StandardMeter{ - snapshot: &MeterSnapshot{}, a1: NewEWMA1(), a5: NewEWMA5(), a15: NewEWMA15(), @@ -194,97 +124,42 @@ func newStandardMeter() *StandardMeter { // Stop stops the meter, Mark() will be a no-op if you use it after being stopped. func (m *StandardMeter) Stop() { - stopped := m.stopped.Swap(true) - if !stopped { + if stopped := m.stopped.Swap(true); !stopped { arbiter.Lock() delete(arbiter.meters, m) arbiter.Unlock() } } -// Count returns the number of events recorded. -// It updates the meter to be as accurate as possible -func (m *StandardMeter) Count() int64 { - m.lock.Lock() - defer m.lock.Unlock() - m.updateMeter() - return m.snapshot.count -} - // Mark records the occurrence of n events. 
func (m *StandardMeter) Mark(n int64) { - m.snapshot.temp.Add(n) -} - -// Rate1 returns the one-minute moving average rate of events per second. -func (m *StandardMeter) Rate1() float64 { - m.lock.RLock() - defer m.lock.RUnlock() - return m.snapshot.rate1 -} - -// Rate5 returns the five-minute moving average rate of events per second. -func (m *StandardMeter) Rate5() float64 { - m.lock.RLock() - defer m.lock.RUnlock() - return m.snapshot.rate5 -} - -// Rate15 returns the fifteen-minute moving average rate of events per second. -func (m *StandardMeter) Rate15() float64 { - m.lock.RLock() - defer m.lock.RUnlock() - return m.snapshot.rate15 -} - -// RateMean returns the meter's mean rate of events per second. -func (m *StandardMeter) RateMean() float64 { - m.lock.RLock() - defer m.lock.RUnlock() - return m.snapshot.rateMean + m.uncounted.Add(n) } // Snapshot returns a read-only copy of the meter. -func (m *StandardMeter) Snapshot() Meter { - m.lock.RLock() - snapshot := MeterSnapshot{ - count: m.snapshot.count, - rate1: m.snapshot.rate1, - rate5: m.snapshot.rate5, - rate15: m.snapshot.rate15, - rateMean: m.snapshot.rateMean, +func (m *StandardMeter) Snapshot() MeterSnapshot { + return &meterSnapshot{ + count: m.count.Load() + m.uncounted.Load(), + rate1: m.a1.Snapshot().Rate(), + rate5: m.a5.Snapshot().Rate(), + rate15: m.a15.Snapshot().Rate(), + rateMean: math.Float64frombits(m.rateMean.Load()), } - snapshot.temp.Store(m.snapshot.temp.Load()) - m.lock.RUnlock() - return &snapshot -} - -func (m *StandardMeter) updateSnapshot() { - // should run with write lock held on m.lock - snapshot := m.snapshot - snapshot.rate1 = m.a1.Rate() - snapshot.rate5 = m.a5.Rate() - snapshot.rate15 = m.a15.Rate() - snapshot.rateMean = float64(snapshot.count) / time.Since(m.startTime).Seconds() } -func (m *StandardMeter) updateMeter() { - // should only run with write lock held on m.lock - n := m.snapshot.temp.Swap(0) - m.snapshot.count += n +func (m *StandardMeter) tick() { + // Take the uncounted values, add to count + n := m.uncounted.Swap(0) + count := m.count.Add(n) + m.rateMean.Store(math.Float64bits(float64(count) / time.Since(m.startTime).Seconds())) + // Update the EWMA's internal state m.a1.Update(n) m.a5.Update(n) m.a15.Update(n) -} - -func (m *StandardMeter) tick() { - m.lock.Lock() - defer m.lock.Unlock() - m.updateMeter() + // And trigger them to calculate the rates m.a1.Tick() m.a5.Tick() m.a15.Tick() - m.updateSnapshot() } // meterArbiter ticks meters every 5s from a single goroutine. 
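The meter.go changes above replace the lock-guarded internal snapshot with atomic counters and move every read path behind the new MeterSnapshot interface. The following is a minimal usage sketch, not part of the patch: it assumes the metrics package as refactored here, that the package-level metrics.Enabled switch exists and is turned on, and the meter name "example/requests" is purely illustrative.

package main

import (
	"fmt"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	// Assumption: the package-level Enabled flag gates real meter construction;
	// with it off, constructors return no-op meters whose snapshots report zeros.
	metrics.Enabled = true

	// A nil registry falls back to metrics.DefaultRegistry (see GetOrRegisterMeter above).
	m := metrics.NewRegisteredMeter("example/requests", nil)
	defer m.Stop() // remove the meter from the 5s ticking arbiter when done

	m.Mark(3) // writers only Mark; they no longer read counts or rates off the live meter

	// Readers take a point-in-time MeterSnapshot instead of calling
	// Count/Rate1 on the Meter itself, which no longer exposes them.
	snap := m.Snapshot()
	fmt.Println("count:", snap.Count(), "1-min rate:", snap.Rate1())
}

With reads confined to frozen snapshots, concurrent Mark calls never contend with reporters exporting counts and rates, which is the point of dropping the RWMutex in favor of atomics.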
diff --git a/metrics/meter_test.go b/metrics/meter_test.go index 99bbd6d98937..8330ea9647f5 100644 --- a/metrics/meter_test.go +++ b/metrics/meter_test.go @@ -12,11 +12,17 @@ func BenchmarkMeter(b *testing.B) { m.Mark(1) } } - +func TestMeter(t *testing.T) { + m := NewMeter() + m.Mark(47) + if v := m.Snapshot().Count(); v != 47 { + t.Fatalf("have %d want %d", v, 47) + } +} func TestGetOrRegisterMeter(t *testing.T) { r := NewRegistry() NewRegisteredMeter("foo", r).Mark(47) - if m := GetOrRegisterMeter("foo", r); m.Count() != 47 { + if m := GetOrRegisterMeter("foo", r).Snapshot(); m.Count() != 47 { t.Fatal(m.Count()) } } @@ -31,10 +37,10 @@ func TestMeterDecay(t *testing.T) { ma.meters[m] = struct{}{} m.Mark(1) ma.tickMeters() - rateMean := m.RateMean() + rateMean := m.Snapshot().RateMean() time.Sleep(100 * time.Millisecond) ma.tickMeters() - if m.RateMean() >= rateMean { + if m.Snapshot().RateMean() >= rateMean { t.Error("m.RateMean() didn't decrease") } } @@ -42,7 +48,7 @@ func TestMeterDecay(t *testing.T) { func TestMeterNonzero(t *testing.T) { m := NewMeter() m.Mark(3) - if count := m.Count(); count != 3 { + if count := m.Snapshot().Count(); count != 3 { t.Errorf("m.Count(): 3 != %v\n", count) } } @@ -59,16 +65,8 @@ func TestMeterStop(t *testing.T) { } } -func TestMeterSnapshot(t *testing.T) { - m := NewMeter() - m.Mark(1) - if snapshot := m.Snapshot(); m.RateMean() != snapshot.RateMean() { - t.Fatal(snapshot) - } -} - func TestMeterZero(t *testing.T) { - m := NewMeter() + m := NewMeter().Snapshot() if count := m.Count(); count != 0 { t.Errorf("m.Count(): 0 != %v\n", count) } @@ -79,13 +77,13 @@ func TestMeterRepeat(t *testing.T) { for i := 0; i < 101; i++ { m.Mark(int64(i)) } - if count := m.Count(); count != 5050 { + if count := m.Snapshot().Count(); count != 5050 { t.Errorf("m.Count(): 5050 != %v\n", count) } for i := 0; i < 101; i++ { m.Mark(int64(i)) } - if count := m.Count(); count != 10100 { + if count := m.Snapshot().Count(); count != 10100 { t.Errorf("m.Count(): 10100 != %v\n", count) } } diff --git a/metrics/metrics.go b/metrics/metrics.go index 3468cfe8e407..d5c1c61cbd06 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -85,6 +85,12 @@ var runtimeSamples = []metrics.Sample{ {Name: "/sched/latencies:seconds"}, // histogram } +func ReadRuntimeStats() *runtimeStats { + r := new(runtimeStats) + readRuntimeStats(r) + return r +} + func readRuntimeStats(v *runtimeStats) { metrics.Read(runtimeSamples) for _, s := range runtimeSamples { diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 534c44139b36..2861d5f2caf6 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -98,8 +98,8 @@ func Example() { t.Time(func() { time.Sleep(10 * time.Millisecond) }) t.Update(1) - fmt.Println(c.Count()) - fmt.Println(t.Min()) + fmt.Println(c.Snapshot().Count()) + fmt.Println(t.Snapshot().Min()) // Output: 17 // 1 } diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go index 4d2e209238fa..e81690f94340 100644 --- a/metrics/opentsdb.go +++ b/metrics/opentsdb.go @@ -65,15 +65,15 @@ func (c *OpenTSDBConfig) writeRegistry(w io.Writer, now int64, shortHostname str c.Registry.Each(func(name string, i interface{}) { switch metric := i.(type) { case Counter: - fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname) case CounterFloat64: - fmt.Fprintf(w, "put %s.%s.count %d %f host=%s\n", c.Prefix, name, now, 
metric.Count(), shortHostname) + fmt.Fprintf(w, "put %s.%s.count %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname) case Gauge: - fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname) case GaugeFloat64: - fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname) case GaugeInfo: - fmt.Fprintf(w, "put %s.%s.value %d %s host=%s\n", c.Prefix, name, now, metric.Value().String(), shortHostname) + fmt.Fprintf(w, "put %s.%s.value %d %s host=%s\n", c.Prefix, name, now, metric.Snapshot().Value().String(), shortHostname) case Histogram: h := metric.Snapshot() ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) diff --git a/metrics/opentsdb_test.go b/metrics/opentsdb_test.go index c02b98af061e..4548309f9c23 100644 --- a/metrics/opentsdb_test.go +++ b/metrics/opentsdb_test.go @@ -1,6 +1,7 @@ package metrics import ( + "fmt" "net" "os" "strings" @@ -47,5 +48,19 @@ func TestExampleOpenTSB(t *testing.T) { } if have, want := w.String(), string(wantB); have != want { t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) + t.Logf("have vs want:\n%v", findFirstDiffPos(have, want)) } } + +func findFirstDiffPos(a, b string) string { + yy := strings.Split(b, "\n") + for i, x := range strings.Split(a, "\n") { + if i >= len(yy) { + return fmt.Sprintf("have:%d: %s\nwant:%d: ", i, x, i) + } + if y := yy[i]; x != y { + return fmt.Sprintf("have:%d: %s\nwant:%d: %s", i, x, i, y) + } + } + return "" +} diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index 2187438df8ee..0b81c63ba5b7 100644 --- a/metrics/prometheus/collector.go +++ b/metrics/prometheus/collector.go @@ -75,27 +75,27 @@ func (c *collector) Add(name string, i any) error { return nil } -func (c *collector) addCounter(name string, m metrics.Counter) { +func (c *collector) addCounter(name string, m metrics.CounterSnapshot) { c.writeGaugeCounter(name, m.Count()) } -func (c *collector) addCounterFloat64(name string, m metrics.CounterFloat64) { +func (c *collector) addCounterFloat64(name string, m metrics.CounterFloat64Snapshot) { c.writeGaugeCounter(name, m.Count()) } -func (c *collector) addGauge(name string, m metrics.Gauge) { +func (c *collector) addGauge(name string, m metrics.GaugeSnapshot) { c.writeGaugeCounter(name, m.Value()) } -func (c *collector) addGaugeFloat64(name string, m metrics.GaugeFloat64) { +func (c *collector) addGaugeFloat64(name string, m metrics.GaugeFloat64Snapshot) { c.writeGaugeCounter(name, m.Value()) } -func (c *collector) addGaugeInfo(name string, m metrics.GaugeInfo) { +func (c *collector) addGaugeInfo(name string, m metrics.GaugeInfoSnapshot) { c.writeGaugeInfo(name, m.Value()) } -func (c *collector) addHistogram(name string, m metrics.Histogram) { +func (c *collector) addHistogram(name string, m metrics.HistogramSnapshot) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) @@ -106,11 +106,11 @@ func (c *collector) addHistogram(name string, m metrics.Histogram) { c.buff.WriteRune('\n') } -func (c *collector) addMeter(name string, m metrics.Meter) { +func (c *collector) addMeter(name string, m metrics.MeterSnapshot) { c.writeGaugeCounter(name, m.Count()) } -func (c *collector) 
addTimer(name string, m metrics.Timer) { +func (c *collector) addTimer(name string, m metrics.TimerSnapshot) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) @@ -121,13 +121,12 @@ func (c *collector) addTimer(name string, m metrics.Timer) { c.buff.WriteRune('\n') } -func (c *collector) addResettingTimer(name string, m metrics.ResettingTimer) { - if len(m.Values()) <= 0 { +func (c *collector) addResettingTimer(name string, m metrics.ResettingTimerSnapshot) { + if m.Count() <= 0 { return } - ps := m.Percentiles([]float64{50, 95, 99}) - val := m.Values() - c.writeSummaryCounter(name, len(val)) + ps := m.Percentiles([]float64{0.50, 0.95, 0.99}) + c.writeSummaryCounter(name, m.Count()) c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) c.writeSummaryPercentile(name, "0.50", ps[0]) c.writeSummaryPercentile(name, "0.95", ps[1]) diff --git a/metrics/prometheus/collector_test.go b/metrics/prometheus/collector_test.go index b7a17ff1011e..948c539ad666 100644 --- a/metrics/prometheus/collector_test.go +++ b/metrics/prometheus/collector_test.go @@ -55,10 +55,10 @@ func findFirstDiffPos(a, b string) string { yy := strings.Split(b, "\n") for i, x := range strings.Split(a, "\n") { if i >= len(yy) { - return fmt.Sprintf("a:%d: %s\nb:%d: ", i, x, i) + return fmt.Sprintf("have:%d: %s\nwant:%d: ", i, x, i) } if y := yy[i]; x != y { - return fmt.Sprintf("a:%d: %s\nb:%d: %s", i, x, i, y) + return fmt.Sprintf("have:%d: %s\nwant:%d: %s", i, x, i, y) } } return "" diff --git a/metrics/prometheus/testdata/prometheus.want b/metrics/prometheus/testdata/prometheus.want index f35496e61d31..861c5f5cf087 100644 --- a/metrics/prometheus/testdata/prometheus.want +++ b/metrics/prometheus/testdata/prometheus.want @@ -1,3 +1,25 @@ +# TYPE system_cpu_schedlatency_count counter +system_cpu_schedlatency_count 5645 + +# TYPE system_cpu_schedlatency summary +system_cpu_schedlatency {quantile="0.5"} 0 +system_cpu_schedlatency {quantile="0.75"} 7168 +system_cpu_schedlatency {quantile="0.95"} 1.6777216e+07 +system_cpu_schedlatency {quantile="0.99"} 2.9360128e+07 +system_cpu_schedlatency {quantile="0.999"} 3.3554432e+07 +system_cpu_schedlatency {quantile="0.9999"} 3.3554432e+07 + +# TYPE system_memory_pauses_count counter +system_memory_pauses_count 14 + +# TYPE system_memory_pauses summary +system_memory_pauses {quantile="0.5"} 32768 +system_memory_pauses {quantile="0.75"} 57344 +system_memory_pauses {quantile="0.95"} 196608 +system_memory_pauses {quantile="0.99"} 196608 +system_memory_pauses {quantile="0.999"} 196608 +system_memory_pauses {quantile="0.9999"} 196608 + # TYPE test_counter gauge test_counter 12345 @@ -31,9 +53,9 @@ test_meter 0 test_resetting_timer_count 6 # TYPE test_resetting_timer summary -test_resetting_timer {quantile="0.50"} 12000000 -test_resetting_timer {quantile="0.95"} 120000000 -test_resetting_timer {quantile="0.99"} 120000000 +test_resetting_timer {quantile="0.50"} 1.25e+07 +test_resetting_timer {quantile="0.95"} 1.2e+08 +test_resetting_timer {quantile="0.99"} 1.2e+08 # TYPE test_timer_count counter test_timer_count 6 diff --git a/metrics/registry.go b/metrics/registry.go index 0b54f2dc92c8..a6cae1fff0a9 100644 --- a/metrics/registry.go +++ b/metrics/registry.go @@ -150,13 +150,13 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { values := make(map[string]interface{}) switch metric := i.(type) { case Counter: - values["count"] = metric.Count() + values["count"] = metric.Snapshot().Count() case 
CounterFloat64: - values["count"] = metric.Count() + values["count"] = metric.Snapshot().Count() case Gauge: - values["value"] = metric.Value() + values["value"] = metric.Snapshot().Value() case GaugeFloat64: - values["value"] = metric.Value() + values["value"] = metric.Snapshot().Value() case Healthcheck: values["error"] = nil metric.Check() diff --git a/metrics/registry_test.go b/metrics/registry_test.go index 7cc5cf14fe55..75012dd4ac00 100644 --- a/metrics/registry_test.go +++ b/metrics/registry_test.go @@ -85,11 +85,11 @@ func TestRegistryDuplicate(t *testing.T) { func TestRegistryGet(t *testing.T) { r := NewRegistry() r.Register("foo", NewCounter()) - if count := r.Get("foo").(Counter).Count(); count != 0 { + if count := r.Get("foo").(Counter).Snapshot().Count(); count != 0 { t.Fatal(count) } r.Get("foo").(Counter).Inc(1) - if count := r.Get("foo").(Counter).Count(); count != 1 { + if count := r.Get("foo").(Counter).Snapshot().Count(); count != 1 { t.Fatal(count) } } diff --git a/metrics/resetting_sample.go b/metrics/resetting_sample.go index 43c1129cd0bc..c38ffcd3ec32 100644 --- a/metrics/resetting_sample.go +++ b/metrics/resetting_sample.go @@ -17,7 +17,7 @@ type resettingSample struct { } // Snapshot returns a read-only copy of the sample with the original reset. -func (rs *resettingSample) Snapshot() Sample { +func (rs *resettingSample) Snapshot() SampleSnapshot { s := rs.Sample.Snapshot() rs.Sample.Clear() return s diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go index 858984a32cc4..6802e3fcea98 100644 --- a/metrics/resetting_timer.go +++ b/metrics/resetting_timer.go @@ -1,22 +1,24 @@ package metrics import ( - "math" "sync" "time" - - "golang.org/x/exp/slices" ) // Initial slice capacity for the values stored in a ResettingTimer const InitialResettingTimerSliceCap = 10 +type ResettingTimerSnapshot interface { + Count() int + Mean() float64 + Max() int64 + Min() int64 + Percentiles([]float64) []float64 +} + // ResettingTimer is used for storing aggregated values for timers, which are reset on every flush interval. type ResettingTimer interface { - Values() []int64 - Snapshot() ResettingTimer - Percentiles([]float64) []int64 - Mean() float64 + Snapshot() ResettingTimerSnapshot Time(func()) Update(time.Duration) UpdateSince(time.Time) @@ -52,66 +54,40 @@ func NewResettingTimer() ResettingTimer { } // NilResettingTimer is a no-op ResettingTimer. -type NilResettingTimer struct { -} - -// Values is a no-op. -func (NilResettingTimer) Values() []int64 { return nil } - -// Snapshot is a no-op. -func (NilResettingTimer) Snapshot() ResettingTimer { return NilResettingTimer{} } - -// Time is a no-op. -func (NilResettingTimer) Time(f func()) { f() } - -// Update is a no-op. -func (NilResettingTimer) Update(time.Duration) {} - -// Percentiles panics. -func (NilResettingTimer) Percentiles([]float64) []int64 { - panic("Percentiles called on a NilResettingTimer") -} - -// Mean panics. -func (NilResettingTimer) Mean() float64 { - panic("Mean called on a NilResettingTimer") -} - -// UpdateSince is a no-op. 
-func (NilResettingTimer) UpdateSince(time.Time) {} +type NilResettingTimer struct{} + +func (NilResettingTimer) Values() []int64 { return nil } +func (n NilResettingTimer) Snapshot() ResettingTimerSnapshot { return n } +func (NilResettingTimer) Time(f func()) { f() } +func (NilResettingTimer) Update(time.Duration) {} +func (NilResettingTimer) Percentiles([]float64) []float64 { return nil } +func (NilResettingTimer) Mean() float64 { return 0.0 } +func (NilResettingTimer) Max() int64 { return 0 } +func (NilResettingTimer) Min() int64 { return 0 } +func (NilResettingTimer) UpdateSince(time.Time) {} +func (NilResettingTimer) Count() int { return 0 } // StandardResettingTimer is the standard implementation of a ResettingTimer. // and Meter. type StandardResettingTimer struct { values []int64 - mutex sync.Mutex -} + sum int64 // sum is a running total of the recorded values, used later to calculate the mean -// Values returns a slice with all measurements. -func (t *StandardResettingTimer) Values() []int64 { - return t.values + mutex sync.Mutex } // Snapshot resets the timer and returns a read-only copy of its contents. -func (t *StandardResettingTimer) Snapshot() ResettingTimer { +func (t *StandardResettingTimer) Snapshot() ResettingTimerSnapshot { t.mutex.Lock() defer t.mutex.Unlock() - currentValues := t.values - t.values = make([]int64, 0, InitialResettingTimerSliceCap) - - return &ResettingTimerSnapshot{ - values: currentValues, + snapshot := &resettingTimerSnapshot{} + if len(t.values) > 0 { + snapshot.mean = float64(t.sum) / float64(len(t.values)) + snapshot.values = t.values + t.values = make([]int64, 0, InitialResettingTimerSliceCap) } -} - -// Percentiles panics. -func (t *StandardResettingTimer) Percentiles([]float64) []int64 { - panic("Percentiles called on a StandardResettingTimer") -} - -// Mean panics. -func (t *StandardResettingTimer) Mean() float64 { - panic("Mean called on a StandardResettingTimer") + t.sum = 0 + return snapshot } // Record the duration of the execution of the given function. @@ -126,106 +102,70 @@ func (t *StandardResettingTimer) Update(d time.Duration) { t.mutex.Lock() defer t.mutex.Unlock() t.values = append(t.values, int64(d)) + t.sum += int64(d) } // Record the duration of an event that started at a time and ends now. func (t *StandardResettingTimer) UpdateSince(ts time.Time) { - t.mutex.Lock() - defer t.mutex.Unlock() - t.values = append(t.values, int64(time.Since(ts))) + t.Update(time.Since(ts)) } -// ResettingTimerSnapshot is a point-in-time copy of another ResettingTimer. -type ResettingTimerSnapshot struct { +// resettingTimerSnapshot is a point-in-time copy of another ResettingTimer. +type resettingTimerSnapshot struct { values []int64 mean float64 - thresholdBoundaries []int64 + max int64 + min int64 + thresholdBoundaries []float64 calculated bool } -// Snapshot returns the snapshot. -func (t *ResettingTimerSnapshot) Snapshot() ResettingTimer { return t } - -// Time panics. -func (*ResettingTimerSnapshot) Time(func()) { - panic("Time called on a ResettingTimerSnapshot") } - -// Update panics. -func (*ResettingTimerSnapshot) Update(time.Duration) { - panic("Update called on a ResettingTimerSnapshot") } - -// UpdateSince panics. -func (*ResettingTimerSnapshot) UpdateSince(time.Time) { - panic("UpdateSince called on a ResettingTimerSnapshot") } - -// Values returns all values from snapshot. -func (t *ResettingTimerSnapshot) Values() []int64 { - return t.values +// Count returns the number of values in the snapshot.
+func (t *resettingTimerSnapshot) Count() int { + return len(t.values) } // Percentiles returns the boundaries for the input percentiles. -func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []int64 { +// note: this method is not thread safe +func (t *resettingTimerSnapshot) Percentiles(percentiles []float64) []float64 { t.calc(percentiles) - return t.thresholdBoundaries } // Mean returns the mean of the snapshotted values -func (t *ResettingTimerSnapshot) Mean() float64 { +// note: this method is not thread safe +func (t *resettingTimerSnapshot) Mean() float64 { if !t.calculated { - t.calc([]float64{}) + t.calc(nil) } return t.mean } -func (t *ResettingTimerSnapshot) calc(percentiles []float64) { - slices.Sort(t.values) - - count := len(t.values) - if count > 0 { - min := t.values[0] - max := t.values[count-1] - - cumulativeValues := make([]int64, count) - cumulativeValues[0] = min - for i := 1; i < count; i++ { - cumulativeValues[i] = t.values[i] + cumulativeValues[i-1] - } - - t.thresholdBoundaries = make([]int64, len(percentiles)) - - thresholdBoundary := max - - for i, pct := range percentiles { - if count > 1 { - var abs float64 - if pct >= 0 { - abs = pct - } else { - abs = 100 + pct - } - // poor man's math.Round(x): - // math.Floor(x + 0.5) - indexOfPerc := int(math.Floor(((abs / 100.0) * float64(count)) + 0.5)) - if pct >= 0 { - indexOfPerc -= 1 // index offset=0 - } - thresholdBoundary = t.values[indexOfPerc] - } - - t.thresholdBoundaries[i] = thresholdBoundary - } - - sum := cumulativeValues[count-1] - t.mean = float64(sum) / float64(count) - } else { - t.thresholdBoundaries = make([]int64, len(percentiles)) - t.mean = 0 +// Max returns the max of the snapshotted values +// note: this method is not thread safe +func (t *resettingTimerSnapshot) Max() int64 { + if !t.calculated { + t.calc(nil) } + return t.max +} - t.calculated = true +// Min returns the min of the snapshotted values +// note: this method is not thread safe +func (t *resettingTimerSnapshot) Min() int64 { + if !t.calculated { + t.calc(nil) + } + return t.min +} + +func (t *resettingTimerSnapshot) calc(percentiles []float64) { + scores := CalculatePercentiles(t.values, percentiles) + t.thresholdBoundaries = scores + if len(t.values) == 0 { + return + } + t.min = t.values[0] + t.max = t.values[len(t.values)-1] } diff --git a/metrics/resetting_timer_test.go b/metrics/resetting_timer_test.go index 58fd47f35245..4571fc8eb052 100644 --- a/metrics/resetting_timer_test.go +++ b/metrics/resetting_timer_test.go @@ -10,9 +10,9 @@ func TestResettingTimer(t *testing.T) { values []int64 start int end int - wantP50 int64 - wantP95 int64 - wantP99 int64 + wantP50 float64 + wantP95 float64 + wantP99 float64 wantMean float64 wantMin int64 wantMax int64 @@ -21,14 +21,14 @@ func TestResettingTimer(t *testing.T) { values: []int64{}, start: 1, end: 11, - wantP50: 5, wantP95: 10, wantP99: 10, + wantP50: 5.5, wantP95: 10, wantP99: 10, wantMin: 1, wantMax: 10, wantMean: 5.5, }, { values: []int64{}, start: 1, end: 101, - wantP50: 50, wantP95: 95, wantP99: 99, + wantP50: 50.5, wantP95: 95.94999999999999, wantP99: 99.99, wantMin: 1, wantMax: 100, wantMean: 50.5, }, { @@ -56,11 +56,11 @@ func TestResettingTimer(t *testing.T) { values: []int64{1, 10}, start: 0, end: 0, - wantP50: 1, wantP95: 10, wantP99: 10, + wantP50: 5.5, wantP95: 10, wantP99: 10, wantMin: 1, wantMax: 10, wantMean: 5.5, }, } - for ind, tt := range tests { + for i, tt := range tests { timer := NewResettingTimer() for i := tt.start; i < tt.end; i++ { @@ -70,37 
+70,128 @@ func TestResettingTimer(t *testing.T) { for _, v := range tt.values { timer.Update(time.Duration(v)) } - snap := timer.Snapshot() - ps := snap.Percentiles([]float64{50, 95, 99}) + ps := snap.Percentiles([]float64{0.50, 0.95, 0.99}) - val := snap.Values() + if have, want := snap.Min(), tt.wantMin; have != want { + t.Fatalf("%d: min: have %d, want %d", i, have, want) + } + if have, want := snap.Max(), tt.wantMax; have != want { + t.Fatalf("%d: max: have %d, want %d", i, have, want) + } + if have, want := snap.Mean(), tt.wantMean; have != want { + t.Fatalf("%d: mean: have %v, want %v", i, have, want) + } + if have, want := ps[0], tt.wantP50; have != want { + t.Errorf("%d: p50: have %v, want %v", i, have, want) + } + if have, want := ps[1], tt.wantP95; have != want { + t.Errorf("%d: p95: have %v, want %v", i, have, want) + } + if have, want := ps[2], tt.wantP99; have != want { + t.Errorf("%d: p99: have %v, want %v", i, have, want) + } + } +} - if len(val) > 0 { - if tt.wantMin != val[0] { - t.Fatalf("%d: min: got %d, want %d", ind, val[0], tt.wantMin) - } +func TestResettingTimerWithFivePercentiles(t *testing.T) { + tests := []struct { + values []int64 + start int + end int + wantP05 float64 + wantP20 float64 + wantP50 float64 + wantP95 float64 + wantP99 float64 + wantMean float64 + wantMin int64 + wantMax int64 + }{ + { + values: []int64{}, + start: 1, + end: 11, + wantP05: 1, wantP20: 2.2, wantP50: 5.5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + { + values: []int64{}, + start: 1, + end: 101, + wantP05: 5.050000000000001, wantP20: 20.200000000000003, wantP50: 50.5, wantP95: 95.94999999999999, wantP99: 99.99, + wantMin: 1, wantMax: 100, wantMean: 50.5, + }, + { + values: []int64{1}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 1, wantP95: 1, wantP99: 1, + wantMin: 1, wantMax: 1, wantMean: 1, + }, + { + values: []int64{0}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{1, 10}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 5.5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + } + for ind, tt := range tests { + timer := NewResettingTimer() - if tt.wantMax != val[len(val)-1] { - t.Fatalf("%d: max: got %d, want %d", ind, val[len(val)-1], tt.wantMax) - } + for i := tt.start; i < tt.end; i++ { + tt.values = append(tt.values, int64(i)) } - if tt.wantMean != snap.Mean() { - t.Fatalf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + for _, v := range tt.values { + timer.Update(time.Duration(v)) } - if tt.wantP50 != ps[0] { - t.Fatalf("%d: p50: got %d, want %d", ind, ps[0], tt.wantP50) + snap := timer.Snapshot() + + ps := snap.Percentiles([]float64{0.05, 0.20, 0.50, 0.95, 0.99}) + + if tt.wantMin != snap.Min() { + t.Errorf("%d: min: got %d, want %d", ind, snap.Min(), tt.wantMin) } - if tt.wantP95 != ps[1] { - t.Fatalf("%d: p95: got %d, want %d", ind, ps[1], tt.wantP95) + if tt.wantMax != snap.Max() { + t.Errorf("%d: max: got %d, want %d", ind, snap.Max(), tt.wantMax) } - if tt.wantP99 != ps[2] { - t.Fatalf("%d: p99: got %d, want %d", ind, ps[2], tt.wantP99) + if tt.wantMean != snap.Mean() { + t.Errorf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + } + if tt.wantP05 != ps[0] { + t.Errorf("%d: p05: got %v, want %v", ind, ps[0], 
tt.wantP05) + } + if tt.wantP20 != ps[1] { + t.Errorf("%d: p20: got %v, want %v", ind, ps[1], tt.wantP20) + } + if tt.wantP50 != ps[2] { + t.Errorf("%d: p50: got %v, want %v", ind, ps[2], tt.wantP50) + } + if tt.wantP95 != ps[3] { + t.Errorf("%d: p95: got %v, want %v", ind, ps[3], tt.wantP95) + } + if tt.wantP99 != ps[4] { + t.Errorf("%d: p99: got %v, want %v", ind, ps[4], tt.wantP99) } } } diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go index c68939af1ef7..92fcbcc2814c 100644 --- a/metrics/runtimehistogram.go +++ b/metrics/runtimehistogram.go @@ -17,13 +17,19 @@ func getOrRegisterRuntimeHistogram(name string, scale float64, r Registry) *runt // runtimeHistogram wraps a runtime/metrics histogram. type runtimeHistogram struct { - v atomic.Value + v atomic.Value // v is a pointer to a metrics.Float64Histogram scaleFactor float64 } func newRuntimeHistogram(scale float64) *runtimeHistogram { h := &runtimeHistogram{scaleFactor: scale} - h.update(&metrics.Float64Histogram{}) + h.update(new(metrics.Float64Histogram)) + return h +} + +func RuntimeHistogramFromData(scale float64, hist *metrics.Float64Histogram) *runtimeHistogram { + h := &runtimeHistogram{scaleFactor: scale} + h.update(hist) return h } @@ -35,130 +41,107 @@ func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { return } - s := runtimeHistogramSnapshot{ + s := metrics.Float64Histogram{ Counts: make([]uint64, len(mh.Counts)), Buckets: make([]float64, len(mh.Buckets)), } copy(s.Counts, mh.Counts) - copy(s.Buckets, mh.Buckets) - for i, b := range s.Buckets { + for i, b := range mh.Buckets { s.Buckets[i] = b * h.scaleFactor } h.v.Store(&s) } -func (h *runtimeHistogram) load() *runtimeHistogramSnapshot { - return h.v.Load().(*runtimeHistogramSnapshot) -} - func (h *runtimeHistogram) Clear() { panic("runtimeHistogram does not support Clear") } func (h *runtimeHistogram) Update(int64) { panic("runtimeHistogram does not support Update") } -func (h *runtimeHistogram) Sample() Sample { - return NilSample{} -} - -// Snapshot returns a non-changing cop of the histogram. -func (h *runtimeHistogram) Snapshot() Histogram { - return h.load() -} - -// Count returns the sample count. -func (h *runtimeHistogram) Count() int64 { - return h.load().Count() -} - -// Mean returns an approximation of the mean. -func (h *runtimeHistogram) Mean() float64 { - return h.load().Mean() -} - -// StdDev approximates the standard deviation of the histogram. -func (h *runtimeHistogram) StdDev() float64 { - return h.load().StdDev() -} - -// Variance approximates the variance of the histogram. -func (h *runtimeHistogram) Variance() float64 { - return h.load().Variance() -} - -// Percentile computes the p'th percentile value. -func (h *runtimeHistogram) Percentile(p float64) float64 { - return h.load().Percentile(p) -} -// Percentiles computes all requested percentile values. -func (h *runtimeHistogram) Percentiles(ps []float64) []float64 { - return h.load().Percentiles(ps) -} - -// Max returns the highest sample value. -func (h *runtimeHistogram) Max() int64 { - return h.load().Max() +// Snapshot returns a non-changing copy of the histogram. +func (h *runtimeHistogram) Snapshot() HistogramSnapshot { + hist := h.v.Load().(*metrics.Float64Histogram) + return newRuntimeHistogramSnapshot(hist) } -// Min returns the lowest sample value. -func (h *runtimeHistogram) Min() int64 { - return h.load().Min() -} - -// Sum returns the sum of all sample values. 
-func (h *runtimeHistogram) Sum() int64 { - return h.load().Sum() +type runtimeHistogramSnapshot struct { + internal *metrics.Float64Histogram + calculated bool + // The following fields are (lazily) calculated based on 'internal' + mean float64 + count int64 + min int64 // min is the lowest sample value. + max int64 // max is the highest sample value. + variance float64 } -type runtimeHistogramSnapshot metrics.Float64Histogram - -func (h *runtimeHistogramSnapshot) Clear() { - panic("runtimeHistogram does not support Clear") -} -func (h *runtimeHistogramSnapshot) Update(int64) { - panic("runtimeHistogram does not support Update") -} -func (h *runtimeHistogramSnapshot) Sample() Sample { - return NilSample{} +func newRuntimeHistogramSnapshot(h *metrics.Float64Histogram) *runtimeHistogramSnapshot { + return &runtimeHistogramSnapshot{ + internal: h, + } } -func (h *runtimeHistogramSnapshot) Snapshot() Histogram { - return h +// calc calculates the values for the snapshot. This method is not threadsafe. +func (h *runtimeHistogramSnapshot) calc() { + h.calculated = true + var ( + count int64 // number of samples + sum float64 // approx sum of all sample values + min int64 + max float64 + ) + if len(h.internal.Counts) == 0 { + return + } + for i, c := range h.internal.Counts { + if c == 0 { + continue + } + if count == 0 { // Set min only first loop iteration + min = int64(math.Floor(h.internal.Buckets[i])) + } + count += int64(c) + sum += h.midpoint(i) * float64(c) + // Set max on every iteration + edge := h.internal.Buckets[i+1] + if math.IsInf(edge, 1) { + edge = h.internal.Buckets[i] + } + if edge > max { + max = edge + } + } + h.min = min + h.max = int64(max) + h.mean = sum / float64(count) + h.count = count } // Count returns the sample count. func (h *runtimeHistogramSnapshot) Count() int64 { - var count int64 - for _, c := range h.Counts { - count += int64(c) + if !h.calculated { + h.calc() } - return count + return h.count } -// Mean returns an approximation of the mean. -func (h *runtimeHistogramSnapshot) Mean() float64 { - if len(h.Counts) == 0 { - return 0 - } - mean, _ := h.mean() - return mean +// Size returns the size of the sample at the time the snapshot was taken. +func (h *runtimeHistogramSnapshot) Size() int { + return len(h.internal.Counts) } -// mean computes the mean and also the total sample count. -func (h *runtimeHistogramSnapshot) mean() (mean, totalCount float64) { - var sum float64 - for i, c := range h.Counts { - midpoint := h.midpoint(i) - sum += midpoint * float64(c) - totalCount += float64(c) +// Mean returns an approximation of the mean. +func (h *runtimeHistogramSnapshot) Mean() float64 { + if !h.calculated { + h.calc() } - return sum / totalCount, totalCount + return h.mean } func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 { - high := h.Buckets[bucket+1] - low := h.Buckets[bucket] + high := h.internal.Buckets[bucket+1] + low := h.internal.Buckets[bucket] if math.IsInf(high, 1) { // The edge of the highest bucket can be +Inf, and it's supposed to mean that this // bucket contains all remaining samples > low. We can't get the middle of an @@ -180,23 +163,31 @@ func (h *runtimeHistogramSnapshot) StdDev() float64 { // Variance approximates the variance of the histogram. 
func (h *runtimeHistogramSnapshot) Variance() float64 { - if len(h.Counts) == 0 { + if len(h.internal.Counts) == 0 { return 0 } - - mean, totalCount := h.mean() - if totalCount <= 1 { + if !h.calculated { + h.calc() + } + if h.count <= 1 { // There is no variance when there are zero or one items. return 0 } - + // Variance is not calculated in 'calc', because it requires a second iteration. + // Therefore we calculate it lazily in this method, triggered either by + // a direct call to Variance or via StdDev. + if h.variance != 0.0 { + return h.variance + } var sum float64 - for i, c := range h.Counts { + + for i, c := range h.internal.Counts { midpoint := h.midpoint(i) - d := midpoint - mean + d := midpoint - h.mean sum += float64(c) * (d * d) } - return sum / (totalCount - 1) + h.variance = sum / float64(h.count-1) + return h.variance } // Percentile computes the p'th percentile value. @@ -231,11 +222,11 @@ func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 { func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) { var totalCount float64 - for i, count := range h.Counts { + for i, count := range h.internal.Counts { totalCount += float64(count) for len(thresh) > 0 && thresh[0] < totalCount { - thresh[0] = h.Buckets[i] + thresh[0] = h.internal.Buckets[i] thresh = thresh[1:] } if len(thresh) == 0 { @@ -250,34 +241,25 @@ func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) { // Max returns the highest sample value. func (h *runtimeHistogramSnapshot) Max() int64 { - for i := len(h.Counts) - 1; i >= 0; i-- { - count := h.Counts[i] - if count > 0 { - edge := h.Buckets[i+1] - if math.IsInf(edge, 1) { - edge = h.Buckets[i] - } - return int64(math.Ceil(edge)) - } + if !h.calculated { + h.calc() } - return 0 + return h.max } // Min returns the lowest sample value. func (h *runtimeHistogramSnapshot) Min() int64 { - for i, count := range h.Counts { - if count > 0 { - return int64(math.Floor(h.Buckets[i])) - } + if !h.calculated { + h.calc() } - return 0 + return h.min } // Sum returns the sum of all sample values. func (h *runtimeHistogramSnapshot) Sum() int64 { var sum float64 - for i := range h.Counts { - sum += h.Buckets[i] * float64(h.Counts[i]) + for i := range h.internal.Counts { + sum += h.internal.Buckets[i] * float64(h.internal.Counts[i]) } return int64(math.Ceil(sum)) } diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go index d53a01438311..cf7e36420ae9 100644 --- a/metrics/runtimehistogram_test.go +++ b/metrics/runtimehistogram_test.go @@ -1,11 +1,14 @@ package metrics import ( + "bytes" + "encoding/gob" "fmt" "math" "reflect" "runtime/metrics" "testing" + "time" ) var _ Histogram = (*runtimeHistogram)(nil) @@ -74,7 +77,7 @@ func TestRuntimeHistogramStats(t *testing.T) { for i, test := range tests { t.Run(fmt.Sprint(i), func(t *testing.T) { - s := runtimeHistogramSnapshot(test.h) + s := RuntimeHistogramFromData(1.0, &test.h).Snapshot() if v := s.Count(); v != test.Count { t.Errorf("Count() = %v, want %v", v, test.Count) @@ -121,13 +124,39 @@ func approxEqual(x, y, ε float64) bool { // This test verifies that requesting Percentiles in unsorted order // returns them in the requested order. 
func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) { - p := runtimeHistogramSnapshot{ + s := RuntimeHistogramFromData(1.0, &metrics.Float64Histogram{ Counts: []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - } - result := p.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2}) + }).Snapshot() + result := s.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2}) expected := []float64{10, 2, 5, 1, 2} if !reflect.DeepEqual(result, expected) { t.Fatal("wrong result:", result) } } + +func BenchmarkRuntimeHistogramSnapshotRead(b *testing.B) { + var sLatency = "7\xff\x81\x03\x01\x01\x10Float64Histogram\x01\xff\x82\x00\x01\x02\x01\x06Counts\x01\xff\x84\x00\x01\aBuckets\x01\xff\x86\x00\x00\x00\x16\xff\x83\x02\x01\x01\b[]uint64\x01\xff\x84\x00\x01\x06\x00\x00\x17\xff\x85\x02\x01\x01\t[]float64\x01\xff\x86\x00\x01\b\x00\x00\xfe\x06T\xff\x82\x01\xff\xa2\x00\xfe\r\xef\x00\x01\x02\x02\x04\x05\x04\b\x15\x17 B?6.L;$!2) \x1a? \x190aH7FY6#\x190\x1d\x14\x10\x1b\r\t\x04\x03\x01\x01\x00\x03\x02\x00\x03\x05\x05\x02\x02\x06\x04\v\x06\n\x15\x18\x13'&.\x12=H/L&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\xa3\xfe\xf0\xff\x00\xf8\x95\xd6&\xe8\v.q>\xf8\x95\xd6&\xe8\v.\x81>\xf8\xdfA:\xdc\x11ʼn>\xf8\x95\xd6&\xe8\v.\x91>\xf8:\x8c0\xe2\x8ey\x95>\xf8\xdfA:\xdc\x11Å™>\xf8\x84\xf7CÖ”\x10\x9e>\xf8\x95\xd6&\xe8\v.\xa1>\xf8:\x8c0\xe2\x8ey\xa5>\xf8\xdfA:\xdc\x11Å©>\xf8\x84\xf7CÖ”\x10\xae>\xf8\x95\xd6&\xe8\v.\xb1>\xf8:\x8c0\xe2\x8ey\xb5>\xf8\xdfA:\xdc\x11Ź>\xf8\x84\xf7CÖ”\x10\xbe>\xf8\x95\xd6&\xe8\v.\xc1>\xf8:\x8c0\xe2\x8ey\xc5>\xf8\xdfA:\xdc\x11\xc5\xc9>\xf8\x84\xf7CÖ”\x10\xce>\xf8\x95\xd6&\xe8\v.\xd1>\xf8:\x8c0\xe2\x8ey\xd5>\xf8\xdfA:\xdc\x11\xc5\xd9>\xf8\x84\xf7CÖ”\x10\xde>\xf8\x95\xd6&\xe8\v.\xe1>\xf8:\x8c0\xe2\x8ey\xe5>\xf8\xdfA:\xdc\x11\xc5\xe9>\xf8\x84\xf7CÖ”\x10\xee>\xf8\x95\xd6&\xe8\v.\xf1>\xf8:\x8c0\xe2\x8ey\xf5>\xf8\xdfA:\xdc\x11\xc5\xf9>\xf8\x84\xf7CÖ”\x10\xfe>\xf8\x95\xd6&\xe8\v.\x01?\xf8:\x8c0\xe2\x8ey\x05?\xf8\xdfA:\xdc\x11\xc5\t?\xf8\x84\xf7CÖ”\x10\x0e?\xf8\x95\xd6&\xe8\v.\x11?\xf8:\x8c0\xe2\x8ey\x15?\xf8\xdfA:\xdc\x11\xc5\x19?\xf8\x84\xf7CÖ”\x10\x1e?\xf8\x95\xd6&\xe8\v.!?\xf8:\x8c0\xe2\x8ey%?\xf8\xdfA:\xdc\x11\xc5)?\xf8\x84\xf7CÖ”\x10.?\xf8\x95\xd6&\xe8\v.1?\xf8:\x8c0\xe2\x8ey5?\xf8\xdfA:\xdc\x11\xc59?\xf8\x84\xf7CÖ”\x10>?\xf8\x95\xd6&\xe8\v.A?\xf8:\x8c0\xe2\x8eyE?\xf8\xdfA:\xdc\x11\xc5I?\xf8\x84\xf7CÖ”\x10N?\xf8\x95\xd6&\xe8\v.Q?\xf8:\x8c0\xe2\x8eyU?\xf8\xdfA:\xdc\x11\xc5Y?\xf8\x84\xf7CÖ”\x10^?\xf8\x95\xd6&\xe8\v.a?\xf8:\x8c0\xe2\x8eye?\xf8\xdfA:\xdc\x11\xc5i?\xf8\x84\xf7CÖ”\x10n?\xf8\x95\xd6&\xe8\v.q?\xf8:\x8c0\xe2\x8eyu?\xf8\xdfA:\xdc\x11\xc5y?\xf8\x84\xf7CÖ”\x10~?\xf8\x95\xd6&\xe8\v.\x81?\xf8:\x8c0\xe2\x8ey\x85?\xf8\xdfA:\xdc\x11ʼn?\xf8\x84\xf7CÖ”\x10\x8e?\xf8\x95\xd6&\xe8\v.\x91?\xf8:\x8c0\xe2\x8ey\x95?\xf8\xdfA:\xdc\x11Å™?\xf8\x84\xf7CÖ”\x10\x9e?\xf8\x95\xd6&\xe8\v.\xa1?\xf8:\x8c0\xe2\x8ey\xa5?\xf8\xdfA:\xdc\x11Å©?\xf8\x84\xf7CÖ”\x10\xae?\xf8\x95\xd6&\xe8\v.\xb1?\xf8:\x8c0\xe2\x8ey\xb5?\xf8\xdfA:\xdc\x11Ź?\xf8\x84\xf7CÖ”\x10\xbe?\xf8\x95\xd6&\xe8\v.\xc1?\xf8:\x8c0\xe2\x8ey\xc5?\xf8\xdfA:\xdc\x11\xc5\xc9?\xf8\x84\xf7CÖ”\x10\xce?\xf8\x95\xd6&\xe8\v.\xd1?\xf8:\x8c0\xe2\x8ey\xd5?\xf8\xdfA:\xdc\x11\xc5\xd9?\xf8\x84\xf7CÖ”\x10\xde?\xf8\x95\xd6&\xe8\v.\xe1?\xf8:\x8c0\
xe2\x8ey\xe5?\xf8\xdfA:\xdc\x11\xc5\xe9?\xf8\x84\xf7CÖ”\x10\xee?\xf8\x95\xd6&\xe8\v.\xf1?\xf8:\x8c0\xe2\x8ey\xf5?\xf8\xdfA:\xdc\x11\xc5\xf9?\xf8\x84\xf7CÖ”\x10\xfe?\xf8\x95\xd6&\xe8\v.\x01@\xf8:\x8c0\xe2\x8ey\x05@\xf8\xdfA:\xdc\x11\xc5\t@\xf8\x84\xf7CÖ”\x10\x0e@\xf8\x95\xd6&\xe8\v.\x11@\xf8:\x8c0\xe2\x8ey\x15@\xf8\xdfA:\xdc\x11\xc5\x19@\xf8\x84\xf7CÖ”\x10\x1e@\xf8\x95\xd6&\xe8\v.!@\xf8:\x8c0\xe2\x8ey%@\xf8\xdfA:\xdc\x11\xc5)@\xf8\x84\xf7CÖ”\x10.@\xf8\x95\xd6&\xe8\v.1@\xf8:\x8c0\xe2\x8ey5@\xf8\xdfA:\xdc\x11\xc59@\xf8\x84\xf7CÖ”\x10>@\xf8\x95\xd6&\xe8\v.A@\xf8:\x8c0\xe2\x8eyE@\xf8\xdfA:\xdc\x11\xc5I@\xf8\x84\xf7CÖ”\x10N@\xf8\x95\xd6&\xe8\v.Q@\xf8:\x8c0\xe2\x8eyU@\xf8\xdfA:\xdc\x11\xc5Y@\xf8\x84\xf7CÖ”\x10^@\xf8\x95\xd6&\xe8\v.a@\xf8:\x8c0\xe2\x8eye@\xf8\xdfA:\xdc\x11\xc5i@\xf8\x84\xf7CÖ”\x10n@\xf8\x95\xd6&\xe8\v.q@\xf8:\x8c0\xe2\x8eyu@\xf8\xdfA:\xdc\x11\xc5y@\xf8\x84\xf7CÖ”\x10~@\xf8\x95\xd6&\xe8\v.\x81@\xf8:\x8c0\xe2\x8ey\x85@\xf8\xdfA:\xdc\x11ʼn@\xf8\x84\xf7CÖ”\x10\x8e@\xf8\x95\xd6&\xe8\v.\x91@\xf8:\x8c0\xe2\x8ey\x95@\xf8\xdfA:\xdc\x11Å™@\xf8\x84\xf7CÖ”\x10\x9e@\xf8\x95\xd6&\xe8\v.\xa1@\xf8:\x8c0\xe2\x8ey\xa5@\xf8\xdfA:\xdc\x11Å©@\xf8\x84\xf7CÖ”\x10\xae@\xf8\x95\xd6&\xe8\v.\xb1@\xf8:\x8c0\xe2\x8ey\xb5@\xf8\xdfA:\xdc\x11Ź@\xf8\x84\xf7CÖ”\x10\xbe@\xf8\x95\xd6&\xe8\v.\xc1@\xf8:\x8c0\xe2\x8ey\xc5@\xf8\xdfA:\xdc\x11\xc5\xc9@\xf8\x84\xf7CÖ”\x10\xce@\xf8\x95\xd6&\xe8\v.\xd1@\xf8:\x8c0\xe2\x8ey\xd5@\xf8\xdfA:\xdc\x11\xc5\xd9@\xf8\x84\xf7CÖ”\x10\xde@\xf8\x95\xd6&\xe8\v.\xe1@\xf8:\x8c0\xe2\x8ey\xe5@\xf8\xdfA:\xdc\x11\xc5\xe9@\xf8\x84\xf7CÖ”\x10\xee@\xf8\x95\xd6&\xe8\v.\xf1@\xf8:\x8c0\xe2\x8ey\xf5@\xf8\xdfA:\xdc\x11\xc5\xf9@\xf8\x84\xf7CÖ”\x10\xfe@\xf8\x95\xd6&\xe8\v.\x01A\xfe\xf0\x7f\x00" + + dserialize := func(data string) *metrics.Float64Histogram { + var res metrics.Float64Histogram + if err := gob.NewDecoder(bytes.NewReader([]byte(data))).Decode(&res); err != nil { + panic(err) + } + return &res + } + latency := RuntimeHistogramFromData(float64(time.Second), dserialize(sLatency)) + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + snap := latency.Snapshot() + // These are the fields that influxdb accesses + _ = snap.Count() + _ = snap.Max() + _ = snap.Mean() + _ = snap.Min() + _ = snap.StdDev() + _ = snap.Variance() + _ = snap.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + } +} diff --git a/metrics/sample.go b/metrics/sample.go index 252a878f581b..5398dd42d5de 100644 --- a/metrics/sample.go +++ b/metrics/sample.go @@ -11,10 +11,7 @@ import ( const rescaleThreshold = time.Hour -// Samples maintain a statistically-significant selection of values from -// a stream. -type Sample interface { - Clear() +type SampleSnapshot interface { Count() int64 Max() int64 Mean() float64 @@ -22,14 +19,19 @@ type Sample interface { Percentile(float64) float64 Percentiles([]float64) []float64 Size() int - Snapshot() Sample StdDev() float64 Sum() int64 - Update(int64) - Values() []int64 Variance() float64 } +// Samples maintain a statistically-significant selection of values from +// a stream. +type Sample interface { + Snapshot() SampleSnapshot + Clear() + Update(int64) +} + // ExpDecaySample is an exponentially-decaying sample using a forward-decaying // priority reservoir. See Cormode et al's "Forward Decay: A Practical Time // Decay Model for Streaming Systems". @@ -77,72 +79,29 @@ func (s *ExpDecaySample) Clear() { s.values.Clear() } -// Count returns the number of samples recorded, which may exceed the -// reservoir size. 
-func (s *ExpDecaySample) Count() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.count -} - -// Max returns the maximum value in the sample, which may not be the maximum -// value ever to be part of the sample. -func (s *ExpDecaySample) Max() int64 { - return SampleMax(s.Values()) -} - -// Mean returns the mean of the values in the sample. -func (s *ExpDecaySample) Mean() float64 { - return SampleMean(s.Values()) -} - -// Min returns the minimum value in the sample, which may not be the minimum -// value ever to be part of the sample. -func (s *ExpDecaySample) Min() int64 { - return SampleMin(s.Values()) -} - -// Percentile returns an arbitrary percentile of values in the sample. -func (s *ExpDecaySample) Percentile(p float64) float64 { - return SamplePercentile(s.Values(), p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the -// sample. -func (s *ExpDecaySample) Percentiles(ps []float64) []float64 { - return SamplePercentiles(s.Values(), ps) -} - -// Size returns the size of the sample, which is at most the reservoir size. -func (s *ExpDecaySample) Size() int { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.values.Size() -} - // Snapshot returns a read-only copy of the sample. -func (s *ExpDecaySample) Snapshot() Sample { +func (s *ExpDecaySample) Snapshot() SampleSnapshot { s.mutex.Lock() defer s.mutex.Unlock() - vals := s.values.Values() - values := make([]int64, len(vals)) - for i, v := range vals { - values[i] = v.v - } - return &SampleSnapshot{ - count: s.count, - values: values, + var ( + samples = s.values.Values() + values = make([]int64, len(samples)) + max int64 = math.MinInt64 + min int64 = math.MaxInt64 + sum int64 + ) + for i, item := range samples { + v := item.v + values[i] = v + sum += v + if v > max { + max = v + } + if v < min { + min = v + } } -} - -// StdDev returns the standard deviation of the values in the sample. -func (s *ExpDecaySample) StdDev() float64 { - return SampleStdDev(s.Values()) -} - -// Sum returns the sum of the values in the sample. -func (s *ExpDecaySample) Sum() int64 { - return SampleSum(s.Values()) + return newSampleSnapshotPrecalculated(s.count, values, min, max, sum) } // Update samples a new value. @@ -150,23 +109,6 @@ func (s *ExpDecaySample) Update(v int64) { s.update(time.Now(), v) } -// Values returns a copy of the values in the sample. -func (s *ExpDecaySample) Values() []int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - vals := s.values.Values() - values := make([]int64, len(vals)) - for i, v := range vals { - values[i] = v.v - } - return values -} - -// Variance returns the variance of the values in the sample. -func (s *ExpDecaySample) Variance() float64 { - return SampleVariance(s.Values()) -} - // update samples a new value at a particular timestamp. This is a method all // its own to facilitate testing. func (s *ExpDecaySample) update(t time.Time, v int64) { @@ -202,207 +144,160 @@ func (s *ExpDecaySample) update(t time.Time, v int64) { // NilSample is a no-op Sample. type NilSample struct{} -// Clear is a no-op. -func (NilSample) Clear() {} - -// Count is a no-op. -func (NilSample) Count() int64 { return 0 } - -// Max is a no-op. -func (NilSample) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilSample) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilSample) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilSample) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. 
-func (NilSample) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Size is a no-op. -func (NilSample) Size() int { return 0 } - -// Sample is a no-op. -func (NilSample) Snapshot() Sample { return NilSample{} } - -// StdDev is a no-op. -func (NilSample) StdDev() float64 { return 0.0 } - -// Sum is a no-op. -func (NilSample) Sum() int64 { return 0 } - -// Update is a no-op. -func (NilSample) Update(v int64) {} - -// Values is a no-op. -func (NilSample) Values() []int64 { return []int64{} } - -// Variance is a no-op. -func (NilSample) Variance() float64 { return 0.0 } - -// SampleMax returns the maximum value of the slice of int64. -func SampleMax(values []int64) int64 { - if len(values) == 0 { - return 0 - } - var max int64 = math.MinInt64 - for _, v := range values { - if max < v { - max = v - } - } - return max -} - -// SampleMean returns the mean value of the slice of int64. -func SampleMean(values []int64) float64 { - if len(values) == 0 { - return 0.0 - } - return float64(SampleSum(values)) / float64(len(values)) -} - -// SampleMin returns the minimum value of the slice of int64. -func SampleMin(values []int64) int64 { - if len(values) == 0 { - return 0 - } - var min int64 = math.MaxInt64 - for _, v := range values { - if min > v { - min = v - } - } - return min -} +func (NilSample) Clear() {} +func (NilSample) Snapshot() SampleSnapshot { return (*emptySnapshot)(nil) } +func (NilSample) Update(v int64) {} // SamplePercentiles returns an arbitrary percentile of the slice of int64. func SamplePercentile(values []int64, p float64) float64 { - return SamplePercentiles(values, []float64{p})[0] + return CalculatePercentiles(values, []float64{p})[0] } -// SamplePercentiles returns a slice of arbitrary percentiles of the slice of -// int64. -func SamplePercentiles(values []int64, ps []float64) []float64 { +// CalculatePercentiles returns a slice of arbitrary percentiles of the slice of +// int64. This method returns interpolated results, so e.g if there are only two +// values, [0, 10], a 50% percentile will land between them. +// +// Note: As a side-effect, this method will also sort the slice of values. +// Note2: The input format for percentiles is NOT percent! To express 50%, use 0.5, not 50. +func CalculatePercentiles(values []int64, ps []float64) []float64 { scores := make([]float64, len(ps)) size := len(values) - if size > 0 { - slices.Sort(values) - for i, p := range ps { - pos := p * float64(size+1) - if pos < 1.0 { - scores[i] = float64(values[0]) - } else if pos >= float64(size) { - scores[i] = float64(values[size-1]) - } else { - lower := float64(values[int(pos)-1]) - upper := float64(values[int(pos)]) - scores[i] = lower + (pos-math.Floor(pos))*(upper-lower) - } + if size == 0 { + return scores + } + slices.Sort(values) + for i, p := range ps { + pos := p * float64(size+1) + + if pos < 1.0 { + scores[i] = float64(values[0]) + } else if pos >= float64(size) { + scores[i] = float64(values[size-1]) + } else { + lower := float64(values[int(pos)-1]) + upper := float64(values[int(pos)]) + scores[i] = lower + (pos-math.Floor(pos))*(upper-lower) } } return scores } -// SampleSnapshot is a read-only copy of another Sample. -type SampleSnapshot struct { +// sampleSnapshot is a read-only copy of another Sample. 
+type sampleSnapshot struct { count int64 values []int64 + + max int64 + min int64 + mean float64 + sum int64 + variance float64 } -func NewSampleSnapshot(count int64, values []int64) *SampleSnapshot { - return &SampleSnapshot{ +// newSampleSnapshotPrecalculated creates a read-only sampleSnapShot, using +// precalculated sums to avoid iterating the values +func newSampleSnapshotPrecalculated(count int64, values []int64, min, max, sum int64) *sampleSnapshot { + if len(values) == 0 { + return &sampleSnapshot{ + count: count, + values: values, + } + } + return &sampleSnapshot{ count: count, values: values, + max: max, + min: min, + mean: float64(sum) / float64(len(values)), + sum: sum, } } -// Clear panics. -func (*SampleSnapshot) Clear() { - panic("Clear called on a SampleSnapshot") +// newSampleSnapshot creates a read-only sampleSnapShot, and calculates some +// numbers. +func newSampleSnapshot(count int64, values []int64) *sampleSnapshot { + var ( + max int64 = math.MinInt64 + min int64 = math.MaxInt64 + sum int64 + ) + for _, v := range values { + sum += v + if v > max { + max = v + } + if v < min { + min = v + } + } + return newSampleSnapshotPrecalculated(count, values, min, max, sum) } // Count returns the count of inputs at the time the snapshot was taken. -func (s *SampleSnapshot) Count() int64 { return s.count } +func (s *sampleSnapshot) Count() int64 { return s.count } // Max returns the maximal value at the time the snapshot was taken. -func (s *SampleSnapshot) Max() int64 { return SampleMax(s.values) } +func (s *sampleSnapshot) Max() int64 { return s.max } // Mean returns the mean value at the time the snapshot was taken. -func (s *SampleSnapshot) Mean() float64 { return SampleMean(s.values) } +func (s *sampleSnapshot) Mean() float64 { return s.mean } // Min returns the minimal value at the time the snapshot was taken. -func (s *SampleSnapshot) Min() int64 { return SampleMin(s.values) } +func (s *sampleSnapshot) Min() int64 { return s.min } // Percentile returns an arbitrary percentile of values at the time the // snapshot was taken. -func (s *SampleSnapshot) Percentile(p float64) float64 { +func (s *sampleSnapshot) Percentile(p float64) float64 { return SamplePercentile(s.values, p) } // Percentiles returns a slice of arbitrary percentiles of values at the time // the snapshot was taken. -func (s *SampleSnapshot) Percentiles(ps []float64) []float64 { - return SamplePercentiles(s.values, ps) +func (s *sampleSnapshot) Percentiles(ps []float64) []float64 { + return CalculatePercentiles(s.values, ps) } // Size returns the size of the sample at the time the snapshot was taken. -func (s *SampleSnapshot) Size() int { return len(s.values) } +func (s *sampleSnapshot) Size() int { return len(s.values) } // Snapshot returns the snapshot. -func (s *SampleSnapshot) Snapshot() Sample { return s } +func (s *sampleSnapshot) Snapshot() SampleSnapshot { return s } // StdDev returns the standard deviation of values at the time the snapshot was // taken. -func (s *SampleSnapshot) StdDev() float64 { return SampleStdDev(s.values) } +func (s *sampleSnapshot) StdDev() float64 { + if s.variance == 0.0 { + s.variance = SampleVariance(s.mean, s.values) + } + return math.Sqrt(s.variance) +} // Sum returns the sum of values at the time the snapshot was taken. -func (s *SampleSnapshot) Sum() int64 { return SampleSum(s.values) } - -// Update panics. 
-func (*SampleSnapshot) Update(int64) { - panic("Update called on a SampleSnapshot") -} +func (s *sampleSnapshot) Sum() int64 { return s.sum } // Values returns a copy of the values in the sample. -func (s *SampleSnapshot) Values() []int64 { +func (s *sampleSnapshot) Values() []int64 { values := make([]int64, len(s.values)) copy(values, s.values) return values } // Variance returns the variance of values at the time the snapshot was taken. -func (s *SampleSnapshot) Variance() float64 { return SampleVariance(s.values) } - -// SampleStdDev returns the standard deviation of the slice of int64. -func SampleStdDev(values []int64) float64 { - return math.Sqrt(SampleVariance(values)) -} - -// SampleSum returns the sum of the slice of int64. -func SampleSum(values []int64) int64 { - var sum int64 - for _, v := range values { - sum += v +func (s *sampleSnapshot) Variance() float64 { + if s.variance == 0.0 { + s.variance = SampleVariance(s.mean, s.values) } - return sum + return s.variance } // SampleVariance returns the variance of the slice of int64. -func SampleVariance(values []int64) float64 { +func SampleVariance(mean float64, values []int64) float64 { if len(values) == 0 { return 0.0 } - m := SampleMean(values) var sum float64 for _, v := range values { - d := float64(v) - m + d := float64(v) - mean sum += d * d } return sum / float64(len(values)) @@ -445,83 +340,14 @@ func (s *UniformSample) Clear() { s.values = make([]int64, 0, s.reservoirSize) } -// Count returns the number of samples recorded, which may exceed the -// reservoir size. -func (s *UniformSample) Count() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.count -} - -// Max returns the maximum value in the sample, which may not be the maximum -// value ever to be part of the sample. -func (s *UniformSample) Max() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMax(s.values) -} - -// Mean returns the mean of the values in the sample. -func (s *UniformSample) Mean() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMean(s.values) -} - -// Min returns the minimum value in the sample, which may not be the minimum -// value ever to be part of the sample. -func (s *UniformSample) Min() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMin(s.values) -} - -// Percentile returns an arbitrary percentile of values in the sample. -func (s *UniformSample) Percentile(p float64) float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SamplePercentile(s.values, p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the -// sample. -func (s *UniformSample) Percentiles(ps []float64) []float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SamplePercentiles(s.values, ps) -} - -// Size returns the size of the sample, which is at most the reservoir size. -func (s *UniformSample) Size() int { - s.mutex.Lock() - defer s.mutex.Unlock() - return len(s.values) -} - // Snapshot returns a read-only copy of the sample. -func (s *UniformSample) Snapshot() Sample { +func (s *UniformSample) Snapshot() SampleSnapshot { s.mutex.Lock() - defer s.mutex.Unlock() values := make([]int64, len(s.values)) copy(values, s.values) - return &SampleSnapshot{ - count: s.count, - values: values, - } -} - -// StdDev returns the standard deviation of the values in the sample. -func (s *UniformSample) StdDev() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleStdDev(s.values) -} - -// Sum returns the sum of the values in the sample. 
-func (s *UniformSample) Sum() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleSum(s.values) + count := s.count + s.mutex.Unlock() + return newSampleSnapshot(count, values) } // Update samples a new value. @@ -544,22 +370,6 @@ func (s *UniformSample) Update(v int64) { } } -// Values returns a copy of the values in the sample. -func (s *UniformSample) Values() []int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - values := make([]int64, len(s.values)) - copy(values, s.values) - return values -} - -// Variance returns the variance of the values in the sample. -func (s *UniformSample) Variance() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleVariance(s.values) -} - // expDecaySample represents an individual sample in a heap. type expDecaySample struct { k float64 diff --git a/metrics/sample_test.go b/metrics/sample_test.go index 3ae128d56f67..79673570554c 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -8,28 +8,36 @@ import ( "time" ) +const epsilonPercentile = .00000000001 + // Benchmark{Compute,Copy}{1000,1000000} demonstrate that, even for relatively // expensive computations like Variance, the cost of copying the Sample, as // approximated by a make and copy, is much greater than the cost of the // computation for small samples and only slightly less for large samples. func BenchmarkCompute1000(b *testing.B) { s := make([]int64, 1000) + var sum int64 for i := 0; i < len(s); i++ { s[i] = int64(i) + sum += int64(i) } + mean := float64(sum) / float64(len(s)) b.ResetTimer() for i := 0; i < b.N; i++ { - SampleVariance(s) + SampleVariance(mean, s) } } func BenchmarkCompute1000000(b *testing.B) { s := make([]int64, 1000000) + var sum int64 for i := 0; i < len(s); i++ { s[i] = int64(i) + sum += int64(i) } + mean := float64(sum) / float64(len(s)) b.ResetTimer() for i := 0; i < b.N; i++ { - SampleVariance(s) + SampleVariance(mean, s) } } func BenchmarkCopy1000(b *testing.B) { @@ -79,65 +87,42 @@ func BenchmarkUniformSample1028(b *testing.B) { benchmarkSample(b, NewUniformSample(1028)) } -func TestExpDecaySample10(t *testing.T) { - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 10; i++ { - s.Update(int64(i)) - } - if size := s.Count(); size != 10 { - t.Errorf("s.Count(): 10 != %v\n", size) - } - if size := s.Size(); size != 10 { - t.Errorf("s.Size(): 10 != %v\n", size) - } - if l := len(s.Values()); l != 10 { - t.Errorf("len(s.Values()): 10 != %v\n", l) - } - for _, v := range s.Values() { - if v > 10 || v < 0 { - t.Errorf("out of range [0, 10): %v\n", v) - } +func min(a, b int) int { + if a < b { + return a } + return b } -func TestExpDecaySample100(t *testing.T) { - s := NewExpDecaySample(1000, 0.01) - for i := 0; i < 100; i++ { - s.Update(int64(i)) - } - if size := s.Count(); size != 100 { - t.Errorf("s.Count(): 100 != %v\n", size) - } - if size := s.Size(); size != 100 { - t.Errorf("s.Size(): 100 != %v\n", size) - } - if l := len(s.Values()); l != 100 { - t.Errorf("len(s.Values()): 100 != %v\n", l) - } - for _, v := range s.Values() { - if v > 100 || v < 0 { - t.Errorf("out of range [0, 100): %v\n", v) +func TestExpDecaySample(t *testing.T) { + for _, tc := range []struct { + reservoirSize int + alpha float64 + updates int + }{ + {100, 0.99, 10}, + {1000, 0.01, 100}, + {100, 0.99, 1000}, + } { + sample := NewExpDecaySample(tc.reservoirSize, tc.alpha) + for i := 0; i < tc.updates; i++ { + sample.Update(int64(i)) } - } -} - -func TestExpDecaySample1000(t *testing.T) { - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 1000; i++ { - 
s.Update(int64(i)) - } - if size := s.Count(); size != 1000 { - t.Errorf("s.Count(): 1000 != %v\n", size) - } - if size := s.Size(); size != 100 { - t.Errorf("s.Size(): 100 != %v\n", size) - } - if l := len(s.Values()); l != 100 { - t.Errorf("len(s.Values()): 100 != %v\n", l) - } - for _, v := range s.Values() { - if v > 1000 || v < 0 { - t.Errorf("out of range [0, 1000): %v\n", v) + snap := sample.Snapshot() + if have, want := int(snap.Count()), tc.updates; have != want { + t.Errorf("have %d want %d", have, want) + } + if have, want := snap.Size(), min(tc.updates, tc.reservoirSize); have != want { + t.Errorf("have %d want %d", have, want) + } + values := snap.(*sampleSnapshot).values + if have, want := len(values), min(tc.updates, tc.reservoirSize); have != want { + t.Errorf("have %d want %d", have, want) + } + for _, v := range values { + if v > int64(tc.updates) || v < 0 { + t.Errorf("out of range [0, %d): %v", tc.updates, v) + } } } } @@ -147,15 +132,16 @@ func TestExpDecaySample1000(t *testing.T) { // The priority becomes +Inf quickly after starting if this is done, // effectively freezing the set of samples until a rescale step happens. func TestExpDecaySampleNanosecondRegression(t *testing.T) { - s := NewExpDecaySample(100, 0.99) + sw := NewExpDecaySample(100, 0.99) for i := 0; i < 100; i++ { - s.Update(10) + sw.Update(10) } time.Sleep(1 * time.Millisecond) for i := 0; i < 100; i++ { - s.Update(20) + sw.Update(20) } - v := s.Values() + s := sw.Snapshot() + v := s.(*sampleSnapshot).values avg := float64(0) for i := 0; i < len(v); i++ { avg += float64(v[i]) @@ -194,24 +180,27 @@ func TestExpDecaySampleStatistics(t *testing.T) { for i := 1; i <= 10000; i++ { s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) } - testExpDecaySampleStatistics(t, s) + testExpDecaySampleStatistics(t, s.Snapshot()) } func TestUniformSample(t *testing.T) { - s := NewUniformSample(100) + sw := NewUniformSample(100) for i := 0; i < 1000; i++ { - s.Update(int64(i)) + sw.Update(int64(i)) } + s := sw.Snapshot() if size := s.Count(); size != 1000 { t.Errorf("s.Count(): 1000 != %v\n", size) } if size := s.Size(); size != 100 { t.Errorf("s.Size(): 100 != %v\n", size) } - if l := len(s.Values()); l != 100 { + values := s.(*sampleSnapshot).values + + if l := len(values); l != 100 { t.Errorf("len(s.Values()): 100 != %v\n", l) } - for _, v := range s.Values() { + for _, v := range values { if v > 1000 || v < 0 { t.Errorf("out of range [0, 100): %v\n", v) } @@ -219,12 +208,13 @@ func TestUniformSample(t *testing.T) { } func TestUniformSampleIncludesTail(t *testing.T) { - s := NewUniformSample(100) + sw := NewUniformSample(100) max := 100 for i := 0; i < max; i++ { - s.Update(int64(i)) + sw.Update(int64(i)) } - v := s.Values() + s := sw.Snapshot() + v := s.(*sampleSnapshot).values sum := 0 exp := (max - 1) * max / 2 for i := 0; i < len(v); i++ { @@ -250,7 +240,7 @@ func TestUniformSampleStatistics(t *testing.T) { for i := 1; i <= 10000; i++ { s.Update(int64(i)) } - testUniformSampleStatistics(t, s) + testUniformSampleStatistics(t, s.Snapshot()) } func benchmarkSample(b *testing.B, s Sample) { @@ -267,7 +257,7 @@ func benchmarkSample(b *testing.B, s Sample) { b.Logf("GC cost: %d ns/op", int(memStats.PauseTotalNs-pauseTotalNs)/b.N) } -func testExpDecaySampleStatistics(t *testing.T, s Sample) { +func testExpDecaySampleStatistics(t *testing.T, s SampleSnapshot) { if count := s.Count(); count != 10000 { t.Errorf("s.Count(): 10000 != %v\n", count) } @@ -295,7 +285,7 @@ func testExpDecaySampleStatistics(t 
*testing.T, s Sample) { } } -func testUniformSampleStatistics(t *testing.T, s Sample) { +func testUniformSampleStatistics(t *testing.T, s SampleSnapshot) { if count := s.Count(); count != 10000 { t.Errorf("s.Count(): 10000 != %v\n", count) } @@ -349,8 +339,22 @@ func TestUniformSampleConcurrentUpdateCount(t *testing.T) { } }() for i := 0; i < 1000; i++ { - s.Count() + s.Snapshot().Count() time.Sleep(5 * time.Millisecond) } quit <- struct{}{} } + +func BenchmarkCalculatePercentiles(b *testing.B) { + pss := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + var vals []int64 + for i := 0; i < 1000; i++ { + vals = append(vals, int64(rand.Int31())) + } + v := make([]int64, len(vals)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + copy(v, vals) + _ = CalculatePercentiles(v, pss) + } +} diff --git a/metrics/syslog.go b/metrics/syslog.go index 76c849056757..fd856d697316 100644 --- a/metrics/syslog.go +++ b/metrics/syslog.go @@ -16,15 +16,15 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { r.Each(func(name string, i interface{}) { switch metric := i.(type) { case Counter: - w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Count())) + w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Snapshot().Count())) case CounterFloat64: - w.Info(fmt.Sprintf("counter %s: count: %f", name, metric.Count())) + w.Info(fmt.Sprintf("counter %s: count: %f", name, metric.Snapshot().Count())) case Gauge: - w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Value())) + w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Snapshot().Value())) case GaugeFloat64: - w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Value())) + w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Snapshot().Value())) case GaugeInfo: - w.Info(fmt.Sprintf("gauge %s: value: %s", name, metric.Value())) + w.Info(fmt.Sprintf("gauge %s: value: %s", name, metric.Snapshot().Value())) case Healthcheck: metric.Check() w.Info(fmt.Sprintf("healthcheck %s: error: %v", name, metric.Error())) diff --git a/metrics/testdata/opentsb.want b/metrics/testdata/opentsb.want index c8e40a525042..43fe1b2ac27a 100644 --- a/metrics/testdata/opentsb.want +++ b/metrics/testdata/opentsb.want @@ -1,4 +1,4 @@ -put pre.elite.count 978307200 0 host=hal9000 +put pre.elite.count 978307200 1337 host=hal9000 put pre.elite.one-minute 978307200 0.00 host=hal9000 put pre.elite.five-minute 978307200 0.00 host=hal9000 put pre.elite.fifteen-minute 978307200 0.00 host=hal9000 diff --git a/metrics/timer.go b/metrics/timer.go index 2e1a9be47295..576ad8aa3e63 100644 --- a/metrics/timer.go +++ b/metrics/timer.go @@ -5,26 +5,18 @@ import ( "time" ) +type TimerSnapshot interface { + HistogramSnapshot + MeterSnapshot +} + // Timers capture the duration and rate of events. type Timer interface { - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - Rate1() float64 - Rate5() float64 - Rate15() float64 - RateMean() float64 - Snapshot() Timer - StdDev() float64 + Snapshot() TimerSnapshot Stop() - Sum() int64 Time(func()) - Update(time.Duration) UpdateSince(time.Time) - Variance() float64 + Update(time.Duration) } // GetOrRegisterTimer returns an existing Timer or constructs and registers a @@ -78,61 +70,11 @@ func NewTimer() Timer { // NilTimer is a no-op Timer. type NilTimer struct{} -// Count is a no-op. -func (NilTimer) Count() int64 { return 0 } - -// Max is a no-op. -func (NilTimer) Max() int64 { return 0 } - -// Mean is a no-op. 
-func (NilTimer) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilTimer) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilTimer) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilTimer) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Rate1 is a no-op. -func (NilTimer) Rate1() float64 { return 0.0 } - -// Rate5 is a no-op. -func (NilTimer) Rate5() float64 { return 0.0 } - -// Rate15 is a no-op. -func (NilTimer) Rate15() float64 { return 0.0 } - -// RateMean is a no-op. -func (NilTimer) RateMean() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilTimer) Snapshot() Timer { return NilTimer{} } - -// StdDev is a no-op. -func (NilTimer) StdDev() float64 { return 0.0 } - -// Stop is a no-op. -func (NilTimer) Stop() {} - -// Sum is a no-op. -func (NilTimer) Sum() int64 { return 0 } - -// Time is a no-op. -func (NilTimer) Time(f func()) { f() } - -// Update is a no-op. -func (NilTimer) Update(time.Duration) {} - -// UpdateSince is a no-op. -func (NilTimer) UpdateSince(time.Time) {} - -// Variance is a no-op. -func (NilTimer) Variance() float64 { return 0.0 } +func (NilTimer) Snapshot() TimerSnapshot { return (*emptySnapshot)(nil) } +func (NilTimer) Stop() {} +func (NilTimer) Time(f func()) { f() } +func (NilTimer) Update(time.Duration) {} +func (NilTimer) UpdateSince(time.Time) {} // StandardTimer is the standard implementation of a Timer and uses a Histogram // and Meter. @@ -142,82 +84,21 @@ type StandardTimer struct { mutex sync.Mutex } -// Count returns the number of events recorded. -func (t *StandardTimer) Count() int64 { - return t.histogram.Count() -} - -// Max returns the maximum value in the sample. -func (t *StandardTimer) Max() int64 { - return t.histogram.Max() -} - -// Mean returns the mean of the values in the sample. -func (t *StandardTimer) Mean() float64 { - return t.histogram.Mean() -} - -// Min returns the minimum value in the sample. -func (t *StandardTimer) Min() int64 { - return t.histogram.Min() -} - -// Percentile returns an arbitrary percentile of the values in the sample. -func (t *StandardTimer) Percentile(p float64) float64 { - return t.histogram.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of the values in the -// sample. -func (t *StandardTimer) Percentiles(ps []float64) []float64 { - return t.histogram.Percentiles(ps) -} - -// Rate1 returns the one-minute moving average rate of events per second. -func (t *StandardTimer) Rate1() float64 { - return t.meter.Rate1() -} - -// Rate5 returns the five-minute moving average rate of events per second. -func (t *StandardTimer) Rate5() float64 { - return t.meter.Rate5() -} - -// Rate15 returns the fifteen-minute moving average rate of events per second. -func (t *StandardTimer) Rate15() float64 { - return t.meter.Rate15() -} - -// RateMean returns the meter's mean rate of events per second. -func (t *StandardTimer) RateMean() float64 { - return t.meter.RateMean() -} - // Snapshot returns a read-only copy of the timer. -func (t *StandardTimer) Snapshot() Timer { +func (t *StandardTimer) Snapshot() TimerSnapshot { t.mutex.Lock() defer t.mutex.Unlock() - return &TimerSnapshot{ - histogram: t.histogram.Snapshot().(*HistogramSnapshot), - meter: t.meter.Snapshot().(*MeterSnapshot), + return &timerSnapshot{ + histogram: t.histogram.Snapshot(), + meter: t.meter.Snapshot(), } } -// StdDev returns the standard deviation of the values in the sample. 
-func (t *StandardTimer) StdDev() float64 { - return t.histogram.StdDev() -} - // Stop stops the meter. func (t *StandardTimer) Stop() { t.meter.Stop() } -// Sum returns the sum in the sample. -func (t *StandardTimer) Sum() int64 { - return t.histogram.Sum() -} - // Record the duration of the execution of the given function. func (t *StandardTimer) Time(f func()) { ts := time.Now() @@ -241,86 +122,63 @@ func (t *StandardTimer) UpdateSince(ts time.Time) { t.meter.Mark(1) } -// Variance returns the variance of the values in the sample. -func (t *StandardTimer) Variance() float64 { - return t.histogram.Variance() -} - -// TimerSnapshot is a read-only copy of another Timer. -type TimerSnapshot struct { - histogram *HistogramSnapshot - meter *MeterSnapshot +// timerSnapshot is a read-only copy of another Timer. +type timerSnapshot struct { + histogram HistogramSnapshot + meter MeterSnapshot } // Count returns the number of events recorded at the time the snapshot was // taken. -func (t *TimerSnapshot) Count() int64 { return t.histogram.Count() } +func (t *timerSnapshot) Count() int64 { return t.histogram.Count() } // Max returns the maximum value at the time the snapshot was taken. -func (t *TimerSnapshot) Max() int64 { return t.histogram.Max() } +func (t *timerSnapshot) Max() int64 { return t.histogram.Max() } + +// Size returns the size of the sample at the time the snapshot was taken. +func (t *timerSnapshot) Size() int { return t.histogram.Size() } // Mean returns the mean value at the time the snapshot was taken. -func (t *TimerSnapshot) Mean() float64 { return t.histogram.Mean() } +func (t *timerSnapshot) Mean() float64 { return t.histogram.Mean() } // Min returns the minimum value at the time the snapshot was taken. -func (t *TimerSnapshot) Min() int64 { return t.histogram.Min() } +func (t *timerSnapshot) Min() int64 { return t.histogram.Min() } // Percentile returns an arbitrary percentile of sampled values at the time the // snapshot was taken. -func (t *TimerSnapshot) Percentile(p float64) float64 { +func (t *timerSnapshot) Percentile(p float64) float64 { return t.histogram.Percentile(p) } // Percentiles returns a slice of arbitrary percentiles of sampled values at // the time the snapshot was taken. -func (t *TimerSnapshot) Percentiles(ps []float64) []float64 { +func (t *timerSnapshot) Percentiles(ps []float64) []float64 { return t.histogram.Percentiles(ps) } // Rate1 returns the one-minute moving average rate of events per second at the // time the snapshot was taken. -func (t *TimerSnapshot) Rate1() float64 { return t.meter.Rate1() } +func (t *timerSnapshot) Rate1() float64 { return t.meter.Rate1() } // Rate5 returns the five-minute moving average rate of events per second at // the time the snapshot was taken. -func (t *TimerSnapshot) Rate5() float64 { return t.meter.Rate5() } +func (t *timerSnapshot) Rate5() float64 { return t.meter.Rate5() } // Rate15 returns the fifteen-minute moving average rate of events per second // at the time the snapshot was taken. -func (t *TimerSnapshot) Rate15() float64 { return t.meter.Rate15() } +func (t *timerSnapshot) Rate15() float64 { return t.meter.Rate15() } // RateMean returns the meter's mean rate of events per second at the time the // snapshot was taken. -func (t *TimerSnapshot) RateMean() float64 { return t.meter.RateMean() } - -// Snapshot returns the snapshot. 
-func (t *TimerSnapshot) Snapshot() Timer { return t } +func (t *timerSnapshot) RateMean() float64 { return t.meter.RateMean() } // StdDev returns the standard deviation of the values at the time the snapshot // was taken. -func (t *TimerSnapshot) StdDev() float64 { return t.histogram.StdDev() } - -// Stop is a no-op. -func (t *TimerSnapshot) Stop() {} +func (t *timerSnapshot) StdDev() float64 { return t.histogram.StdDev() } // Sum returns the sum at the time the snapshot was taken. -func (t *TimerSnapshot) Sum() int64 { return t.histogram.Sum() } - -// Time panics. -func (*TimerSnapshot) Time(func()) { - panic("Time called on a TimerSnapshot") -} - -// Update panics. -func (*TimerSnapshot) Update(time.Duration) { - panic("Update called on a TimerSnapshot") -} - -// UpdateSince panics. -func (*TimerSnapshot) UpdateSince(time.Time) { - panic("UpdateSince called on a TimerSnapshot") -} +func (t *timerSnapshot) Sum() int64 { return t.histogram.Sum() } // Variance returns the variance of the values at the time the snapshot was // taken. -func (t *TimerSnapshot) Variance() float64 { return t.histogram.Variance() } +func (t *timerSnapshot) Variance() float64 { return t.histogram.Variance() } diff --git a/metrics/timer_test.go b/metrics/timer_test.go index 390a0e6c23a8..0afa63a23072 100644 --- a/metrics/timer_test.go +++ b/metrics/timer_test.go @@ -18,7 +18,7 @@ func BenchmarkTimer(b *testing.B) { func TestGetOrRegisterTimer(t *testing.T) { r := NewRegistry() NewRegisteredTimer("foo", r).Update(47) - if tm := GetOrRegisterTimer("foo", r); tm.Count() != 1 { + if tm := GetOrRegisterTimer("foo", r).Snapshot(); tm.Count() != 1 { t.Fatal(tm) } } @@ -27,7 +27,7 @@ func TestTimerExtremes(t *testing.T) { tm := NewTimer() tm.Update(math.MaxInt64) tm.Update(0) - if stdDev := tm.StdDev(); stdDev != 4.611686018427388e+18 { + if stdDev := tm.Snapshot().StdDev(); stdDev != 4.611686018427388e+18 { t.Errorf("tm.StdDev(): 4.611686018427388e+18 != %v\n", stdDev) } } @@ -56,7 +56,7 @@ func TestTimerFunc(t *testing.T) { }) var ( drift = time.Millisecond * 2 - measured = time.Duration(tm.Max()) + measured = time.Duration(tm.Snapshot().Max()) ceil = actualTime + drift floor = actualTime - drift ) @@ -66,7 +66,7 @@ func TestTimerFunc(t *testing.T) { } func TestTimerZero(t *testing.T) { - tm := NewTimer() + tm := NewTimer().Snapshot() if count := tm.Count(); count != 0 { t.Errorf("tm.Count(): 0 != %v\n", count) } @@ -110,5 +110,5 @@ func ExampleGetOrRegisterTimer() { m := "account.create.latency" t := GetOrRegisterTimer(m, nil) t.Update(47) - fmt.Println(t.Max()) // Output: 47 + fmt.Println(t.Snapshot().Max()) // Output: 47 } diff --git a/metrics/writer.go b/metrics/writer.go index ec2e4f8c6a60..098da45c27b2 100644 --- a/metrics/writer.go +++ b/metrics/writer.go @@ -29,19 +29,19 @@ func WriteOnce(r Registry, w io.Writer) { switch metric := namedMetric.m.(type) { case Counter: fmt.Fprintf(w, "counter %s\n", namedMetric.name) - fmt.Fprintf(w, " count: %9d\n", metric.Count()) + fmt.Fprintf(w, " count: %9d\n", metric.Snapshot().Count()) case CounterFloat64: fmt.Fprintf(w, "counter %s\n", namedMetric.name) - fmt.Fprintf(w, " count: %f\n", metric.Count()) + fmt.Fprintf(w, " count: %f\n", metric.Snapshot().Count()) case Gauge: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) - fmt.Fprintf(w, " value: %9d\n", metric.Value()) + fmt.Fprintf(w, " value: %9d\n", metric.Snapshot().Value()) case GaugeFloat64: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) - fmt.Fprintf(w, " value: %f\n", metric.Value()) + fmt.Fprintf(w, " value: %f\n", 
metric.Snapshot().Value()) case GaugeInfo: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) - fmt.Fprintf(w, " value: %s\n", metric.Value().String()) + fmt.Fprintf(w, " value: %s\n", metric.Snapshot().Value().String()) case Healthcheck: metric.Check() fmt.Fprintf(w, "healthcheck %s\n", namedMetric.name) From 2f36a79f670a2516fb401369d8538677a8fe5b6e Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 59/74] metrics: add support for enabling metrics from env vars (#28118) --- metrics/metrics.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/metrics/metrics.go b/metrics/metrics.go index d5c1c61cbd06..3ff72bd73fd2 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -9,7 +9,9 @@ import ( "os" "runtime/metrics" "runtime/pprof" + "strconv" "strings" + "syscall" "time" "github.com/XinFinOrg/XDPoSChain/log" @@ -30,13 +32,35 @@ var EnabledExpensive = false // enablerFlags is the CLI flag names to use to enable metrics collections. var enablerFlags = []string{"metrics"} +// enablerEnvVars is the env var names to use to enable metrics collections. +var enablerEnvVars = []string{"XDC_METRICS"} + // expensiveEnablerFlags is the CLI flag names to use to enable metrics collections. var expensiveEnablerFlags = []string{"metrics-expensive"} +// expensiveEnablerEnvVars is the env var names to use to enable metrics collections. +var expensiveEnablerEnvVars = []string{"XDC_METRICS_EXPENSIVE"} + // Init enables or disables the metrics system. Since we need this to run before // any other code gets to create meters and timers, we'll actually do an ugly hack // and peek into the command line args for the metrics flag. func init() { + for _, enabler := range enablerEnvVars { + if val, found := syscall.Getenv(enabler); found && !Enabled { + if enable, _ := strconv.ParseBool(val); enable { // ignore error, flag parser will choke on it later + log.Info("Enabling metrics collection") + Enabled = true + } + } + } + for _, enabler := range expensiveEnablerEnvVars { + if val, found := syscall.Getenv(enabler); found && !EnabledExpensive { + if enable, _ := strconv.ParseBool(val); enable { // ignore error, flag parser will choke on it later + log.Info("Enabling expensive metrics collection") + EnabledExpensive = true + } + } + } for _, arg := range os.Args { flag := strings.TrimLeft(arg, "-") From 49b5886150457806fc8f7158570f9e92afd11a1b Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 60/74] =?UTF-8?q?rpc:=20fix=20ns/=C2=B5s=20mismatch=20in?= =?UTF-8?q?=20metrics=20(#28649)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rpc/duration/all meter was in nanoseconds, the individual meter in microseconds. This PR changes it so both of them use nanoseconds. --- metrics/timer.go | 10 ++++------ rpc/metrics.go | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/metrics/timer.go b/metrics/timer.go index 576ad8aa3e63..bb8def82fb29 100644 --- a/metrics/timer.go +++ b/metrics/timer.go @@ -106,20 +106,18 @@ func (t *StandardTimer) Time(f func()) { t.Update(time.Since(ts)) } -// Record the duration of an event. +// Record the duration of an event, in nanoseconds. func (t *StandardTimer) Update(d time.Duration) { t.mutex.Lock() defer t.mutex.Unlock() - t.histogram.Update(int64(d)) + t.histogram.Update(d.Nanoseconds()) t.meter.Mark(1) } // Record the duration of an event that started at a time and ends now. +// The record uses nanoseconds. 
func (t *StandardTimer) UpdateSince(ts time.Time) { - t.mutex.Lock() - defer t.mutex.Unlock() - t.histogram.Update(int64(time.Since(ts))) - t.meter.Mark(1) + t.Update(time.Since(ts)) } // timerSnapshot is a read-only copy of another Timer. diff --git a/rpc/metrics.go b/rpc/metrics.go index 11f853dd2401..ea8837f6664b 100644 --- a/rpc/metrics.go +++ b/rpc/metrics.go @@ -46,5 +46,5 @@ func updateServeTimeHistogram(method string, success bool, elapsed time.Duration metrics.NewExpDecaySample(1028, 0.015), ) } - metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(elapsed.Microseconds()) + metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(elapsed.Nanoseconds()) } From 6f67b357deb04736a6972b525aa04eeb895f7f87 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 61/74] metrics: fix docstring names (#28923) --- metrics/counter.go | 2 +- metrics/gauge.go | 4 ++-- metrics/gauge_float64.go | 2 +- metrics/gauge_info.go | 2 +- metrics/healthcheck.go | 2 +- metrics/histogram.go | 2 +- metrics/influxdb/influxdbv2.go | 2 +- metrics/sample.go | 4 ++-- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/metrics/counter.go b/metrics/counter.go index cb81599c215a..dbe8e16a90d2 100644 --- a/metrics/counter.go +++ b/metrics/counter.go @@ -8,7 +8,7 @@ type CounterSnapshot interface { Count() int64 } -// Counters hold an int64 value that can be incremented and decremented. +// Counter hold an int64 value that can be incremented and decremented. type Counter interface { Clear() Dec(int64) diff --git a/metrics/gauge.go b/metrics/gauge.go index 00b59873848c..5933df310786 100644 --- a/metrics/gauge.go +++ b/metrics/gauge.go @@ -2,12 +2,12 @@ package metrics import "sync/atomic" -// gaugeSnapshot contains a readonly int64. +// GaugeSnapshot contains a readonly int64. type GaugeSnapshot interface { Value() int64 } -// Gauges hold an int64 value that can be set arbitrarily. +// Gauge holds an int64 value that can be set arbitrarily. type Gauge interface { Snapshot() GaugeSnapshot Update(int64) diff --git a/metrics/gauge_float64.go b/metrics/gauge_float64.go index 967f2bc60e5c..c1c3c6b6e6f0 100644 --- a/metrics/gauge_float64.go +++ b/metrics/gauge_float64.go @@ -48,7 +48,7 @@ type gaugeFloat64Snapshot float64 // Value returns the value at the time the snapshot was taken. func (g gaugeFloat64Snapshot) Value() float64 { return float64(g) } -// NilGauge is a no-op Gauge. +// NilGaugeFloat64 is a no-op Gauge. type NilGaugeFloat64 struct{} func (NilGaugeFloat64) Snapshot() GaugeFloat64Snapshot { return NilGaugeFloat64{} } diff --git a/metrics/gauge_info.go b/metrics/gauge_info.go index c44b2d85f3ad..0010edc3249d 100644 --- a/metrics/gauge_info.go +++ b/metrics/gauge_info.go @@ -9,7 +9,7 @@ type GaugeInfoSnapshot interface { Value() GaugeInfoValue } -// GaugeInfos hold a GaugeInfoValue value that can be set arbitrarily. +// GaugeInfo holds a GaugeInfoValue value that can be set arbitrarily. type GaugeInfo interface { Update(GaugeInfoValue) Snapshot() GaugeInfoSnapshot diff --git a/metrics/healthcheck.go b/metrics/healthcheck.go index f1ae31e34aee..adcd15ab581a 100644 --- a/metrics/healthcheck.go +++ b/metrics/healthcheck.go @@ -1,6 +1,6 @@ package metrics -// Healthchecks hold an error value describing an arbitrary up/down status. +// Healthcheck holds an error value describing an arbitrary up/down status. 
type Healthcheck interface { Check() Error() error diff --git a/metrics/histogram.go b/metrics/histogram.go index 44de588bc1dc..10259a246377 100644 --- a/metrics/histogram.go +++ b/metrics/histogram.go @@ -4,7 +4,7 @@ type HistogramSnapshot interface { SampleSnapshot } -// Histograms calculate distribution statistics from a series of int64 values. +// Histogram calculates distribution statistics from a series of int64 values. type Histogram interface { Clear() Update(int64) diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go index d45e9c4d45be..5d8ba1b012ac 100644 --- a/metrics/influxdb/influxdbv2.go +++ b/metrics/influxdb/influxdbv2.go @@ -25,7 +25,7 @@ type v2Reporter struct { write api.WriteAPI } -// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags +// InfluxDBV2WithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags func InfluxDBV2WithTags(r metrics.Registry, d time.Duration, endpoint string, token string, bucket string, organization string, namespace string, tags map[string]string) { rep := &v2Reporter{ reg: r, diff --git a/metrics/sample.go b/metrics/sample.go index 5398dd42d5de..dbd5a02f5452 100644 --- a/metrics/sample.go +++ b/metrics/sample.go @@ -148,13 +148,13 @@ func (NilSample) Clear() {} func (NilSample) Snapshot() SampleSnapshot { return (*emptySnapshot)(nil) } func (NilSample) Update(v int64) {} -// SamplePercentiles returns an arbitrary percentile of the slice of int64. +// SamplePercentile returns an arbitrary percentile of the slice of int64. func SamplePercentile(values []int64, p float64) float64 { return CalculatePercentiles(values, []float64{p})[0] } // CalculatePercentiles returns a slice of arbitrary percentiles of the slice of -// int64. This method returns interpolated results, so e.g if there are only two +// int64. This method returns interpolated results, so e.g. if there are only two // values, [0, 10], a 50% percentile will land between them. // // Note: As a side-effect, this method will also sort the slice of values. 
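
The metrics patches above move every read of a Sample, Histogram, Meter or Timer behind a read-only snapshot type and record timer samples in nanoseconds throughout. Below is a minimal sketch of how calling code reads a timer under the reworked API; the package path matches this repository, but the metric name and durations are illustrative and the snippet is not part of the patch series:

package main

import (
	"fmt"
	"time"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	// Illustrative metric name; not one registered by the node itself.
	t := metrics.GetOrRegisterTimer("example/latency", nil)

	// Update stores the duration as nanoseconds in the backing histogram
	// (the ns/µs fix above normalises all timer samples to nanoseconds).
	t.Update(250 * time.Millisecond)
	t.UpdateSince(time.Now().Add(-50 * time.Millisecond))

	// Reads now go through an immutable snapshot rather than the live metric.
	snap := t.Snapshot()
	fmt.Println("count:", snap.Count())
	fmt.Println("mean (ns):", snap.Mean())
	fmt.Println("p95 (ns):", snap.Percentiles([]float64{0.95})[0])
	fmt.Println("1-min rate:", snap.Rate1())
}

When metrics collection is disabled the timer is a no-op and the snapshot reports zeros; enabling collection (for example via the XDC_METRICS environment variable added above) produces real values.
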
From 2220156b9a2dbf14b291cb6493c1e18cb9dadffd Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:13 +0800 Subject: [PATCH 62/74] cmd, core, metrics: always report expensive metrics (#29191) * cmd, core, metrics: always report expensive metrics * core, metrics: report block processing metrics as resetting timer * metrics: update reporter tests --- cmd/XDC/config.go | 3 ++- cmd/utils/flags.go | 5 ---- cmd/utils/flags_legacy.go | 6 +++++ core/blockchain.go | 28 ++++++++++----------- core/state/state_object.go | 19 +++++--------- core/state/statedb.go | 26 ++++++++----------- metrics/config.go | 2 +- metrics/influxdb/influxdb.go | 25 ++++++++++-------- metrics/influxdb/testdata/influxdbv1.want | 2 +- metrics/influxdb/testdata/influxdbv2.want | 2 +- metrics/metrics.go | 25 ------------------ metrics/prometheus/collector.go | 9 ++++--- metrics/prometheus/testdata/prometheus.want | 5 +++- 13 files changed, 64 insertions(+), 93 deletions(-) diff --git a/cmd/XDC/config.go b/cmd/XDC/config.go index 7dbc8774b895..360fb768d142 100644 --- a/cmd/XDC/config.go +++ b/cmd/XDC/config.go @@ -32,6 +32,7 @@ import ( "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/eth/ethconfig" "github.com/XinFinOrg/XDPoSChain/internal/flags" + "github.com/XinFinOrg/XDPoSChain/log" "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/node" "github.com/XinFinOrg/XDPoSChain/params" @@ -271,7 +272,7 @@ func applyMetricConfig(ctx *cli.Context, cfg *XDCConfig) { cfg.Metrics.Enabled = ctx.Bool(utils.MetricsEnabledFlag.Name) } if ctx.IsSet(utils.MetricsEnabledExpensiveFlag.Name) { - cfg.Metrics.EnabledExpensive = ctx.Bool(utils.MetricsEnabledExpensiveFlag.Name) + log.Warn("Expensive metrics are collected by default, please remove this flag", "flag", utils.MetricsEnabledExpensiveFlag.Name) } if ctx.IsSet(utils.MetricsHTTPFlag.Name) { cfg.Metrics.HTTP = ctx.String(utils.MetricsHTTPFlag.Name) diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 797f578243bb..eb8d75f297ce 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -644,11 +644,6 @@ var ( Usage: "Enable metrics collection and reporting", Category: flags.MetricsCategory, } - MetricsEnabledExpensiveFlag = &cli.BoolFlag{ - Name: "metrics-expensive", - Usage: "Enable expensive metrics collection and reporting", - Category: flags.MetricsCategory, - } // MetricsHTTPFlag defines the endpoint for a stand-alone metrics HTTP endpoint. // Since the pprof service enables sensitive/vulnerable behavior, this allows a user // to enable a public-OK metrics endpoint without having to worry about ALSO exposing diff --git a/cmd/utils/flags_legacy.go b/cmd/utils/flags_legacy.go index 3bd314292d52..afd1e0e8bba8 100644 --- a/cmd/utils/flags_legacy.go +++ b/cmd/utils/flags_legacy.go @@ -68,6 +68,12 @@ var ( Usage: "Prepends log messages with call-site location (deprecated)", Category: flags.DeprecatedCategory, } + // Deprecated February 2024 + MetricsEnabledExpensiveFlag = &cli.BoolFlag{ + Name: "metrics-expensive", + Usage: "Enable expensive metrics collection and reporting (deprecated)", + Category: flags.DeprecatedCategory, + } ) // showDeprecated displays deprecated flags that will be soon removed from the codebase. 
diff --git a/core/blockchain.go b/core/blockchain.go index f09fb31d670e..7aebc61155bd 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -62,20 +62,20 @@ var ( chainInfoGauge = metrics.NewRegisteredGaugeInfo("chain/info", nil) - accountReadTimer = metrics.NewRegisteredTimer("chain/account/reads", nil) - accountHashTimer = metrics.NewRegisteredTimer("chain/account/hashes", nil) - accountUpdateTimer = metrics.NewRegisteredTimer("chain/account/updates", nil) - accountCommitTimer = metrics.NewRegisteredTimer("chain/account/commits", nil) - - storageReadTimer = metrics.NewRegisteredTimer("chain/storage/reads", nil) - storageHashTimer = metrics.NewRegisteredTimer("chain/storage/hashes", nil) - storageUpdateTimer = metrics.NewRegisteredTimer("chain/storage/updates", nil) - storageCommitTimer = metrics.NewRegisteredTimer("chain/storage/commits", nil) - - blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) - blockValidationTimer = metrics.NewRegisteredTimer("chain/validation", nil) - blockExecutionTimer = metrics.NewRegisteredTimer("chain/execution", nil) - blockWriteTimer = metrics.NewRegisteredTimer("chain/write", nil) + accountReadTimer = metrics.NewRegisteredResettingTimer("chain/account/reads", nil) + accountHashTimer = metrics.NewRegisteredResettingTimer("chain/account/hashes", nil) + accountUpdateTimer = metrics.NewRegisteredResettingTimer("chain/account/updates", nil) + accountCommitTimer = metrics.NewRegisteredResettingTimer("chain/account/commits", nil) + + storageReadTimer = metrics.NewRegisteredResettingTimer("chain/storage/reads", nil) + storageHashTimer = metrics.NewRegisteredResettingTimer("chain/storage/hashes", nil) + storageUpdateTimer = metrics.NewRegisteredResettingTimer("chain/storage/updates", nil) + storageCommitTimer = metrics.NewRegisteredResettingTimer("chain/storage/commits", nil) + + blockInsertTimer = metrics.NewRegisteredResettingTimer("chain/inserts", nil) + blockValidationTimer = metrics.NewRegisteredResettingTimer("chain/validation", nil) + blockExecutionTimer = metrics.NewRegisteredResettingTimer("chain/execution", nil) + blockWriteTimer = metrics.NewRegisteredResettingTimer("chain/write", nil) blockReorgMeter = metrics.NewRegisteredMeter("chain/reorg/executes", nil) blockReorgAddMeter = metrics.NewRegisteredMeter("chain/reorg/add", nil) diff --git a/core/state/state_object.go b/core/state/state_object.go index 10fcfc8b3cb2..bb19597cc6f9 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -26,7 +26,6 @@ import ( "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/core/types" "github.com/XinFinOrg/XDPoSChain/crypto" - "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/rlp" ) @@ -176,9 +175,7 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has return s.fakeStorage[key] } // Track the amount of time wasted on reading the storge trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) value := common.Hash{} // Load from DB in case it is missing. enc, err := s.getTrie(db).TryGet(key[:]) @@ -276,9 +273,7 @@ func (s *stateObject) setState(key, value common.Hash) { // updateTrie writes cached storage modifications into the object's storage trie. 
func (s *stateObject) updateTrie(db Database) Trie { // Track the amount of time wasted on updating the storge trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) tr := s.getTrie(db) for key, value := range s.dirtyStorage { delete(s.dirtyStorage, key) @@ -298,9 +293,8 @@ func (s *stateObject) updateRoot(db Database) { s.updateTrie(db) // Track the amount of time wasted on hashing the storge trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now()) + s.data.Root = s.trie.Hash() } @@ -312,9 +306,8 @@ func (s *stateObject) CommitTrie(db Database) error { return s.dbErr } // Track the amount of time wasted on committing the storge trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) + root, err := s.trie.Commit(nil) if err == nil { s.data.Root = root diff --git a/core/state/statedb.go b/core/state/statedb.go index 0e0dc9e7e5b5..d357e6e1bf58 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -28,7 +28,6 @@ import ( "github.com/XinFinOrg/XDPoSChain/core/types" "github.com/XinFinOrg/XDPoSChain/crypto" "github.com/XinFinOrg/XDPoSChain/log" - "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/params" "github.com/XinFinOrg/XDPoSChain/rlp" "github.com/XinFinOrg/XDPoSChain/trie" @@ -459,9 +458,8 @@ func (s *StateDB) GetTransientState(addr common.Address, key common.Hash) common // updateStateObject writes the given object to the trie. func (s *StateDB) updateStateObject(stateObject *stateObject) { // Track the amount of time wasted on updating the account from the trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + // Encode the account and update the account trie addr := stateObject.Address() @@ -475,9 +473,8 @@ func (s *StateDB) updateStateObject(stateObject *stateObject) { // deleteStateObject removes the given object from the state trie. 
func (s *StateDB) deleteStateObject(stateObject *stateObject) { // Track the amount of time wasted on deleting the account from the trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + // Delete the account from the trie stateObject.deleted = true @@ -503,9 +500,8 @@ func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) return obj } // Track the amount of time wasted on loading the object from the database - if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) + // Load the object from the database enc, err := s.trie.TryGet(addr[:]) if len(enc) == 0 { @@ -701,9 +697,8 @@ func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { s.Finalise(deleteEmptyObjects) // Track the amount of time wasted on hashing the account trie - if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now()) + return s.trie.Hash() } @@ -770,9 +765,8 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) delete(s.stateObjectsDirty, addr) } // Write the account trie changes, measuing the amount of wasted time - if metrics.EnabledExpensive { - defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) - } + defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) + root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error { var account Account if err := rlp.DecodeBytes(leaf, &account); err != nil { diff --git a/metrics/config.go b/metrics/config.go index cac1fc9c184a..6acb985c16e5 100644 --- a/metrics/config.go +++ b/metrics/config.go @@ -19,7 +19,7 @@ package metrics // Config contains the configuration for the metric collection. 
type Config struct { Enabled bool `toml:",omitempty"` - EnabledExpensive bool `toml:",omitempty"` + EnabledExpensive bool `toml:"-"` HTTP string `toml:",omitempty"` Port int `toml:",omitempty"` EnableInfluxDB bool `toml:",omitempty"` diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index d7c35b671821..8cfa081c1421 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -98,20 +98,23 @@ func readMeter(namespace, name string, i interface{}) (string, map[string]interf } return measurement, fields case metrics.ResettingTimer: - t := metric.Snapshot() - if t.Count() == 0 { + ms := metric.Snapshot() + if ms.Count() == 0 { break } - ps := t.Percentiles([]float64{0.50, 0.95, 0.99}) - measurement := fmt.Sprintf("%s%s.span", namespace, name) + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + measurement := fmt.Sprintf("%s%s.timer", namespace, name) fields := map[string]interface{}{ - "count": t.Count(), - "max": t.Max(), - "mean": t.Mean(), - "min": t.Min(), - "p50": int(ps[0]), - "p95": int(ps[1]), - "p99": int(ps[2]), + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], } return measurement, fields } diff --git a/metrics/influxdb/testdata/influxdbv1.want b/metrics/influxdb/testdata/influxdbv1.want index 9443faedc5a2..ded9434c7314 100644 --- a/metrics/influxdb/testdata/influxdbv1.want +++ b/metrics/influxdb/testdata/influxdbv1.want @@ -7,5 +7,5 @@ goth.test/gauge_float64.gauge value=34567.89 978307200000000000 goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 -goth.test/resetting_timer.span count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000i,p95=120000000i,p99=120000000i 978307200000000000 +goth.test/resetting_timer.timer count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000,p75=40500000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000 978307200000000000 goth.test/timer.timer count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/influxdb/testdata/influxdbv2.want b/metrics/influxdb/testdata/influxdbv2.want index 9443faedc5a2..ded9434c7314 100644 --- a/metrics/influxdb/testdata/influxdbv2.want +++ b/metrics/influxdb/testdata/influxdbv2.want @@ -7,5 +7,5 @@ goth.test/gauge_float64.gauge value=34567.89 978307200000000000 goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 -goth.test/resetting_timer.span 
count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000i,p95=120000000i,p99=120000000i 978307200000000000 +goth.test/resetting_timer.timer count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000,p75=40500000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000 978307200000000000 goth.test/timer.timer count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/metrics.go b/metrics/metrics.go index 3ff72bd73fd2..42623114d021 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -24,23 +24,12 @@ import ( // for less cluttered pprof profiles. var Enabled = false -// EnabledExpensive is a soft-flag meant for external packages to check if costly -// metrics gathering is allowed or not. The goal is to separate standard metrics -// for health monitoring and debug metrics that might impact runtime performance. -var EnabledExpensive = false - // enablerFlags is the CLI flag names to use to enable metrics collections. var enablerFlags = []string{"metrics"} // enablerEnvVars is the env var names to use to enable metrics collections. var enablerEnvVars = []string{"XDC_METRICS"} -// expensiveEnablerFlags is the CLI flag names to use to enable metrics collections. -var expensiveEnablerFlags = []string{"metrics-expensive"} - -// expensiveEnablerEnvVars is the env var names to use to enable metrics collections. -var expensiveEnablerEnvVars = []string{"XDC_METRICS_EXPENSIVE"} - // Init enables or disables the metrics system. Since we need this to run before // any other code gets to create meters and timers, we'll actually do an ugly hack // and peek into the command line args for the metrics flag. 
@@ -53,14 +42,6 @@ func init() { } } } - for _, enabler := range expensiveEnablerEnvVars { - if val, found := syscall.Getenv(enabler); found && !EnabledExpensive { - if enable, _ := strconv.ParseBool(val); enable { // ignore error, flag parser will choke on it later - log.Info("Enabling expensive metrics collection") - EnabledExpensive = true - } - } - } for _, arg := range os.Args { flag := strings.TrimLeft(arg, "-") @@ -70,12 +51,6 @@ func init() { Enabled = true } } - for _, enabler := range expensiveEnablerFlags { - if !EnabledExpensive && flag == enabler { - log.Info("Enabling expensive metrics collection") - EnabledExpensive = true - } - } } } diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index 0b81c63ba5b7..ac2c960ac775 100644 --- a/metrics/prometheus/collector.go +++ b/metrics/prometheus/collector.go @@ -125,12 +125,13 @@ func (c *collector) addResettingTimer(name string, m metrics.ResettingTimerSnaps if m.Count() <= 0 { return } - ps := m.Percentiles([]float64{0.50, 0.95, 0.99}) + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) - c.writeSummaryPercentile(name, "0.50", ps[0]) - c.writeSummaryPercentile(name, "0.95", ps[1]) - c.writeSummaryPercentile(name, "0.99", ps[2]) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } c.buff.WriteRune('\n') } diff --git a/metrics/prometheus/testdata/prometheus.want b/metrics/prometheus/testdata/prometheus.want index 861c5f5cf087..a999d83801c6 100644 --- a/metrics/prometheus/testdata/prometheus.want +++ b/metrics/prometheus/testdata/prometheus.want @@ -53,9 +53,12 @@ test_meter 0 test_resetting_timer_count 6 # TYPE test_resetting_timer summary -test_resetting_timer {quantile="0.50"} 1.25e+07 +test_resetting_timer {quantile="0.5"} 1.25e+07 +test_resetting_timer {quantile="0.75"} 4.05e+07 test_resetting_timer {quantile="0.95"} 1.2e+08 test_resetting_timer {quantile="0.99"} 1.2e+08 +test_resetting_timer {quantile="0.999"} 1.2e+08 +test_resetting_timer {quantile="0.9999"} 1.2e+08 # TYPE test_timer_count counter test_timer_count 6 From 7f37ee2e635179c5f0b2456d9a5af4a9ca9b09ee Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 63/74] metrics: fix docstrings (#29279) --- metrics/json.go | 4 ++-- metrics/registry.go | 38 +++++++++++++++++++------------------- metrics/timer.go | 8 ++++---- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/metrics/json.go b/metrics/json.go index 2087d8211eb1..6b134d477b60 100644 --- a/metrics/json.go +++ b/metrics/json.go @@ -26,6 +26,6 @@ func WriteJSONOnce(r Registry, w io.Writer) { json.NewEncoder(w).Encode(r) } -func (p *PrefixedRegistry) MarshalJSON() ([]byte, error) { - return json.Marshal(p.GetAll()) +func (r *PrefixedRegistry) MarshalJSON() ([]byte, error) { + return json.Marshal(r.GetAll()) } diff --git a/metrics/registry.go b/metrics/registry.go index a6cae1fff0a9..8325f4e153e6 100644 --- a/metrics/registry.go +++ b/metrics/registry.go @@ -8,8 +8,8 @@ import ( "sync" ) -// DuplicateMetric is the error returned by Registry.Register when a metric -// already exists. If you mean to Register that metric you must first +// DuplicateMetric is the error returned by Registry. Register when a metric +// already exists. If you mean to Register that metric you must first // Unregister the existing metric. 
type DuplicateMetric string @@ -20,11 +20,11 @@ func (err DuplicateMetric) Error() string { // A Registry holds references to a set of metrics by name and can iterate // over them, calling callback functions provided by the user. // -// This is an interface so as to encourage other structs to implement +// This is an interface to encourage other structs to implement // the Registry API as appropriate. type Registry interface { - // Call the given function for each registered metric. + // Each call the given function for each registered metric. Each(func(string, interface{})) // Get the metric by the given name or nil if none is registered. @@ -33,7 +33,7 @@ type Registry interface { // GetAll metrics in the Registry. GetAll() map[string]map[string]interface{} - // Gets an existing metric or registers the given one. + // GetOrRegister gets an existing metric or registers the given one. // The interface can be the metric to register if not found in registry, // or a function returning the metric for lazy instantiation. GetOrRegister(string, interface{}) interface{} @@ -41,7 +41,7 @@ type Registry interface { // Register the given metric under the given name. Register(string, interface{}) error - // Run all registered healthchecks. + // RunHealthchecks run all registered healthchecks. RunHealthchecks() // Unregister the metric with the given name. @@ -52,7 +52,7 @@ type orderedRegistry struct { StandardRegistry } -// Call the given function for each registered metric. +// Each call the given function for each registered metric. func (r *orderedRegistry) Each(f func(string, interface{})) { var names []string reg := r.registered() @@ -75,13 +75,13 @@ func NewOrderedRegistry() Registry { return new(orderedRegistry) } -// The standard implementation of a Registry uses sync.map +// StandardRegistry the standard implementation of a Registry uses sync.map // of names to metrics. type StandardRegistry struct { metrics sync.Map } -// Call the given function for each registered metric. +// Each call the given function for each registered metric. func (r *StandardRegistry) Each(f func(string, interface{})) { for name, i := range r.registered() { f(name, i) @@ -94,7 +94,7 @@ func (r *StandardRegistry) Get(name string) interface{} { return item } -// Gets an existing metric or creates and registers a new one. Threadsafe +// GetOrRegister gets an existing metric or creates and registers a new one. Threadsafe // alternative to calling Get and Register on failure. // The interface can be the metric to register if not found in registry, // or a function returning the metric for lazy instantiation. @@ -114,7 +114,7 @@ func (r *StandardRegistry) GetOrRegister(name string, i interface{}) interface{} return item } -// Register the given metric under the given name. Returns a DuplicateMetric +// Register the given metric under the given name. Returns a DuplicateMetric // if a metric by the given name is already registered. func (r *StandardRegistry) Register(name string, i interface{}) error { // fast path @@ -133,7 +133,7 @@ func (r *StandardRegistry) Register(name string, i interface{}) error { return nil } -// Run all registered healthchecks. +// RunHealthchecks run all registered healthchecks. func (r *StandardRegistry) RunHealthchecks() { r.metrics.Range(func(key, value any) bool { if h, ok := value.(Healthcheck); ok { @@ -263,7 +263,7 @@ func NewPrefixedChildRegistry(parent Registry, prefix string) Registry { } } -// Call the given function for each registered metric. 
+// Each call the given function for each registered metric. func (r *PrefixedRegistry) Each(fn func(string, interface{})) { wrappedFn := func(prefix string) func(string, interface{}) { return func(name string, iface interface{}) { @@ -295,7 +295,7 @@ func (r *PrefixedRegistry) Get(name string) interface{} { return r.underlying.Get(realName) } -// Gets an existing metric or registers the given one. +// GetOrRegister gets an existing metric or registers the given one. // The interface can be the metric to register if not found in registry, // or a function returning the metric for lazy instantiation. func (r *PrefixedRegistry) GetOrRegister(name string, metric interface{}) interface{} { @@ -309,7 +309,7 @@ func (r *PrefixedRegistry) Register(name string, metric interface{}) error { return r.underlying.Register(realName, metric) } -// Run all registered healthchecks. +// RunHealthchecks run all registered healthchecks. func (r *PrefixedRegistry) RunHealthchecks() { r.underlying.RunHealthchecks() } @@ -329,7 +329,7 @@ var ( DefaultRegistry = NewRegistry() ) -// Call the given function for each registered metric. +// Each call the given function for each registered metric. func Each(f func(string, interface{})) { DefaultRegistry.Each(f) } @@ -339,7 +339,7 @@ func Get(name string) interface{} { return DefaultRegistry.Get(name) } -// Gets an existing metric or creates and registers a new one. Threadsafe +// GetOrRegister gets an existing metric or creates and registers a new one. Threadsafe // alternative to calling Get and Register on failure. func GetOrRegister(name string, i interface{}) interface{} { return DefaultRegistry.GetOrRegister(name, i) @@ -351,7 +351,7 @@ func Register(name string, i interface{}) error { return DefaultRegistry.Register(name, i) } -// Register the given metric under the given name. Panics if a metric by the +// MustRegister register the given metric under the given name. Panics if a metric by the // given name is already registered. func MustRegister(name string, i interface{}) { if err := Register(name, i); err != nil { @@ -359,7 +359,7 @@ func MustRegister(name string, i interface{}) { } } -// Run all registered healthchecks. +// RunHealthchecks run all registered healthchecks. func RunHealthchecks() { DefaultRegistry.RunHealthchecks() } diff --git a/metrics/timer.go b/metrics/timer.go index bb8def82fb29..fc2a88f508bc 100644 --- a/metrics/timer.go +++ b/metrics/timer.go @@ -10,7 +10,7 @@ type TimerSnapshot interface { MeterSnapshot } -// Timers capture the duration and rate of events. +// Timer capture the duration and rate of events. type Timer interface { Snapshot() TimerSnapshot Stop() @@ -99,14 +99,14 @@ func (t *StandardTimer) Stop() { t.meter.Stop() } -// Record the duration of the execution of the given function. +// Time record the duration of the execution of the given function. func (t *StandardTimer) Time(f func()) { ts := time.Now() f() t.Update(time.Since(ts)) } -// Record the duration of an event, in nanoseconds. +// Update the duration of an event, in nanoseconds. func (t *StandardTimer) Update(d time.Duration) { t.mutex.Lock() defer t.mutex.Unlock() @@ -114,7 +114,7 @@ func (t *StandardTimer) Update(d time.Duration) { t.meter.Mark(1) } -// Record the duration of an event that started at a time and ends now. +// UpdateSince update the duration of an event that started at a time and ends now. // The record uses nanoseconds. 
func (t *StandardTimer) UpdateSince(ts time.Time) { t.Update(time.Since(ts)) From fd8782b2d08810a7964d4bbdafd9c8a133c3d53f Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 64/74] metrics: use min from go1.21 (#29307) --- metrics/sample_test.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/metrics/sample_test.go b/metrics/sample_test.go index 79673570554c..9835ec1c3003 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -87,13 +87,6 @@ func BenchmarkUniformSample1028(b *testing.B) { benchmarkSample(b, NewUniformSample(1028)) } -func min(a, b int) int { - if a < b { - return a - } - return b -} - func TestExpDecaySample(t *testing.T) { for _, tc := range []struct { reservoirSize int From 99f605ffabd7d3d1c70bdee393896af68e5aefca Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 65/74] metrics/influxdb: skip float64-precision-dependent tests on arm64 (#29047) metrics/influxdb: fix failed cases caused by float64 precision on arm64 --- metrics/influxdb/influxdb_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/metrics/influxdb/influxdb_test.go b/metrics/influxdb/influxdb_test.go index 07af3f90ce21..aa47f15e1825 100644 --- a/metrics/influxdb/influxdb_test.go +++ b/metrics/influxdb/influxdb_test.go @@ -23,6 +23,7 @@ import ( "net/http/httptest" "net/url" "os" + "runtime" "strings" "testing" @@ -37,6 +38,10 @@ func TestMain(m *testing.M) { } func TestExampleV1(t *testing.T) { + if runtime.GOARCH == "arm64" { + t.Skip("test skipped on ARM64 due to floating point precision differences") + } + r := internal.ExampleMetrics() var have, want string ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -69,6 +74,10 @@ func TestExampleV1(t *testing.T) { } func TestExampleV2(t *testing.T) { + if runtime.GOARCH == "arm64" { + t.Skip("test skipped on ARM64 due to floating point precision differences") + } + r := internal.ExampleMetrics() var have, want string ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { From c7565af9b8dca054d0b77e77d67bcca3734c0a02 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 66/74] metrics: remove dependency on golang.org/exp (#29314) --- go.mod | 1 - go.sum | 2 -- metrics/sample.go | 3 +-- metrics/writer.go | 3 +-- metrics/writer_test.go | 3 +-- 5 files changed, 3 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index de84356c85ad..28a24f8b6b38 100644 --- a/go.mod +++ b/go.mod @@ -55,7 +55,6 @@ require ( github.com/mattn/go-isatty v0.0.17 github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible github.com/urfave/cli/v2 v2.27.5 - golang.org/x/exp v0.0.0-20231006140011-7918f672742d gopkg.in/natefinch/lumberjack.v2 v2.2.1 ) diff --git a/go.sum b/go.sum index f1e8dd27aa3f..68019c92aba9 100644 --- a/go.sum +++ b/go.sum @@ -218,8 +218,6 @@ golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= 
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= diff --git a/metrics/sample.go b/metrics/sample.go index dbd5a02f5452..17b2bee28f10 100644 --- a/metrics/sample.go +++ b/metrics/sample.go @@ -3,10 +3,9 @@ package metrics import ( "math" "math/rand" + "slices" "sync" "time" - - "golang.org/x/exp/slices" ) const rescaleThreshold = time.Hour diff --git a/metrics/writer.go b/metrics/writer.go index 098da45c27b2..c211c5046b7d 100644 --- a/metrics/writer.go +++ b/metrics/writer.go @@ -3,10 +3,9 @@ package metrics import ( "fmt" "io" + "slices" "strings" "time" - - "golang.org/x/exp/slices" ) // Write sorts writes each metric in the given registry periodically to the diff --git a/metrics/writer_test.go b/metrics/writer_test.go index 8376bf8975c8..edcfe955abcf 100644 --- a/metrics/writer_test.go +++ b/metrics/writer_test.go @@ -1,9 +1,8 @@ package metrics import ( + "slices" "testing" - - "golang.org/x/exp/slices" ) func TestMetricsSorting(t *testing.T) { From 988465cafa5304e6de97fd259db74f29210e4bb7 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 67/74] metrics: fix mismatched names in comments (#29348) --- metrics/gauge.go | 2 +- metrics/meter.go | 2 +- metrics/metrics.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/metrics/gauge.go b/metrics/gauge.go index 5933df310786..6d10bbf85660 100644 --- a/metrics/gauge.go +++ b/metrics/gauge.go @@ -74,7 +74,7 @@ func (g *StandardGauge) Update(v int64) { g.value.Store(v) } -// Update updates the gauge's value if v is larger then the current value. +// UpdateIfGt updates the gauge's value if v is larger then the current value. func (g *StandardGauge) UpdateIfGt(v int64) { for { exist := g.value.Load() diff --git a/metrics/meter.go b/metrics/meter.go index 22475ef6ebee..432838f4ef7b 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -173,7 +173,7 @@ type meterArbiter struct { var arbiter = meterArbiter{ticker: time.NewTicker(5 * time.Second), meters: make(map[*StandardMeter]struct{})} -// Ticks meters on the scheduled interval +// tick meters on the scheduled interval func (ma *meterArbiter) tick() { for range ma.ticker.C { ma.tickMeters() diff --git a/metrics/metrics.go b/metrics/metrics.go index 42623114d021..797001ab656e 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -30,7 +30,7 @@ var enablerFlags = []string{"metrics"} // enablerEnvVars is the env var names to use to enable metrics collections. var enablerEnvVars = []string{"XDC_METRICS"} -// Init enables or disables the metrics system. Since we need this to run before +// init enables or disables the metrics system. Since we need this to run before // any other code gets to create meters and timers, we'll actually do an ugly hack // and peek into the command line args for the metrics flag. 
func init() { From cf90a7aa76aba706beef6515968687c08ae48f6f Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 68/74] metrics: remove librato (#29624) --- metrics/README.md | 19 --- metrics/librato/client.go | 104 --------------- metrics/librato/librato.go | 254 ------------------------------------- 3 files changed, 377 deletions(-) delete mode 100644 metrics/librato/client.go delete mode 100644 metrics/librato/librato.go diff --git a/metrics/README.md b/metrics/README.md index e2d7945008e6..85b119470a92 100644 --- a/metrics/README.md +++ b/metrics/README.md @@ -100,24 +100,6 @@ go influxdb.InfluxDB(metrics.DefaultRegistry, ) ``` -Periodically upload every metric to Librato using the [Librato client](https://github.com/mihasya/go-metrics-librato): - -**Note**: the client included with this repository under the `librato` package -has been deprecated and moved to the repository linked above. - -```go -import "github.com/mihasya/go-metrics-librato" - -go librato.Librato(metrics.DefaultRegistry, - 10e9, // interval - "example@example.com", // account owner email address - "token", // Librato API token - "hostname", // source - []float64{0.95}, // percentiles to send - time.Millisecond, // time unit -) -``` - Periodically emit every metric to StatHat: ```go @@ -157,7 +139,6 @@ Publishing Metrics Clients are available for the following destinations: -* Librato - https://github.com/mihasya/go-metrics-librato * Graphite - https://github.com/cyberdelia/go-metrics-graphite * InfluxDB - https://github.com/vrischmann/go-metrics-influxdb * Ganglia - https://github.com/appscode/metlia diff --git a/metrics/librato/client.go b/metrics/librato/client.go deleted file mode 100644 index 053e1811d4ed..000000000000 --- a/metrics/librato/client.go +++ /dev/null @@ -1,104 +0,0 @@ -package librato - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - "net/http" -) - -const Operations = "operations" -const OperationsShort = "ops" - -type LibratoClient struct { - Email, Token string -} - -// property strings -const ( - // display attributes - Color = "color" - DisplayMax = "display_max" - DisplayMin = "display_min" - DisplayUnitsLong = "display_units_long" - DisplayUnitsShort = "display_units_short" - DisplayStacked = "display_stacked" - DisplayTransform = "display_transform" - // special gauge display attributes - SummarizeFunction = "summarize_function" - Aggregate = "aggregate" - - // metric keys - Name = "name" - Period = "period" - Description = "description" - DisplayName = "display_name" - Attributes = "attributes" - - // measurement keys - MeasureTime = "measure_time" - Source = "source" - Value = "value" - - // special gauge keys - Count = "count" - Sum = "sum" - Max = "max" - Min = "min" - SumSquares = "sum_squares" - - // batch keys - Counters = "counters" - Gauges = "gauges" - - MetricsPostUrl = "https://metrics-api.librato.com/v1/metrics" -) - -type Measurement map[string]interface{} -type Metric map[string]interface{} - -type Batch struct { - Gauges []Measurement `json:"gauges,omitempty"` - Counters []Measurement `json:"counters,omitempty"` - MeasureTime int64 `json:"measure_time"` - Source string `json:"source"` -} - -func (lc *LibratoClient) PostMetrics(batch Batch) (err error) { - var ( - js []byte - req *http.Request - resp *http.Response - ) - - if len(batch.Counters) == 0 && len(batch.Gauges) == 0 { - return nil - } - - if js, err = json.Marshal(batch); err != nil { - return - } - - if req, err = http.NewRequest(http.MethodPost, MetricsPostUrl, 
bytes.NewBuffer(js)); err != nil { - return - } - - req.Header.Set("Content-Type", "application/json") - req.SetBasicAuth(lc.Email, lc.Token) - - resp, err = http.DefaultClient.Do(req) - if err != nil { - return - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - var body []byte - if body, err = io.ReadAll(resp.Body); err != nil { - body = []byte(fmt.Sprintf("(could not fetch response body for error: %s)", err)) - } - err = fmt.Errorf("unable to post to Librato: %d %s %s", resp.StatusCode, resp.Status, string(body)) - } - return -} diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go deleted file mode 100644 index fa0123e05abf..000000000000 --- a/metrics/librato/librato.go +++ /dev/null @@ -1,254 +0,0 @@ -package librato - -import ( - "fmt" - "log" - "math" - "regexp" - "time" - - "github.com/XinFinOrg/XDPoSChain/metrics" -) - -// a regexp for extracting the unit from time.Duration.String -var unitRegexp = regexp.MustCompile(`[^\\d]+$`) - -// a helper that turns a time.Duration into librato display attributes for timer metrics -func translateTimerAttributes(d time.Duration) (attrs map[string]interface{}) { - attrs = make(map[string]interface{}) - attrs[DisplayTransform] = fmt.Sprintf("x/%d", int64(d)) - attrs[DisplayUnitsShort] = string(unitRegexp.Find([]byte(d.String()))) - return -} - -type Reporter struct { - Email, Token string - Namespace string - Source string - Interval time.Duration - Registry metrics.Registry - Percentiles []float64 // percentiles to report on histogram metrics - TimerAttributes map[string]interface{} // units in which timers will be displayed - intervalSec int64 -} - -func NewReporter(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) *Reporter { - return &Reporter{e, t, "", s, d, r, p, translateTimerAttributes(u), int64(d / time.Second)} -} - -func Librato(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) { - NewReporter(r, d, e, t, s, p, u).Run() -} - -func (rep *Reporter) Run() { - log.Printf("WARNING: This client has been DEPRECATED! 
It has been moved to https://github.com/mihasya/go-metrics-librato and will be removed from rcrowley/go-metrics on August 5th 2015") - ticker := time.NewTicker(rep.Interval) - defer ticker.Stop() - metricsApi := &LibratoClient{rep.Email, rep.Token} - for now := range ticker.C { - var metrics Batch - var err error - if metrics, err = rep.BuildRequest(now, rep.Registry); err != nil { - log.Printf("ERROR constructing librato request body %s", err) - continue - } - if err := metricsApi.PostMetrics(metrics); err != nil { - log.Printf("ERROR sending metrics to librato %s", err) - continue - } - } -} - -// calculate sum of squares from data provided by metrics.Histogram -// see http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods -func sumSquares(icount int64, mean, stDev float64) float64 { - count := float64(icount) - sumSquared := math.Pow(count*mean, 2) - sumSquares := math.Pow(count*stDev, 2) + sumSquared/count - if math.IsNaN(sumSquares) { - return 0.0 - } - return sumSquares -} -func sumSquaresTimer(t metrics.TimerSnapshot) float64 { - count := float64(t.Count()) - sumSquared := math.Pow(count*t.Mean(), 2) - sumSquares := math.Pow(count*t.StdDev(), 2) + sumSquared/count - if math.IsNaN(sumSquares) { - return 0.0 - } - return sumSquares -} - -func (rep *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Batch, err error) { - snapshot = Batch{ - // coerce timestamps to a stepping fn so that they line up in Librato graphs - MeasureTime: (now.Unix() / rep.intervalSec) * rep.intervalSec, - Source: rep.Source, - } - snapshot.Gauges = make([]Measurement, 0) - snapshot.Counters = make([]Measurement, 0) - histogramGaugeCount := 1 + len(rep.Percentiles) - r.Each(func(name string, metric interface{}) { - if rep.Namespace != "" { - name = fmt.Sprintf("%s.%s", rep.Namespace, name) - } - measurement := Measurement{} - measurement[Period] = rep.Interval.Seconds() - switch m := metric.(type) { - case metrics.Counter: - ms := m.Snapshot() - if ms.Count() > 0 { - measurement[Name] = fmt.Sprintf("%s.%s", name, "count") - measurement[Value] = float64(ms.Count()) - measurement[Attributes] = map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - } - snapshot.Counters = append(snapshot.Counters, measurement) - } - case metrics.CounterFloat64: - if count := m.Snapshot().Count(); count > 0 { - measurement[Name] = fmt.Sprintf("%s.%s", name, "count") - measurement[Value] = count - measurement[Attributes] = map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - } - snapshot.Counters = append(snapshot.Counters, measurement) - } - case metrics.Gauge: - measurement[Name] = name - measurement[Value] = float64(m.Snapshot().Value()) - snapshot.Gauges = append(snapshot.Gauges, measurement) - case metrics.GaugeFloat64: - measurement[Name] = name - measurement[Value] = m.Snapshot().Value() - snapshot.Gauges = append(snapshot.Gauges, measurement) - case metrics.GaugeInfo: - measurement[Name] = name - measurement[Value] = m.Snapshot().Value() - snapshot.Gauges = append(snapshot.Gauges, measurement) - case metrics.Histogram: - ms := m.Snapshot() - if ms.Count() > 0 { - gauges := make([]Measurement, histogramGaugeCount) - measurement[Name] = fmt.Sprintf("%s.%s", name, "hist") - measurement[Count] = uint64(ms.Count()) - measurement[Max] = float64(ms.Max()) - measurement[Min] = float64(ms.Min()) - measurement[Sum] = float64(ms.Sum()) - measurement[SumSquares] = 
sumSquares(ms.Count(), ms.Mean(), ms.StdDev()) - gauges[0] = measurement - for i, p := range rep.Percentiles { - gauges[i+1] = Measurement{ - Name: fmt.Sprintf("%s.%.2f", measurement[Name], p), - Value: ms.Percentile(p), - Period: measurement[Period], - } - } - snapshot.Gauges = append(snapshot.Gauges, gauges...) - } - case metrics.Meter: - ms := m.Snapshot() - measurement[Name] = name - measurement[Value] = float64(ms.Count()) - snapshot.Counters = append(snapshot.Counters, measurement) - snapshot.Gauges = append(snapshot.Gauges, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "1min"), - Value: ms.Rate1(), - Period: int64(rep.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "5min"), - Value: ms.Rate5(), - Period: int64(rep.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "15min"), - Value: ms.Rate15(), - Period: int64(rep.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - ) - case metrics.Timer: - ms := m.Snapshot() - measurement[Name] = name - measurement[Value] = float64(ms.Count()) - snapshot.Counters = append(snapshot.Counters, measurement) - if ms.Count() > 0 { - libratoName := fmt.Sprintf("%s.%s", name, "timer.mean") - gauges := make([]Measurement, histogramGaugeCount) - gauges[0] = Measurement{ - Name: libratoName, - Count: uint64(ms.Count()), - Sum: ms.Mean() * float64(ms.Count()), - Max: float64(ms.Max()), - Min: float64(ms.Min()), - SumSquares: sumSquaresTimer(ms), - Period: int64(rep.Interval.Seconds()), - Attributes: rep.TimerAttributes, - } - for i, p := range rep.Percentiles { - gauges[i+1] = Measurement{ - Name: fmt.Sprintf("%s.timer.%2.0f", name, p*100), - Value: ms.Percentile(p), - Period: int64(rep.Interval.Seconds()), - Attributes: rep.TimerAttributes, - } - } - snapshot.Gauges = append(snapshot.Gauges, gauges...) 
- snapshot.Gauges = append(snapshot.Gauges, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "rate.1min"), - Value: ms.Rate1(), - Period: int64(rep.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "rate.5min"), - Value: ms.Rate5(), - Period: int64(rep.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "rate.15min"), - Value: ms.Rate15(), - Period: int64(rep.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - ) - } - } - }) - return -} From a9cbcde7780b59344dd5392d5df76494d2fc880a Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 69/74] metrics: fix out of range error message (#29821) --- metrics/sample_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metrics/sample_test.go b/metrics/sample_test.go index 9835ec1c3003..92806c65b2c7 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -114,7 +114,7 @@ func TestExpDecaySample(t *testing.T) { } for _, v := range values { if v > int64(tc.updates) || v < 0 { - t.Errorf("out of range [0, %d): %v", tc.updates, v) + t.Errorf("out of range [0, %d]: %v", tc.updates, v) } } } @@ -195,7 +195,7 @@ func TestUniformSample(t *testing.T) { } for _, v := range values { if v > 1000 || v < 0 { - t.Errorf("out of range [0, 100): %v\n", v) + t.Errorf("out of range [0, 1000]: %v\n", v) } } } From fb6a26876310df920d4212e787bf520338d9bb96 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 70/74] metrics: fix flaky test `TestExpDecaySampleNanosecondRegression` (#29832) --- metrics/sample_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metrics/sample_test.go b/metrics/sample_test.go index 92806c65b2c7..87f085c70454 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -125,12 +125,12 @@ func TestExpDecaySample(t *testing.T) { // The priority becomes +Inf quickly after starting if this is done, // effectively freezing the set of samples until a rescale step happens. 
func TestExpDecaySampleNanosecondRegression(t *testing.T) { - sw := NewExpDecaySample(100, 0.99) - for i := 0; i < 100; i++ { + sw := NewExpDecaySample(1000, 0.99) + for i := 0; i < 1000; i++ { sw.Update(10) } time.Sleep(1 * time.Millisecond) - for i := 0; i < 100; i++ { + for i := 0; i < 1000; i++ { sw.Update(20) } s := sw.Snapshot() From 7e784f1d701f9d550bd7b68475a78939992f499c Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 71/74] metrics: add test for `SampleSnapshot.Sum` (#29831) --- metrics/sample_test.go | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/metrics/sample_test.go b/metrics/sample_test.go index 87f085c70454..4227b43ef775 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -103,14 +103,14 @@ func TestExpDecaySample(t *testing.T) { } snap := sample.Snapshot() if have, want := int(snap.Count()), tc.updates; have != want { - t.Errorf("have %d want %d", have, want) + t.Errorf("unexpected count: have %d want %d", have, want) } if have, want := snap.Size(), min(tc.updates, tc.reservoirSize); have != want { - t.Errorf("have %d want %d", have, want) + t.Errorf("unexpected size: have %d want %d", have, want) } values := snap.(*sampleSnapshot).values if have, want := len(values), min(tc.updates, tc.reservoirSize); have != want { - t.Errorf("have %d want %d", have, want) + t.Errorf("unexpected values length: have %d want %d", have, want) } for _, v := range values { if v > int64(tc.updates) || v < 0 { @@ -251,6 +251,9 @@ func benchmarkSample(b *testing.B, s Sample) { } func testExpDecaySampleStatistics(t *testing.T, s SampleSnapshot) { + if sum := s.Sum(); sum != 496598 { + t.Errorf("s.Sum(): 496598 != %v\n", sum) + } if count := s.Count(); count != 10000 { t.Errorf("s.Count(): 10000 != %v\n", count) } From ea355e824bcfd8cddeda60b4d4104be2fe3579f6 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 72/74] metrics: fix function comment (#29843) --- metrics/debug.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/metrics/debug.go b/metrics/debug.go index de4a2739fe08..9dfee1a86698 100644 --- a/metrics/debug.go +++ b/metrics/debug.go @@ -19,18 +19,18 @@ var ( gcStats debug.GCStats ) -// Capture new values for the Go garbage collector statistics exported in -// debug.GCStats. This is designed to be called as a goroutine. +// CaptureDebugGCStats captures new values for the Go garbage collector statistics +// exported in debug.GCStats. This is designed to be called as a goroutine. func CaptureDebugGCStats(r Registry, d time.Duration) { for range time.Tick(d) { CaptureDebugGCStatsOnce(r) } } -// Capture new values for the Go garbage collector statistics exported in -// debug.GCStats. This is designed to be called in a background goroutine. -// Giving a registry which has not been given to RegisterDebugGCStats will -// panic. +// CaptureDebugGCStatsOnce captures new values for the Go garbage collector +// statistics exported in debug.GCStats. This is designed to be called in +// a background goroutine. Giving a registry which has not been given to +// RegisterDebugGCStats will panic. 
// // Be careful (but much less so) with this because debug.ReadGCStats calls // the C function runtime·lock(runtime·mheap) which, while not a stop-the-world @@ -50,9 +50,9 @@ func CaptureDebugGCStatsOnce(r Registry) { debugMetrics.GCStats.PauseTotal.Update(int64(gcStats.PauseTotal)) } -// Register metrics for the Go garbage collector statistics exported in -// debug.GCStats. The metrics are named by their fully-qualified Go symbols, -// i.e. debug.GCStats.PauseTotal. +// RegisterDebugGCStats registers metrics for the Go garbage collector statistics +// exported in debug.GCStats. The metrics are named by their fully-qualified Go +// symbols, i.e. debug.GCStats.PauseTotal. func RegisterDebugGCStats(r Registry) { debugMetrics.GCStats.LastGC = NewGauge() debugMetrics.GCStats.NumGC = NewGauge() From 7d82f278941634f99a933ee1431143b72a3e7b61 Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 73/74] metrics: fix issues with benchmarks (#30667) --- metrics/sample_test.go | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/metrics/sample_test.go b/metrics/sample_test.go index 4227b43ef775..c855671ae23c 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -3,7 +3,6 @@ package metrics import ( "math" "math/rand" - "runtime" "testing" "time" ) @@ -27,6 +26,7 @@ func BenchmarkCompute1000(b *testing.B) { SampleVariance(mean, s) } } + func BenchmarkCompute1000000(b *testing.B) { s := make([]int64, 1000000) var sum int64 @@ -40,28 +40,6 @@ func BenchmarkCompute1000000(b *testing.B) { SampleVariance(mean, s) } } -func BenchmarkCopy1000(b *testing.B) { - s := make([]int64, 1000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - sCopy := make([]int64, len(s)) - copy(sCopy, s) - } -} -func BenchmarkCopy1000000(b *testing.B) { - s := make([]int64, 1000000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - sCopy := make([]int64, len(s)) - copy(sCopy, s) - } -} func BenchmarkExpDecaySample257(b *testing.B) { benchmarkSample(b, NewExpDecaySample(257, 0.015)) @@ -237,17 +215,9 @@ func TestUniformSampleStatistics(t *testing.T) { } func benchmarkSample(b *testing.B, s Sample) { - var memStats runtime.MemStats - runtime.ReadMemStats(&memStats) - pauseTotalNs := memStats.PauseTotalNs - b.ResetTimer() for i := 0; i < b.N; i++ { s.Update(1) } - b.StopTimer() - runtime.GC() - runtime.ReadMemStats(&memStats) - b.Logf("GC cost: %d ns/op", int(memStats.PauseTotalNs-pauseTotalNs)/b.N) } func testExpDecaySampleStatistics(t *testing.T, s SampleSnapshot) { From 6beee27886d6ccd67d75f1e08896694dcf6af85f Mon Sep 17 00:00:00 2001 From: Daniel Liu Date: Fri, 13 Dec 2024 14:00:14 +0800 Subject: [PATCH 74/74] metrics, cmd/XDC: change init-process of metrics (#30814) This PR modifies how the metrics library handles `Enabled`: previously, the package `init` decided whether to serve real metrics or just dummy-types. This has several drawbacks: - During pkg init, we need to determine whether metrics are enabled or not. So we first hacked in a check if certain geth-specific commandline-flags were enabled. Then we added a similar check for geth-env-vars. Then we almost added a very elaborate check for toml-config-file, plus toml parsing. - Using "real" types and dummy types interchangeably means that everything is hidden behind interfaces. This has a performance penalty, and also it just adds a lot of code. 
This PR removes the interface stuff, uses concrete types, and allows for the setting of Enabled to happen later. It is still assumed that `metrics.Enable()` is invoked early on. The somewhat 'heavy' operations, such as ticking meters and exp-decay, now checks the enable-flag to prevent resource leak. The change may be large, but it's mostly pretty trivial, and from the last time I gutted the metrics, I ensured that we have fairly good test coverage. --------- Co-authored-by: Felix Lange --- cmd/XDC/chaincmd.go | 11 +- cmd/XDC/config.go | 24 +++ cmd/XDC/main.go | 5 - cmd/utils/flags.go | 103 +++++----- consensus/ethash/ethash.go | 2 +- eth/downloader/downloader.go | 2 +- eth/downloader/queue.go | 4 +- eth/metrics.go | 2 +- ethdb/leveldb/leveldb.go | 24 +-- les/metrics.go | 2 +- metrics/counter.go | 94 ++------- metrics/counter_float64.go | 103 +++------- metrics/counter_float_64_test.go | 58 ++---- metrics/counter_test.go | 38 ++-- metrics/debug.go | 8 +- metrics/ewma.go | 64 +++--- metrics/ewma_test.go | 14 +- metrics/exp/exp.go | 22 +-- metrics/gauge.go | 84 +++----- metrics/gauge_float64.go | 65 ++---- metrics/gauge_info.go | 50 ++--- metrics/graphite.go | 117 ----------- metrics/graphite_test.go | 22 --- metrics/healthcheck.go | 44 +---- metrics/histogram.go | 23 ++- metrics/inactive.go | 48 ----- metrics/influxdb/influxdb.go | 16 +- metrics/influxdb/influxdb_test.go | 2 +- metrics/init_test.go | 2 +- metrics/log.go | 18 +- metrics/meter.go | 156 +++++++-------- metrics/meter_test.go | 12 +- metrics/metrics.go | 57 ++---- metrics/metrics_test.go | 71 ++----- metrics/opentsdb.go | 14 +- metrics/prometheus/collector.go | 22 +-- metrics/prometheus/collector_test.go | 2 +- metrics/registry.go | 35 ++-- metrics/registry_test.go | 18 +- metrics/resetting_sample.go | 2 +- metrics/resetting_timer.go | 90 +++------ metrics/sample.go | 282 +++++++++++++-------------- metrics/sample_test.go | 14 +- metrics/syslog.go | 16 +- metrics/timer.go | 104 ++++------ metrics/writer.go | 16 +- p2p/metrics.go | 2 +- 47 files changed, 674 insertions(+), 1310 deletions(-) delete mode 100644 metrics/graphite.go delete mode 100644 metrics/graphite_test.go delete mode 100644 metrics/inactive.go diff --git a/cmd/XDC/chaincmd.go b/cmd/XDC/chaincmd.go index 3ddcae6f145f..aa050dfd3c29 100644 --- a/cmd/XDC/chaincmd.go +++ b/cmd/XDC/chaincmd.go @@ -35,7 +35,6 @@ import ( "github.com/XinFinOrg/XDPoSChain/eth/downloader" "github.com/XinFinOrg/XDPoSChain/event" "github.com/XinFinOrg/XDPoSChain/log" - "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/urfave/cli/v2" ) @@ -239,14 +238,12 @@ func importChain(ctx *cli.Context) error { if ctx.Args().Len() < 1 { utils.Fatalf("This command requires an argument.") } - // Start metrics export if enabled - utils.SetupMetrics(ctx) - // Start system runtime metrics collection - go metrics.CollectProcessMetrics(3 * time.Second) - - stack, _ := makeFullNode(ctx) + stack, cfg := makeFullNode(ctx) defer stack.Close() + // Start metrics export if enabled + utils.SetupMetrics(&cfg.Metrics) + chain, chainDb := utils.MakeChain(ctx, stack) defer chainDb.Close() diff --git a/cmd/XDC/config.go b/cmd/XDC/config.go index 360fb768d142..ef0dbbb00df1 100644 --- a/cmd/XDC/config.go +++ b/cmd/XDC/config.go @@ -235,6 +235,9 @@ func applyValues(values []string, params *[]string) { func makeFullNode(ctx *cli.Context) (*node.Node, XDCConfig) { stack, cfg := makeConfigNode(ctx) + // Start metrics export if enabled + utils.SetupMetrics(&cfg.Metrics) + // Register XDCX's OrderBook service if requested. 
// enable in default utils.RegisterXDCXService(stack, &cfg.XDCX) @@ -310,4 +313,25 @@ func applyMetricConfig(ctx *cli.Context, cfg *XDCConfig) { if ctx.IsSet(utils.MetricsInfluxDBOrganizationFlag.Name) { cfg.Metrics.InfluxDBOrganization = ctx.String(utils.MetricsInfluxDBOrganizationFlag.Name) } + // Sanity-check the commandline flags. It is fine if some unused fields is part + // of the toml-config, but we expect the commandline to only contain relevant + // arguments, otherwise it indicates an error. + var ( + enableExport = ctx.Bool(utils.MetricsEnableInfluxDBFlag.Name) + enableExportV2 = ctx.Bool(utils.MetricsEnableInfluxDBV2Flag.Name) + ) + if enableExport || enableExportV2 { + v1FlagIsSet := ctx.IsSet(utils.MetricsInfluxDBUsernameFlag.Name) || + ctx.IsSet(utils.MetricsInfluxDBPasswordFlag.Name) + + v2FlagIsSet := ctx.IsSet(utils.MetricsInfluxDBTokenFlag.Name) || + ctx.IsSet(utils.MetricsInfluxDBOrganizationFlag.Name) || + ctx.IsSet(utils.MetricsInfluxDBBucketFlag.Name) + + if enableExport && v2FlagIsSet { + utils.Fatalf("Flags --influxdb-metrics.organization, --influxdb-metrics.token, --influxdb-metrics.bucket are only available for influxdb-v2") + } else if enableExportV2 && v1FlagIsSet { + utils.Fatalf("Flags --influxdb-metrics.username, --influxdb-metrics.password are only available for influxdb-v1") + } + } } diff --git a/cmd/XDC/main.go b/cmd/XDC/main.go index b11f3b976c63..f3b9bdba139e 100644 --- a/cmd/XDC/main.go +++ b/cmd/XDC/main.go @@ -321,11 +321,6 @@ func startNode(ctx *cli.Context, stack *node.Node, cfg XDCConfig) { if ctx.Bool(utils.LightModeFlag.Name) || ctx.String(utils.SyncModeFlag.Name) == "light" { utils.Fatalf("Light clients do not support staking") } - // Start metrics export if enabled - utils.SetupMetrics(ctx) - - // Start system runtime metrics collection - go metrics.CollectProcessMetrics(3 * time.Second) var ethereum *eth.Ethereum if err := stack.Service(ðereum); err != nil { diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index eb8d75f297ce..18ce92a23fc8 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -22,6 +22,7 @@ import ( "fmt" "math" "math/big" + "net" "os" "path/filepath" "runtime" @@ -1532,66 +1533,56 @@ func SetupNetwork(ctx *cli.Context) { params.TargetGasLimit = ctx.Uint64(MinerGasLimitFlag.Name) } -func SetupMetrics(ctx *cli.Context) { - if metrics.Enabled { - log.Info("Enabling metrics collection") - var ( - enableExport = ctx.Bool(MetricsEnableInfluxDBFlag.Name) - enableExportV2 = ctx.Bool(MetricsEnableInfluxDBV2Flag.Name) - ) - - if enableExport || enableExportV2 { - checkExclusive(ctx, MetricsEnableInfluxDBFlag, MetricsEnableInfluxDBV2Flag) - - v1FlagIsSet := ctx.IsSet(MetricsInfluxDBUsernameFlag.Name) || - ctx.IsSet(MetricsInfluxDBPasswordFlag.Name) - - v2FlagIsSet := ctx.IsSet(MetricsInfluxDBTokenFlag.Name) || - ctx.IsSet(MetricsInfluxDBOrganizationFlag.Name) || - ctx.IsSet(MetricsInfluxDBBucketFlag.Name) - - if enableExport && v2FlagIsSet { - Fatalf("Flags --influxdb.metrics.organization, --influxdb.metrics.token, --influxdb.metrics.bucket are only available for influxdb-v2") - } else if enableExportV2 && v1FlagIsSet { - Fatalf("Flags --influxdb.metrics.username, --influxdb.metrics.password are only available for influxdb-v1") - } - } - - var ( - endpoint = ctx.String(MetricsInfluxDBEndpointFlag.Name) - database = ctx.String(MetricsInfluxDBDatabaseFlag.Name) - username = ctx.String(MetricsInfluxDBUsernameFlag.Name) - password = ctx.String(MetricsInfluxDBPasswordFlag.Name) - - token = ctx.String(MetricsInfluxDBTokenFlag.Name) 
- bucket = ctx.String(MetricsInfluxDBBucketFlag.Name) - organization = ctx.String(MetricsInfluxDBOrganizationFlag.Name) - ) - - if enableExport { - log.Info("Enabling metrics export to InfluxDB") - - tagsMap := SplitTagsFlag(ctx.String(MetricsInfluxDBTagsFlag.Name)) - - go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "xdc.", tagsMap) - } else if enableExportV2 { - log.Info("Enabling metrics export to InfluxDB (v2)") - - tagsMap := SplitTagsFlag(ctx.String(MetricsInfluxDBTagsFlag.Name)) - - go influxdb.InfluxDBV2WithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, token, bucket, organization, "xdc.", tagsMap) - } +// SetupMetrics configures the metrics system. +func SetupMetrics(cfg *metrics.Config) { + if !cfg.Enabled { + return + } + log.Info("Enabling metrics collection") + metrics.Enable() - if ctx.IsSet(MetricsHTTPFlag.Name) { - address := fmt.Sprintf("%s:%d", ctx.String(MetricsHTTPFlag.Name), ctx.Int(MetricsPortFlag.Name)) - log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address) - exp.Setup(address) - } else if ctx.IsSet(MetricsPortFlag.Name) { - log.Warn(fmt.Sprintf("--%s specified without --%s, metrics server will not start.", MetricsPortFlag.Name, MetricsHTTPFlag.Name)) - } + // InfluxDB exporter. + var ( + enableExport = cfg.EnableInfluxDB + enableExportV2 = cfg.EnableInfluxDBV2 + ) + if cfg.EnableInfluxDB && cfg.EnableInfluxDBV2 { + Fatalf("Flags %v can't be used at the same time", strings.Join([]string{MetricsEnableInfluxDBFlag.Name, MetricsEnableInfluxDBV2Flag.Name}, ", ")) } + var ( + endpoint = cfg.InfluxDBEndpoint + database = cfg.InfluxDBDatabase + username = cfg.InfluxDBUsername + password = cfg.InfluxDBPassword + + token = cfg.InfluxDBToken + bucket = cfg.InfluxDBBucket + organization = cfg.InfluxDBOrganization + tagsMap = SplitTagsFlag(cfg.InfluxDBTags) + ) + if enableExport { + log.Info("Enabling metrics export to InfluxDB") + go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "geth.", tagsMap) + } else if enableExportV2 { + tagsMap := SplitTagsFlag(cfg.InfluxDBTags) + log.Info("Enabling metrics export to InfluxDB (v2)") + go influxdb.InfluxDBV2WithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, token, bucket, organization, "geth.", tagsMap) + } + + // Expvar exporter. + if cfg.HTTP != "" { + address := net.JoinHostPort(cfg.HTTP, fmt.Sprintf("%d", cfg.Port)) + log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address) + exp.Setup(address) + } else if cfg.HTTP == "" && cfg.Port != 0 { + log.Warn(fmt.Sprintf("--%s specified without --%s, metrics server will not start.", MetricsPortFlag.Name, MetricsHTTPFlag.Name)) + } + + // Enable system metrics collection. + go metrics.CollectProcessMetrics(3 * time.Second) } +// SplitTagsFlag parses a comma-separated list of k=v metrics tags. 
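// For illustration, a well-formed cfg.InfluxDBTags value is turned into the
// tag map handed to the InfluxDB reporters above (hypothetical values,
// assuming each comma-separated entry is a k=v pair as described above):
//
//	SplitTagsFlag("host=node-1,zone=eu")
//	// => map[string]string{"host": "node-1", "zone": "eu"}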
func SplitTagsFlag(tagsFlag string) map[string]string { tags := strings.Split(tagsFlag, ",") tagsMap := map[string]string{} diff --git a/consensus/ethash/ethash.go b/consensus/ethash/ethash.go index 98141b29393c..394d495a07e8 100644 --- a/consensus/ethash/ethash.go +++ b/consensus/ethash/ethash.go @@ -401,7 +401,7 @@ type Ethash struct { rand *rand.Rand // Properly seeded random source for nonces threads int // Number of threads to mine on if mining update chan struct{} // Notification channel to update mining parameters - hashrate metrics.Meter // Meter tracking the average hashrate + hashrate *metrics.Meter // Meter tracking the average hashrate // The fields below are hooks for testing shared *Ethash // Shared PoW verifier to avoid cache regeneration diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 1a500142c839..f5fb6d1c5851 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -1586,7 +1586,7 @@ func (d *Downloader) DeliverNodeData(id string, data [][]byte) (err error) { } // deliver injects a new batch of data received from a remote node. -func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter metrics.Meter) (err error) { +func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter *metrics.Meter) (err error) { // Update the delivery metrics for both good and failed deliveries inMeter.Mark(int64(packet.Items())) defer func() { diff --git a/eth/downloader/queue.go b/eth/downloader/queue.go index 7f55ce733574..b21460c2efd6 100644 --- a/eth/downloader/queue.go +++ b/eth/downloader/queue.go @@ -650,7 +650,7 @@ func (q *queue) ExpireReceipts(timeout time.Duration) map[string]int { // Note, this method expects the queue lock to be already held. The // reason the lock is not obtained in here is because the parameters already need // to access the queue, so they already need a lock anyway. -func (q *queue) expire(timeout time.Duration, pendPool map[string]*fetchRequest, taskQueue *prque.Prque, timeoutMeter metrics.Meter) map[string]int { +func (q *queue) expire(timeout time.Duration, pendPool map[string]*fetchRequest, taskQueue *prque.Prque, timeoutMeter *metrics.Meter) map[string]int { // Iterate over the expired requests and return each to the queue expiries := make(map[string]int) for id, request := range pendPool { @@ -805,7 +805,7 @@ func (q *queue) DeliverReceipts(id string, receiptList [][]*types.Receipt) (int, // reason the lock is not obtained in here is because the parameters already need // to access the queue, so they already need a lock anyway. func (q *queue) deliver(id string, taskPool map[common.Hash]*types.Header, taskQueue *prque.Prque, - pendPool map[string]*fetchRequest, donePool map[common.Hash]struct{}, reqTimer metrics.Timer, + pendPool map[string]*fetchRequest, donePool map[common.Hash]struct{}, reqTimer *metrics.Timer, results int, reconstruct func(header *types.Header, index int, result *fetchResult) error) (int, error) { // Short circuit if the data was never requested diff --git a/eth/metrics.go b/eth/metrics.go index 2f3bd6bfb839..59d9258a0625 100644 --- a/eth/metrics.go +++ b/eth/metrics.go @@ -66,7 +66,7 @@ type meteredMsgReadWriter struct { // newMeteredMsgWriter wraps a p2p MsgReadWriter with metering support. If the // metrics system is disabled, this function returns the original object. 
func newMeteredMsgWriter(rw p2p.MsgReadWriter) p2p.MsgReadWriter { - if !metrics.Enabled { + if !metrics.Enabled() { return rw } return &meteredMsgReadWriter{MsgReadWriter: rw} diff --git a/ethdb/leveldb/leveldb.go b/ethdb/leveldb/leveldb.go index 601ffc94a1df..cd4603982695 100644 --- a/ethdb/leveldb/leveldb.go +++ b/ethdb/leveldb/leveldb.go @@ -62,18 +62,18 @@ type Database struct { fn string // filename for reporting db *leveldb.DB // LevelDB instance - compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction - compReadMeter metrics.Meter // Meter for measuring the data read during compaction - compWriteMeter metrics.Meter // Meter for measuring the data written during compaction - writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction - writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction - diskSizeGauge metrics.Gauge // Gauge for tracking the size of all the levels in the database - diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read - diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written - memCompGauge metrics.Gauge // Gauge for tracking the number of memory compaction - level0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in level0 - nonlevel0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in non0 level - seekCompGauge metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt + compTimeMeter *metrics.Meter // Meter for measuring the total time spent in database compaction + compReadMeter *metrics.Meter // Meter for measuring the data read during compaction + compWriteMeter *metrics.Meter // Meter for measuring the data written during compaction + writeDelayNMeter *metrics.Meter // Meter for measuring the write delay number due to database compaction + writeDelayMeter *metrics.Meter // Meter for measuring the write delay duration due to database compaction + diskSizeGauge *metrics.Gauge // Gauge for tracking the size of all the levels in the database + diskReadMeter *metrics.Meter // Meter for measuring the effective amount of data read + diskWriteMeter *metrics.Meter // Meter for measuring the effective amount of data written + memCompGauge *metrics.Gauge // Gauge for tracking the number of memory compaction + level0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in level0 + nonlevel0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in non0 level + seekCompGauge *metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt quitLock sync.Mutex // Mutex protecting the quit channel access quitChan chan chan error // Quit channel to stop the metrics collection before closing the database diff --git a/les/metrics.go b/les/metrics.go index 4ba1af2475c5..9215c712eb71 100644 --- a/les/metrics.go +++ b/les/metrics.go @@ -74,7 +74,7 @@ type meteredMsgReadWriter struct { // newMeteredMsgWriter wraps a p2p MsgReadWriter with metering support. If the // metrics system is disabled, this function returns the original object. 
func newMeteredMsgWriter(rw p2p.MsgReadWriter) p2p.MsgReadWriter { - if !metrics.Enabled { + if !metrics.Enabled() { return rw } return &meteredMsgReadWriter{MsgReadWriter: rw} diff --git a/metrics/counter.go b/metrics/counter.go index dbe8e16a90d2..0f373b0d9289 100644 --- a/metrics/counter.go +++ b/metrics/counter.go @@ -4,109 +4,55 @@ import ( "sync/atomic" ) -type CounterSnapshot interface { - Count() int64 -} - -// Counter hold an int64 value that can be incremented and decremented. -type Counter interface { - Clear() - Dec(int64) - Inc(int64) - Snapshot() CounterSnapshot -} - // GetOrRegisterCounter returns an existing Counter or constructs and registers -// a new StandardCounter. -func GetOrRegisterCounter(name string, r Registry) Counter { - if nil == r { +// a new Counter. +func GetOrRegisterCounter(name string, r Registry) *Counter { + if r == nil { r = DefaultRegistry } - return r.GetOrRegister(name, NewCounter).(Counter) + return r.GetOrRegister(name, NewCounter).(*Counter) } -// GetOrRegisterCounterForced returns an existing Counter or constructs and registers a -// new Counter no matter the global switch is enabled or not. -// Be sure to unregister the counter from the registry once it is of no use to -// allow for garbage collection. -func GetOrRegisterCounterForced(name string, r Registry) Counter { - if nil == r { - r = DefaultRegistry - } - return r.GetOrRegister(name, NewCounterForced).(Counter) +// NewCounter constructs a new Counter. +func NewCounter() *Counter { + return new(Counter) } -// NewCounter constructs a new StandardCounter. -func NewCounter() Counter { - if !Enabled { - return NilCounter{} - } - return new(StandardCounter) -} - -// NewCounterForced constructs a new StandardCounter and returns it no matter if -// the global switch is enabled or not. -func NewCounterForced() Counter { - return new(StandardCounter) -} - -// NewRegisteredCounter constructs and registers a new StandardCounter. -func NewRegisteredCounter(name string, r Registry) Counter { +// NewRegisteredCounter constructs and registers a new Counter. +func NewRegisteredCounter(name string, r Registry) *Counter { c := NewCounter() - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} - -// NewRegisteredCounterForced constructs and registers a new StandardCounter -// and launches a goroutine no matter the global switch is enabled or not. -// Be sure to unregister the counter from the registry once it is of no use to -// allow for garbage collection. -func NewRegisteredCounterForced(name string, r Registry) Counter { - c := NewCounterForced() - if nil == r { + if r == nil { r = DefaultRegistry } r.Register(name, c) return c } -// counterSnapshot is a read-only copy of another Counter. -type counterSnapshot int64 +// CounterSnapshot is a read-only copy of a Counter. +type CounterSnapshot int64 // Count returns the count at the time the snapshot was taken. -func (c counterSnapshot) Count() int64 { return int64(c) } - -// NilCounter is a no-op Counter. -type NilCounter struct{} +func (c CounterSnapshot) Count() int64 { return int64(c) } -func (NilCounter) Clear() {} -func (NilCounter) Dec(i int64) {} -func (NilCounter) Inc(i int64) {} -func (NilCounter) Snapshot() CounterSnapshot { return (*emptySnapshot)(nil) } - -// StandardCounter is the standard implementation of a Counter and uses the -// sync/atomic package to manage a single int64 value. -type StandardCounter atomic.Int64 +// Counter hold an int64 value that can be incremented and decremented. 
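// A minimal usage sketch of the concrete counter type below, as seen by an
// external caller (illustrative values only):
//
//	c := metrics.NewCounter() // *Counter
//	c.Inc(3)
//	c.Dec(1)
//	total := c.Snapshot().Count() // 2, read from the read-only CounterSnapshot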
+type Counter atomic.Int64 // Clear sets the counter to zero. -func (c *StandardCounter) Clear() { +func (c *Counter) Clear() { (*atomic.Int64)(c).Store(0) } // Dec decrements the counter by the given amount. -func (c *StandardCounter) Dec(i int64) { +func (c *Counter) Dec(i int64) { (*atomic.Int64)(c).Add(-i) } // Inc increments the counter by the given amount. -func (c *StandardCounter) Inc(i int64) { +func (c *Counter) Inc(i int64) { (*atomic.Int64)(c).Add(i) } // Snapshot returns a read-only copy of the counter. -func (c *StandardCounter) Snapshot() CounterSnapshot { - return counterSnapshot((*atomic.Int64)(c).Load()) +func (c *Counter) Snapshot() CounterSnapshot { + return CounterSnapshot((*atomic.Int64)(c).Load()) } diff --git a/metrics/counter_float64.go b/metrics/counter_float64.go index 15c81494efb8..91c4215c4df6 100644 --- a/metrics/counter_float64.go +++ b/metrics/counter_float64.go @@ -5,114 +5,57 @@ import ( "sync/atomic" ) -type CounterFloat64Snapshot interface { - Count() float64 -} - -// CounterFloat64 holds a float64 value that can be incremented and decremented. -type CounterFloat64 interface { - Clear() - Dec(float64) - Inc(float64) - Snapshot() CounterFloat64Snapshot -} - -// GetOrRegisterCounterFloat64 returns an existing CounterFloat64 or constructs and registers -// a new StandardCounterFloat64. -func GetOrRegisterCounterFloat64(name string, r Registry) CounterFloat64 { +// GetOrRegisterCounterFloat64 returns an existing *CounterFloat64 or constructs and registers +// a new CounterFloat64. +func GetOrRegisterCounterFloat64(name string, r Registry) *CounterFloat64 { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewCounterFloat64).(CounterFloat64) -} - -// GetOrRegisterCounterFloat64Forced returns an existing CounterFloat64 or constructs and registers a -// new CounterFloat64 no matter the global switch is enabled or not. -// Be sure to unregister the counter from the registry once it is of no use to -// allow for garbage collection. -func GetOrRegisterCounterFloat64Forced(name string, r Registry) CounterFloat64 { - if nil == r { - r = DefaultRegistry - } - return r.GetOrRegister(name, NewCounterFloat64Forced).(CounterFloat64) -} - -// NewCounterFloat64 constructs a new StandardCounterFloat64. -func NewCounterFloat64() CounterFloat64 { - if !Enabled { - return NilCounterFloat64{} - } - return &StandardCounterFloat64{} + return r.GetOrRegister(name, NewCounterFloat64).(*CounterFloat64) } -// NewCounterFloat64Forced constructs a new StandardCounterFloat64 and returns it no matter if -// the global switch is enabled or not. -func NewCounterFloat64Forced() CounterFloat64 { - return &StandardCounterFloat64{} +// NewCounterFloat64 constructs a new CounterFloat64. +func NewCounterFloat64() *CounterFloat64 { + return new(CounterFloat64) } -// NewRegisteredCounterFloat64 constructs and registers a new StandardCounterFloat64. -func NewRegisteredCounterFloat64(name string, r Registry) CounterFloat64 { +// NewRegisteredCounterFloat64 constructs and registers a new CounterFloat64. +func NewRegisteredCounterFloat64(name string, r Registry) *CounterFloat64 { c := NewCounterFloat64() - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} - -// NewRegisteredCounterFloat64Forced constructs and registers a new StandardCounterFloat64 -// and launches a goroutine no matter the global switch is enabled or not. -// Be sure to unregister the counter from the registry once it is of no use to -// allow for garbage collection. 
-func NewRegisteredCounterFloat64Forced(name string, r Registry) CounterFloat64 { - c := NewCounterFloat64Forced() - if nil == r { + if r == nil { r = DefaultRegistry } r.Register(name, c) return c } -// counterFloat64Snapshot is a read-only copy of another CounterFloat64. -type counterFloat64Snapshot float64 +// CounterFloat64Snapshot is a read-only copy of a float64 counter. +type CounterFloat64Snapshot float64 // Count returns the value at the time the snapshot was taken. -func (c counterFloat64Snapshot) Count() float64 { return float64(c) } - -type NilCounterFloat64 struct{} +func (c CounterFloat64Snapshot) Count() float64 { return float64(c) } -func (NilCounterFloat64) Clear() {} -func (NilCounterFloat64) Count() float64 { return 0.0 } -func (NilCounterFloat64) Dec(i float64) {} -func (NilCounterFloat64) Inc(i float64) {} -func (NilCounterFloat64) Snapshot() CounterFloat64Snapshot { return NilCounterFloat64{} } - -// StandardCounterFloat64 is the standard implementation of a CounterFloat64 and uses the -// atomic to manage a single float64 value. -type StandardCounterFloat64 struct { - floatBits atomic.Uint64 -} +// CounterFloat64 holds a float64 value that can be incremented and decremented. +type CounterFloat64 atomic.Uint64 // Clear sets the counter to zero. -func (c *StandardCounterFloat64) Clear() { - c.floatBits.Store(0) +func (c *CounterFloat64) Clear() { + (*atomic.Uint64)(c).Store(0) } // Dec decrements the counter by the given amount. -func (c *StandardCounterFloat64) Dec(v float64) { - atomicAddFloat(&c.floatBits, -v) +func (c *CounterFloat64) Dec(v float64) { + atomicAddFloat((*atomic.Uint64)(c), -v) } // Inc increments the counter by the given amount. -func (c *StandardCounterFloat64) Inc(v float64) { - atomicAddFloat(&c.floatBits, v) +func (c *CounterFloat64) Inc(v float64) { + atomicAddFloat((*atomic.Uint64)(c), v) } // Snapshot returns a read-only copy of the counter. 
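// CounterFloat64 mirrors the int64 Counter above; a small sketch of the same
// flow with float values (illustrative numbers):
//
//	c := metrics.NewCounterFloat64()
//	c.Inc(1.5)
//	c.Dec(0.25)
//	total := c.Snapshot().Count() // 1.25, as a CounterFloat64Snapshot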
-func (c *StandardCounterFloat64) Snapshot() CounterFloat64Snapshot { - v := math.Float64frombits(c.floatBits.Load()) - return counterFloat64Snapshot(v) +func (c *CounterFloat64) Snapshot() CounterFloat64Snapshot { + return CounterFloat64Snapshot(math.Float64frombits((*atomic.Uint64)(c).Load())) } func atomicAddFloat(fbits *atomic.Uint64, v float64) { diff --git a/metrics/counter_float_64_test.go b/metrics/counter_float_64_test.go index c21bd3307fa1..618cbbbc2b08 100644 --- a/metrics/counter_float_64_test.go +++ b/metrics/counter_float_64_test.go @@ -32,61 +32,35 @@ func BenchmarkCounterFloat64Parallel(b *testing.B) { } } -func TestCounterFloat64Clear(t *testing.T) { +func TestCounterFloat64(t *testing.T) { c := NewCounterFloat64() - c.Inc(1.0) - c.Clear() if count := c.Snapshot().Count(); count != 0 { - t.Errorf("c.Count(): 0 != %v\n", count) + t.Errorf("wrong count: %v", count) } -} - -func TestCounterFloat64Dec1(t *testing.T) { - c := NewCounterFloat64() c.Dec(1.0) if count := c.Snapshot().Count(); count != -1.0 { - t.Errorf("c.Count(): -1.0 != %v\n", count) + t.Errorf("wrong count: %v", count) } -} - -func TestCounterFloat64Dec2(t *testing.T) { - c := NewCounterFloat64() + snapshot := c.Snapshot() c.Dec(2.0) - if count := c.Snapshot().Count(); count != -2.0 { - t.Errorf("c.Count(): -2.0 != %v\n", count) + if count := c.Snapshot().Count(); count != -3.0 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterFloat64Inc1(t *testing.T) { - c := NewCounterFloat64() c.Inc(1.0) - if count := c.Snapshot().Count(); count != 1.0 { - t.Errorf("c.Count(): 1.0 != %v\n", count) + if count := c.Snapshot().Count(); count != -2.0 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterFloat64Inc2(t *testing.T) { - c := NewCounterFloat64() c.Inc(2.0) - if count := c.Snapshot().Count(); count != 2.0 { - t.Errorf("c.Count(): 2.0 != %v\n", count) + if count := c.Snapshot().Count(); count != 0.0 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterFloat64Snapshot(t *testing.T) { - c := NewCounterFloat64() - c.Inc(1.0) - snapshot := c.Snapshot() - c.Inc(1.0) - if count := snapshot.Count(); count != 1.0 { - t.Errorf("c.Count(): 1.0 != %v\n", count) + if count := snapshot.Count(); count != -1.0 { + t.Errorf("snapshot count wrong: %v", count) } -} - -func TestCounterFloat64Zero(t *testing.T) { - c := NewCounterFloat64() - if count := c.Snapshot().Count(); count != 0 { - t.Errorf("c.Count(): 0 != %v\n", count) + c.Inc(1.0) + c.Clear() + if count := c.Snapshot().Count(); count != 0.0 { + t.Errorf("wrong count: %v", count) } } diff --git a/metrics/counter_test.go b/metrics/counter_test.go index 1b15b23f215f..bf0ca6bae44f 100644 --- a/metrics/counter_test.go +++ b/metrics/counter_test.go @@ -19,35 +19,26 @@ func TestCounterClear(t *testing.T) { } } -func TestCounterDec1(t *testing.T) { +func TestCounter(t *testing.T) { c := NewCounter() + if count := c.Snapshot().Count(); count != 0 { + t.Errorf("wrong count: %v", count) + } c.Dec(1) if count := c.Snapshot().Count(); count != -1 { - t.Errorf("c.Count(): -1 != %v\n", count) + t.Errorf("wrong count: %v", count) } -} - -func TestCounterDec2(t *testing.T) { - c := NewCounter() c.Dec(2) - if count := c.Snapshot().Count(); count != -2 { - t.Errorf("c.Count(): -2 != %v\n", count) + if count := c.Snapshot().Count(); count != -3 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterInc1(t *testing.T) { - c := NewCounter() c.Inc(1) - if count := c.Snapshot().Count(); count != 1 { - t.Errorf("c.Count(): 1 != %v\n", count) + if count := 
c.Snapshot().Count(); count != -2 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterInc2(t *testing.T) { - c := NewCounter() c.Inc(2) - if count := c.Snapshot().Count(); count != 2 { - t.Errorf("c.Count(): 2 != %v\n", count) + if count := c.Snapshot().Count(); count != 0 { + t.Errorf("wrong count: %v", count) } } @@ -61,13 +52,6 @@ func TestCounterSnapshot(t *testing.T) { } } -func TestCounterZero(t *testing.T) { - c := NewCounter() - if count := c.Snapshot().Count(); count != 0 { - t.Errorf("c.Count(): 0 != %v\n", count) - } -} - func TestGetOrRegisterCounter(t *testing.T) { r := NewRegistry() NewRegisteredCounter("foo", r).Inc(47) diff --git a/metrics/debug.go b/metrics/debug.go index 9dfee1a86698..5d0d3992f10b 100644 --- a/metrics/debug.go +++ b/metrics/debug.go @@ -8,13 +8,13 @@ import ( var ( debugMetrics struct { GCStats struct { - LastGC Gauge - NumGC Gauge + LastGC *Gauge + NumGC *Gauge Pause Histogram //PauseQuantiles Histogram - PauseTotal Gauge + PauseTotal *Gauge } - ReadGCStats Timer + ReadGCStats *Timer } gcStats debug.GCStats ) diff --git a/metrics/ewma.go b/metrics/ewma.go index 1d7a4f00cf45..194527a79892 100644 --- a/metrics/ewma.go +++ b/metrics/ewma.go @@ -7,56 +7,36 @@ import ( "time" ) -type EWMASnapshot interface { - Rate() float64 -} +// EWMASnapshot is a read-only copy of an EWMA. +type EWMASnapshot float64 -// EWMAs continuously calculate an exponentially-weighted moving average -// based on an outside source of clock ticks. -type EWMA interface { - Snapshot() EWMASnapshot - Tick() - Update(int64) -} +// Rate returns the rate of events per second at the time the snapshot was +// taken. +func (a EWMASnapshot) Rate() float64 { return float64(a) } // NewEWMA constructs a new EWMA with the given alpha. -func NewEWMA(alpha float64) EWMA { - return &StandardEWMA{alpha: alpha} +func NewEWMA(alpha float64) *EWMA { + return &EWMA{alpha: alpha} } // NewEWMA1 constructs a new EWMA for a one-minute moving average. -func NewEWMA1() EWMA { +func NewEWMA1() *EWMA { return NewEWMA(1 - math.Exp(-5.0/60.0/1)) } // NewEWMA5 constructs a new EWMA for a five-minute moving average. -func NewEWMA5() EWMA { +func NewEWMA5() *EWMA { return NewEWMA(1 - math.Exp(-5.0/60.0/5)) } // NewEWMA15 constructs a new EWMA for a fifteen-minute moving average. -func NewEWMA15() EWMA { +func NewEWMA15() *EWMA { return NewEWMA(1 - math.Exp(-5.0/60.0/15)) } -// ewmaSnapshot is a read-only copy of another EWMA. -type ewmaSnapshot float64 - -// Rate returns the rate of events per second at the time the snapshot was -// taken. -func (a ewmaSnapshot) Rate() float64 { return float64(a) } - -// NilEWMA is a no-op EWMA. -type NilEWMA struct{} - -func (NilEWMA) Snapshot() EWMASnapshot { return (*emptySnapshot)(nil) } -func (NilEWMA) Tick() {} -func (NilEWMA) Update(n int64) {} - -// StandardEWMA is the standard implementation of an EWMA and tracks the number -// of uncounted events and processes them on each tick. It uses the -// sync/atomic package to manage uncounted events. -type StandardEWMA struct { +// EWMA continuously calculate an exponentially-weighted moving average +// based on an outside source of clock ticks. +type EWMA struct { uncounted atomic.Int64 alpha float64 rate atomic.Uint64 @@ -65,27 +45,27 @@ type StandardEWMA struct { } // Snapshot returns a read-only copy of the EWMA. 
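// Internally the rate is kept in events per nanosecond (see fetchInstantRate
// below, which divides by 5*time.Second); Snapshot scales it by time.Second
// so callers read events per second.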
-func (a *StandardEWMA) Snapshot() EWMASnapshot { +func (a *EWMA) Snapshot() EWMASnapshot { r := math.Float64frombits(a.rate.Load()) * float64(time.Second) - return ewmaSnapshot(r) + return EWMASnapshot(r) } -// Tick ticks the clock to update the moving average. It assumes it is called +// tick ticks the clock to update the moving average. It assumes it is called // every five seconds. -func (a *StandardEWMA) Tick() { +func (a *EWMA) tick() { // Optimization to avoid mutex locking in the hot-path. if a.init.Load() { a.updateRate(a.fetchInstantRate()) return } - // Slow-path: this is only needed on the first Tick() and preserves transactional updating + // Slow-path: this is only needed on the first tick() and preserves transactional updating // of init and rate in the else block. The first conditional is needed below because // a different thread could have set a.init = 1 between the time of the first atomic load and when // the lock was acquired. a.mutex.Lock() if a.init.Load() { // The fetchInstantRate() uses atomic loading, which is unnecessary in this critical section - // but again, this section is only invoked on the first successful Tick() operation. + // but again, this section is only invoked on the first successful tick() operation. a.updateRate(a.fetchInstantRate()) } else { a.init.Store(true) @@ -94,18 +74,18 @@ func (a *StandardEWMA) Tick() { a.mutex.Unlock() } -func (a *StandardEWMA) fetchInstantRate() float64 { +func (a *EWMA) fetchInstantRate() float64 { count := a.uncounted.Swap(0) return float64(count) / float64(5*time.Second) } -func (a *StandardEWMA) updateRate(instantRate float64) { +func (a *EWMA) updateRate(instantRate float64) { currentRate := math.Float64frombits(a.rate.Load()) currentRate += a.alpha * (instantRate - currentRate) a.rate.Store(math.Float64bits(currentRate)) } // Update adds n uncounted events. 
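// The uncounted events are folded into the moving average on the next tick,
// which the meter arbiter below fires every five seconds: the instantaneous
// rate is n/5s and updateRate above applies rate += alpha*(instant - rate).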
-func (a *StandardEWMA) Update(n int64) { +func (a *EWMA) Update(n int64) { a.uncounted.Add(n) } diff --git a/metrics/ewma_test.go b/metrics/ewma_test.go index 9a91b43db81a..4b9bde3a4b37 100644 --- a/metrics/ewma_test.go +++ b/metrics/ewma_test.go @@ -12,7 +12,7 @@ func BenchmarkEWMA(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { a.Update(1) - a.Tick() + a.tick() } } @@ -23,7 +23,7 @@ func BenchmarkEWMAParallel(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { a.Update(1) - a.Tick() + a.tick() } }) } @@ -31,7 +31,7 @@ func BenchmarkEWMAParallel(b *testing.B) { func TestEWMA1(t *testing.T) { a := NewEWMA1() a.Update(3) - a.Tick() + a.tick() for i, want := range []float64{0.6, 0.22072766470286553, 0.08120116994196772, 0.029872241020718428, 0.01098938333324054, 0.004042768199451294, 0.0014872513059998212, @@ -49,7 +49,7 @@ func TestEWMA1(t *testing.T) { func TestEWMA5(t *testing.T) { a := NewEWMA5() a.Update(3) - a.Tick() + a.tick() for i, want := range []float64{ 0.6, 0.49123845184678905, 0.4021920276213837, 0.32928698165641596, 0.269597378470333, 0.2207276647028654, 0.18071652714732128, @@ -67,7 +67,7 @@ func TestEWMA5(t *testing.T) { func TestEWMA15(t *testing.T) { a := NewEWMA15() a.Update(3) - a.Tick() + a.tick() for i, want := range []float64{ 0.6, 0.5613041910189706, 0.5251039914257684, 0.4912384518467888184678905, 0.459557003018789, 0.4299187863442732, 0.4021920276213831, @@ -82,8 +82,8 @@ func TestEWMA15(t *testing.T) { } } -func elapseMinute(a EWMA) { +func elapseMinute(a *EWMA) { for i := 0; i < 12; i++ { - a.Tick() + a.tick() } } diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index e7f54d1320fa..149176989d9f 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -146,7 +146,7 @@ func (exp *exp) publishHistogram(name string, metric metrics.Histogram) { exp.getFloat(name + ".999-percentile").Set(ps[4]) } -func (exp *exp) publishMeter(name string, metric metrics.Meter) { +func (exp *exp) publishMeter(name string, metric *metrics.Meter) { m := metric.Snapshot() exp.getInt(name + ".count").Set(m.Count()) exp.getFloat(name + ".one-minute").Set(m.Rate1()) @@ -155,7 +155,7 @@ func (exp *exp) publishMeter(name string, metric metrics.Meter) { exp.getFloat(name + ".mean").Set(m.RateMean()) } -func (exp *exp) publishTimer(name string, metric metrics.Timer) { +func (exp *exp) publishTimer(name string, metric *metrics.Timer) { t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) exp.getInt(name + ".count").Set(t.Count()) @@ -174,7 +174,7 @@ func (exp *exp) publishTimer(name string, metric metrics.Timer) { exp.getFloat(name + ".mean-rate").Set(t.RateMean()) } -func (exp *exp) publishResettingTimer(name string, metric metrics.ResettingTimer) { +func (exp *exp) publishResettingTimer(name string, metric *metrics.ResettingTimer) { t := metric.Snapshot() ps := t.Percentiles([]float64{0.50, 0.75, 0.95, 0.99}) exp.getInt(name + ".count").Set(int64(t.Count())) @@ -188,23 +188,23 @@ func (exp *exp) publishResettingTimer(name string, metric metrics.ResettingTimer func (exp *exp) syncToExpvar() { exp.registry.Each(func(name string, i interface{}) { switch i := i.(type) { - case metrics.Counter: + case *metrics.Counter: exp.publishCounter(name, i.Snapshot()) - case metrics.CounterFloat64: + case *metrics.CounterFloat64: exp.publishCounterFloat64(name, i.Snapshot()) - case metrics.Gauge: + case *metrics.Gauge: exp.publishGauge(name, i.Snapshot()) - case metrics.GaugeFloat64: + case *metrics.GaugeFloat64: exp.publishGaugeFloat64(name, 
i.Snapshot()) - case metrics.GaugeInfo: + case *metrics.GaugeInfo: exp.publishGaugeInfo(name, i.Snapshot()) case metrics.Histogram: exp.publishHistogram(name, i) - case metrics.Meter: + case *metrics.Meter: exp.publishMeter(name, i) - case metrics.Timer: + case *metrics.Timer: exp.publishTimer(name, i) - case metrics.ResettingTimer: + case *metrics.ResettingTimer: exp.publishResettingTimer(name, i) default: panic(fmt.Sprintf("unsupported type for '%s': %T", name, i)) diff --git a/metrics/gauge.go b/metrics/gauge.go index 6d10bbf85660..ba7843e03b27 100644 --- a/metrics/gauge.go +++ b/metrics/gauge.go @@ -2,97 +2,69 @@ package metrics import "sync/atomic" -// GaugeSnapshot contains a readonly int64. -type GaugeSnapshot interface { - Value() int64 -} +// GaugeSnapshot is a read-only copy of a Gauge. +type GaugeSnapshot int64 -// Gauge holds an int64 value that can be set arbitrarily. -type Gauge interface { - Snapshot() GaugeSnapshot - Update(int64) - UpdateIfGt(int64) - Dec(int64) - Inc(int64) -} +// Value returns the value at the time the snapshot was taken. +func (g GaugeSnapshot) Value() int64 { return int64(g) } // GetOrRegisterGauge returns an existing Gauge or constructs and registers a -// new StandardGauge. -func GetOrRegisterGauge(name string, r Registry) Gauge { - if nil == r { +// new Gauge. +func GetOrRegisterGauge(name string, r Registry) *Gauge { + if r == nil { r = DefaultRegistry } - return r.GetOrRegister(name, NewGauge).(Gauge) + return r.GetOrRegister(name, NewGauge).(*Gauge) } -// NewGauge constructs a new StandardGauge. -func NewGauge() Gauge { - if !Enabled { - return NilGauge{} - } - return &StandardGauge{} +// NewGauge constructs a new Gauge. +func NewGauge() *Gauge { + return &Gauge{} } -// NewRegisteredGauge constructs and registers a new StandardGauge. -func NewRegisteredGauge(name string, r Registry) Gauge { +// NewRegisteredGauge constructs and registers a new Gauge. +func NewRegisteredGauge(name string, r Registry) *Gauge { c := NewGauge() - if nil == r { + if r == nil { r = DefaultRegistry } r.Register(name, c) return c } -// gaugeSnapshot is a read-only copy of another Gauge. -type gaugeSnapshot int64 - -// Value returns the value at the time the snapshot was taken. -func (g gaugeSnapshot) Value() int64 { return int64(g) } - -// NilGauge is a no-op Gauge. -type NilGauge struct{} - -func (NilGauge) Snapshot() GaugeSnapshot { return (*emptySnapshot)(nil) } -func (NilGauge) Update(v int64) {} -func (NilGauge) UpdateIfGt(v int64) {} -func (NilGauge) Dec(i int64) {} -func (NilGauge) Inc(i int64) {} - -// StandardGauge is the standard implementation of a Gauge and uses the -// sync/atomic package to manage a single int64 value. -type StandardGauge struct { - value atomic.Int64 -} +// Gauge holds an int64 value that can be set arbitrarily. +type Gauge atomic.Int64 // Snapshot returns a read-only copy of the gauge. -func (g *StandardGauge) Snapshot() GaugeSnapshot { - return gaugeSnapshot(g.value.Load()) +func (g *Gauge) Snapshot() GaugeSnapshot { + return GaugeSnapshot((*atomic.Int64)(g).Load()) } // Update updates the gauge's value. -func (g *StandardGauge) Update(v int64) { - g.value.Store(v) +func (g *Gauge) Update(v int64) { + (*atomic.Int64)(g).Store(v) } // UpdateIfGt updates the gauge's value if v is larger then the current value. 
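// For example (illustrative values):
//
//	g := metrics.NewGauge()
//	g.Update(10)
//	g.UpdateIfGt(5)  // no change, 5 is not larger than the current 10
//	g.UpdateIfGt(20) // stored, the next snapshot reads 20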
-func (g *StandardGauge) UpdateIfGt(v int64) { +func (g *Gauge) UpdateIfGt(v int64) { + value := (*atomic.Int64)(g) for { - exist := g.value.Load() + exist := value.Load() if exist >= v { break } - if g.value.CompareAndSwap(exist, v) { + if value.CompareAndSwap(exist, v) { break } } } // Dec decrements the gauge's current value by the given amount. -func (g *StandardGauge) Dec(i int64) { - g.value.Add(-i) +func (g *Gauge) Dec(i int64) { + (*atomic.Int64)(g).Add(-i) } // Inc increments the gauge's current value by the given amount. -func (g *StandardGauge) Inc(i int64) { - g.value.Add(i) +func (g *Gauge) Inc(i int64) { + (*atomic.Int64)(g).Add(i) } diff --git a/metrics/gauge_float64.go b/metrics/gauge_float64.go index c1c3c6b6e6f0..05b401ef9cb7 100644 --- a/metrics/gauge_float64.go +++ b/metrics/gauge_float64.go @@ -5,35 +5,28 @@ import ( "sync/atomic" ) -type GaugeFloat64Snapshot interface { - Value() float64 -} - -// GaugeFloat64 hold a float64 value that can be set arbitrarily. -type GaugeFloat64 interface { - Snapshot() GaugeFloat64Snapshot - Update(float64) -} - // GetOrRegisterGaugeFloat64 returns an existing GaugeFloat64 or constructs and registers a -// new StandardGaugeFloat64. -func GetOrRegisterGaugeFloat64(name string, r Registry) GaugeFloat64 { +// new GaugeFloat64. +func GetOrRegisterGaugeFloat64(name string, r Registry) *GaugeFloat64 { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewGaugeFloat64()).(GaugeFloat64) + return r.GetOrRegister(name, NewGaugeFloat64()).(*GaugeFloat64) } -// NewGaugeFloat64 constructs a new StandardGaugeFloat64. -func NewGaugeFloat64() GaugeFloat64 { - if !Enabled { - return NilGaugeFloat64{} - } - return &StandardGaugeFloat64{} +// GaugeFloat64Snapshot is a read-only copy of a GaugeFloat64. +type GaugeFloat64Snapshot float64 + +// Value returns the value at the time the snapshot was taken. +func (g GaugeFloat64Snapshot) Value() float64 { return float64(g) } + +// NewGaugeFloat64 constructs a new GaugeFloat64. +func NewGaugeFloat64() *GaugeFloat64 { + return new(GaugeFloat64) } -// NewRegisteredGaugeFloat64 constructs and registers a new StandardGaugeFloat64. -func NewRegisteredGaugeFloat64(name string, r Registry) GaugeFloat64 { +// NewRegisteredGaugeFloat64 constructs and registers a new GaugeFloat64. +func NewRegisteredGaugeFloat64(name string, r Registry) *GaugeFloat64 { c := NewGaugeFloat64() if nil == r { r = DefaultRegistry @@ -42,32 +35,16 @@ func NewRegisteredGaugeFloat64(name string, r Registry) GaugeFloat64 { return c } -// gaugeFloat64Snapshot is a read-only copy of another GaugeFloat64. -type gaugeFloat64Snapshot float64 - -// Value returns the value at the time the snapshot was taken. -func (g gaugeFloat64Snapshot) Value() float64 { return float64(g) } - -// NilGaugeFloat64 is a no-op Gauge. -type NilGaugeFloat64 struct{} - -func (NilGaugeFloat64) Snapshot() GaugeFloat64Snapshot { return NilGaugeFloat64{} } -func (NilGaugeFloat64) Update(v float64) {} -func (NilGaugeFloat64) Value() float64 { return 0.0 } - -// StandardGaugeFloat64 is the standard implementation of a GaugeFloat64 and uses -// atomic to manage a single float64 value. -type StandardGaugeFloat64 struct { - floatBits atomic.Uint64 -} +// GaugeFloat64 hold a float64 value that can be set arbitrarily. +type GaugeFloat64 atomic.Uint64 // Snapshot returns a read-only copy of the gauge. 
-func (g *StandardGaugeFloat64) Snapshot() GaugeFloat64Snapshot { - v := math.Float64frombits(g.floatBits.Load()) - return gaugeFloat64Snapshot(v) +func (g *GaugeFloat64) Snapshot() GaugeFloat64Snapshot { + v := math.Float64frombits((*atomic.Uint64)(g).Load()) + return GaugeFloat64Snapshot(v) } // Update updates the gauge's value. -func (g *StandardGaugeFloat64) Update(v float64) { - g.floatBits.Store(math.Float64bits(v)) +func (g *GaugeFloat64) Update(v float64) { + (*atomic.Uint64)(g).Store(math.Float64bits(v)) } diff --git a/metrics/gauge_info.go b/metrics/gauge_info.go index 0010edc3249d..2f78455649e4 100644 --- a/metrics/gauge_info.go +++ b/metrics/gauge_info.go @@ -5,16 +5,6 @@ import ( "sync" ) -type GaugeInfoSnapshot interface { - Value() GaugeInfoValue -} - -// GaugeInfo holds a GaugeInfoValue value that can be set arbitrarily. -type GaugeInfo interface { - Update(GaugeInfoValue) - Snapshot() GaugeInfoSnapshot -} - // GaugeInfoValue is a mapping of keys to values type GaugeInfoValue map[string]string @@ -24,26 +14,23 @@ func (val GaugeInfoValue) String() string { } // GetOrRegisterGaugeInfo returns an existing GaugeInfo or constructs and registers a -// new StandardGaugeInfo. -func GetOrRegisterGaugeInfo(name string, r Registry) GaugeInfo { +// new GaugeInfo. +func GetOrRegisterGaugeInfo(name string, r Registry) *GaugeInfo { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewGaugeInfo()).(GaugeInfo) + return r.GetOrRegister(name, NewGaugeInfo()).(*GaugeInfo) } -// NewGaugeInfo constructs a new StandardGaugeInfo. -func NewGaugeInfo() GaugeInfo { - if !Enabled { - return NilGaugeInfo{} - } - return &StandardGaugeInfo{ +// NewGaugeInfo constructs a new GaugeInfo. +func NewGaugeInfo() *GaugeInfo { + return &GaugeInfo{ value: GaugeInfoValue{}, } } -// NewRegisteredGaugeInfo constructs and registers a new StandardGaugeInfo. -func NewRegisteredGaugeInfo(name string, r Registry) GaugeInfo { +// NewRegisteredGaugeInfo constructs and registers a new GaugeInfo. +func NewRegisteredGaugeInfo(name string, r Registry) *GaugeInfo { c := NewGaugeInfo() if nil == r { r = DefaultRegistry @@ -53,31 +40,24 @@ func NewRegisteredGaugeInfo(name string, r Registry) GaugeInfo { } // gaugeInfoSnapshot is a read-only copy of another GaugeInfo. -type gaugeInfoSnapshot GaugeInfoValue +type GaugeInfoSnapshot GaugeInfoValue // Value returns the value at the time the snapshot was taken. -func (g gaugeInfoSnapshot) Value() GaugeInfoValue { return GaugeInfoValue(g) } - -type NilGaugeInfo struct{} - -func (NilGaugeInfo) Snapshot() GaugeInfoSnapshot { return NilGaugeInfo{} } -func (NilGaugeInfo) Update(v GaugeInfoValue) {} -func (NilGaugeInfo) Value() GaugeInfoValue { return GaugeInfoValue{} } +func (g GaugeInfoSnapshot) Value() GaugeInfoValue { return GaugeInfoValue(g) } -// StandardGaugeInfo is the standard implementation of a GaugeInfo and uses -// sync.Mutex to manage a single string value. -type StandardGaugeInfo struct { +// GaugeInfo maintains a set of key/value mappings. +type GaugeInfo struct { mutex sync.Mutex value GaugeInfoValue } // Snapshot returns a read-only copy of the gauge. -func (g *StandardGaugeInfo) Snapshot() GaugeInfoSnapshot { - return gaugeInfoSnapshot(g.value) +func (g *GaugeInfo) Snapshot() GaugeInfoSnapshot { + return GaugeInfoSnapshot(g.value) } // Update updates the gauge's value. 
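// Since GaugeInfoValue is a plain map[string]string, a typical update looks
// like this (hypothetical keys):
//
//	g := metrics.NewGaugeInfo()
//	g.Update(metrics.GaugeInfoValue{"version": "1.0.0", "commit": "abc1234"})
//	info := g.Snapshot().Value() // read-only copy of the mapping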
-func (g *StandardGaugeInfo) Update(v GaugeInfoValue) { +func (g *GaugeInfo) Update(v GaugeInfoValue) { g.mutex.Lock() defer g.mutex.Unlock() g.value = v diff --git a/metrics/graphite.go b/metrics/graphite.go deleted file mode 100644 index aba752e0ed5e..000000000000 --- a/metrics/graphite.go +++ /dev/null @@ -1,117 +0,0 @@ -package metrics - -import ( - "bufio" - "fmt" - "log" - "net" - "strconv" - "strings" - "time" -) - -// GraphiteConfig provides a container with configuration parameters for -// the Graphite exporter -type GraphiteConfig struct { - Addr *net.TCPAddr // Network address to connect to - Registry Registry // Registry to be exported - FlushInterval time.Duration // Flush interval - DurationUnit time.Duration // Time conversion unit for durations - Prefix string // Prefix to be prepended to metric names - Percentiles []float64 // Percentiles to export from timers and histograms -} - -// Graphite is a blocking exporter function which reports metrics in r -// to a graphite server located at addr, flushing them every d duration -// and prepending metric names with prefix. -func Graphite(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) { - GraphiteWithConfig(GraphiteConfig{ - Addr: addr, - Registry: r, - FlushInterval: d, - DurationUnit: time.Nanosecond, - Prefix: prefix, - Percentiles: []float64{0.5, 0.75, 0.95, 0.99, 0.999}, - }) -} - -// GraphiteWithConfig is a blocking exporter function just like Graphite, -// but it takes a GraphiteConfig instead. -func GraphiteWithConfig(c GraphiteConfig) { - log.Printf("WARNING: This go-metrics client has been DEPRECATED! It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") - for range time.Tick(c.FlushInterval) { - if err := graphite(&c); nil != err { - log.Println(err) - } - } -} - -// GraphiteOnce performs a single submission to Graphite, returning a -// non-nil error on failed connections. This can be used in a loop -// similar to GraphiteWithConfig for custom error handling. -func GraphiteOnce(c GraphiteConfig) error { - log.Printf("WARNING: This go-metrics client has been DEPRECATED! 
It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") - return graphite(&c) -} - -func graphite(c *GraphiteConfig) error { - now := time.Now().Unix() - du := float64(c.DurationUnit) - conn, err := net.DialTCP("tcp", nil, c.Addr) - if nil != err { - return err - } - defer conn.Close() - w := bufio.NewWriter(conn) - c.Registry.Each(func(name string, i interface{}) { - switch metric := i.(type) { - case Counter: - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Snapshot().Count(), now) - case CounterFloat64: - fmt.Fprintf(w, "%s.%s.count %f %d\n", c.Prefix, name, metric.Snapshot().Count(), now) - case Gauge: - fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Snapshot().Value(), now) - case GaugeFloat64: - fmt.Fprintf(w, "%s.%s.value %f %d\n", c.Prefix, name, metric.Snapshot().Value(), now) - case GaugeInfo: - fmt.Fprintf(w, "%s.%s.value %s %d\n", c.Prefix, name, metric.Snapshot().Value().String(), now) - case Histogram: - h := metric.Snapshot() - ps := h.Percentiles(c.Percentiles) - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, h.Count(), now) - fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, h.Min(), now) - fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, h.Max(), now) - fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, h.Mean(), now) - fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, h.StdDev(), now) - for psIdx, psKey := range c.Percentiles { - key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) - fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) - } - case Meter: - m := metric.Snapshot() - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, m.Count(), now) - fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, m.Rate1(), now) - fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, m.Rate5(), now) - fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, m.Rate15(), now) - fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, m.RateMean(), now) - case Timer: - t := metric.Snapshot() - ps := t.Percentiles(c.Percentiles) - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, t.Count(), now) - fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, t.Min()/int64(du), now) - fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, t.Max()/int64(du), now) - fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, t.Mean()/du, now) - fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, t.StdDev()/du, now) - for psIdx, psKey := range c.Percentiles { - key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) - fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) - } - fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, t.Rate1(), now) - fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, t.Rate5(), now) - fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, t.Rate15(), now) - fmt.Fprintf(w, "%s.%s.mean-rate %.2f %d\n", c.Prefix, name, t.RateMean(), now) - } - w.Flush() - }) - return nil -} diff --git a/metrics/graphite_test.go b/metrics/graphite_test.go deleted file mode 100644 index c797c781df6f..000000000000 --- a/metrics/graphite_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package metrics - -import ( - "net" - "time" -) - -func ExampleGraphite() { - addr, _ := net.ResolveTCPAddr("net", ":2003") - go Graphite(DefaultRegistry, 1*time.Second, "some.prefix", addr) -} - -func 
ExampleGraphiteWithConfig() { - addr, _ := net.ResolveTCPAddr("net", ":2003") - go GraphiteWithConfig(GraphiteConfig{ - Addr: addr, - Registry: DefaultRegistry, - FlushInterval: 1 * time.Second, - DurationUnit: time.Millisecond, - Percentiles: []float64{0.5, 0.75, 0.99, 0.999}, - }) -} diff --git a/metrics/healthcheck.go b/metrics/healthcheck.go index adcd15ab581a..435e5e0bf93d 100644 --- a/metrics/healthcheck.go +++ b/metrics/healthcheck.go @@ -1,61 +1,35 @@ package metrics -// Healthcheck holds an error value describing an arbitrary up/down status. -type Healthcheck interface { - Check() - Error() error - Healthy() - Unhealthy(error) -} - // NewHealthcheck constructs a new Healthcheck which will use the given // function to update its status. -func NewHealthcheck(f func(Healthcheck)) Healthcheck { - if !Enabled { - return NilHealthcheck{} - } - return &StandardHealthcheck{nil, f} +func NewHealthcheck(f func(*Healthcheck)) *Healthcheck { + return &Healthcheck{nil, f} } -// NilHealthcheck is a no-op. -type NilHealthcheck struct{} - -// Check is a no-op. -func (NilHealthcheck) Check() {} - -// Error is a no-op. -func (NilHealthcheck) Error() error { return nil } - -// Healthy is a no-op. -func (NilHealthcheck) Healthy() {} - -// Unhealthy is a no-op. -func (NilHealthcheck) Unhealthy(error) {} - -// StandardHealthcheck is the standard implementation of a Healthcheck and +// Healthcheck is the standard implementation of a Healthcheck and // stores the status and a function to call to update the status. -type StandardHealthcheck struct { +type Healthcheck struct { err error - f func(Healthcheck) + f func(*Healthcheck) } // Check runs the healthcheck function to update the healthcheck's status. -func (h *StandardHealthcheck) Check() { +func (h *Healthcheck) Check() { h.f(h) } // Error returns the healthcheck's status, which will be nil if it is healthy. -func (h *StandardHealthcheck) Error() error { +func (h *Healthcheck) Error() error { return h.err } // Healthy marks the healthcheck as healthy. -func (h *StandardHealthcheck) Healthy() { +func (h *Healthcheck) Healthy() { h.err = nil } // Unhealthy marks the healthcheck as unhealthy. The error is stored and // may be retrieved by the Error method. -func (h *StandardHealthcheck) Unhealthy(err error) { +func (h *Healthcheck) Unhealthy(err error) { h.err = err } diff --git a/metrics/histogram.go b/metrics/histogram.go index 10259a246377..7c27bcc92880 100644 --- a/metrics/histogram.go +++ b/metrics/histogram.go @@ -1,7 +1,16 @@ package metrics type HistogramSnapshot interface { - SampleSnapshot + Count() int64 + Max() int64 + Mean() float64 + Min() int64 + Percentile(float64) float64 + Percentiles([]float64) []float64 + Size() int + StdDev() float64 + Sum() int64 + Variance() float64 } // Histogram calculates distribution statistics from a series of int64 values. @@ -31,10 +40,7 @@ func GetOrRegisterHistogramLazy(name string, r Registry, s func() Sample) Histog // NewHistogram constructs a new StandardHistogram from a Sample. func NewHistogram(s Sample) Histogram { - if !Enabled { - return NilHistogram{} - } - return &StandardHistogram{sample: s} + return &StandardHistogram{s} } // NewRegisteredHistogram constructs and registers a new StandardHistogram from @@ -48,13 +54,6 @@ func NewRegisteredHistogram(name string, r Registry, s Sample) Histogram { return c } -// NilHistogram is a no-op Histogram. 
-type NilHistogram struct{} - -func (NilHistogram) Clear() {} -func (NilHistogram) Snapshot() HistogramSnapshot { return (*emptySnapshot)(nil) } -func (NilHistogram) Update(v int64) {} - // StandardHistogram is the standard implementation of a Histogram and uses a // Sample to bound its memory use. type StandardHistogram struct { diff --git a/metrics/inactive.go b/metrics/inactive.go deleted file mode 100644 index 1f47f0210af3..000000000000 --- a/metrics/inactive.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2023 The go-ethereum Authors -// This file is part of the go-ethereum library. -// -// The go-ethereum library is free software: you can redistribute it and/or modify -// it under the terms of the GNU Lesser General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// The go-ethereum library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Lesser General Public License for more details. -// -// You should have received a copy of the GNU Lesser General Public License -// along with the go-ethereum library. If not, see . - -package metrics - -// compile-time checks that interfaces are implemented. -var ( - _ SampleSnapshot = (*emptySnapshot)(nil) - _ HistogramSnapshot = (*emptySnapshot)(nil) - _ CounterSnapshot = (*emptySnapshot)(nil) - _ GaugeSnapshot = (*emptySnapshot)(nil) - _ MeterSnapshot = (*emptySnapshot)(nil) - _ EWMASnapshot = (*emptySnapshot)(nil) - _ TimerSnapshot = (*emptySnapshot)(nil) -) - -type emptySnapshot struct{} - -func (*emptySnapshot) Count() int64 { return 0 } -func (*emptySnapshot) Max() int64 { return 0 } -func (*emptySnapshot) Mean() float64 { return 0.0 } -func (*emptySnapshot) Min() int64 { return 0 } -func (*emptySnapshot) Percentile(p float64) float64 { return 0.0 } -func (*emptySnapshot) Percentiles(ps []float64) []float64 { return make([]float64, len(ps)) } -func (*emptySnapshot) Size() int { return 0 } -func (*emptySnapshot) StdDev() float64 { return 0.0 } -func (*emptySnapshot) Sum() int64 { return 0 } -func (*emptySnapshot) Values() []int64 { return []int64{} } -func (*emptySnapshot) Variance() float64 { return 0.0 } -func (*emptySnapshot) Value() int64 { return 0 } -func (*emptySnapshot) Rate() float64 { return 0.0 } -func (*emptySnapshot) Rate1() float64 { return 0.0 } -func (*emptySnapshot) Rate5() float64 { return 0.0 } -func (*emptySnapshot) Rate15() float64 { return 0.0 } -func (*emptySnapshot) RateMean() float64 { return 0.0 } diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 8cfa081c1421..37a6a3808e3b 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -8,31 +8,31 @@ import ( func readMeter(namespace, name string, i interface{}) (string, map[string]interface{}) { switch metric := i.(type) { - case metrics.Counter: + case *metrics.Counter: measurement := fmt.Sprintf("%s%s.count", namespace, name) fields := map[string]interface{}{ "value": metric.Snapshot().Count(), } return measurement, fields - case metrics.CounterFloat64: + case *metrics.CounterFloat64: measurement := fmt.Sprintf("%s%s.count", namespace, name) fields := map[string]interface{}{ "value": metric.Snapshot().Count(), } return measurement, fields - case metrics.Gauge: + case *metrics.Gauge: measurement := fmt.Sprintf("%s%s.gauge", namespace, name) fields := map[string]interface{}{ "value": 
metric.Snapshot().Value(), } return measurement, fields - case metrics.GaugeFloat64: + case *metrics.GaugeFloat64: measurement := fmt.Sprintf("%s%s.gauge", namespace, name) fields := map[string]interface{}{ "value": metric.Snapshot().Value(), } return measurement, fields - case metrics.GaugeInfo: + case *metrics.GaugeInfo: ms := metric.Snapshot() measurement := fmt.Sprintf("%s%s.gauge", namespace, name) fields := map[string]interface{}{ @@ -62,7 +62,7 @@ func readMeter(namespace, name string, i interface{}) (string, map[string]interf "p9999": ps[6], } return measurement, fields - case metrics.Meter: + case *metrics.Meter: ms := metric.Snapshot() measurement := fmt.Sprintf("%s%s.meter", namespace, name) fields := map[string]interface{}{ @@ -73,7 +73,7 @@ func readMeter(namespace, name string, i interface{}) (string, map[string]interf "mean": ms.RateMean(), } return measurement, fields - case metrics.Timer: + case *metrics.Timer: ms := metric.Snapshot() ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) @@ -97,7 +97,7 @@ func readMeter(namespace, name string, i interface{}) (string, map[string]interf "meanrate": ms.RateMean(), } return measurement, fields - case metrics.ResettingTimer: + case *metrics.ResettingTimer: ms := metric.Snapshot() if ms.Count() == 0 { break diff --git a/metrics/influxdb/influxdb_test.go b/metrics/influxdb/influxdb_test.go index aa47f15e1825..d7e47fda0e08 100644 --- a/metrics/influxdb/influxdb_test.go +++ b/metrics/influxdb/influxdb_test.go @@ -33,7 +33,7 @@ import ( ) func TestMain(m *testing.M) { - metrics.Enabled = true + metrics.Enable() os.Exit(m.Run()) } diff --git a/metrics/init_test.go b/metrics/init_test.go index 43401e833c34..af75bee425b1 100644 --- a/metrics/init_test.go +++ b/metrics/init_test.go @@ -1,5 +1,5 @@ package metrics func init() { - Enabled = true + metricsEnabled = true } diff --git a/metrics/log.go b/metrics/log.go index 3b9773faa728..3380bbf9c4d0 100644 --- a/metrics/log.go +++ b/metrics/log.go @@ -21,25 +21,21 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { for range time.Tick(freq) { r.Each(func(name string, i interface{}) { switch metric := i.(type) { - case Counter: + case *Counter: l.Printf("counter %s\n", name) l.Printf(" count: %9d\n", metric.Snapshot().Count()) - case CounterFloat64: + case *CounterFloat64: l.Printf("counter %s\n", name) l.Printf(" count: %f\n", metric.Snapshot().Count()) - case Gauge: + case *Gauge: l.Printf("gauge %s\n", name) l.Printf(" value: %9d\n", metric.Snapshot().Value()) - case GaugeFloat64: + case *GaugeFloat64: l.Printf("gauge %s\n", name) l.Printf(" value: %f\n", metric.Snapshot().Value()) - case GaugeInfo: + case *GaugeInfo: l.Printf("gauge %s\n", name) l.Printf(" value: %s\n", metric.Snapshot().Value()) - case Healthcheck: - metric.Check() - l.Printf("healthcheck %s\n", name) - l.Printf(" error: %v\n", metric.Error()) case Histogram: h := metric.Snapshot() ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) @@ -54,7 +50,7 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { l.Printf(" 95%%: %12.2f\n", ps[2]) l.Printf(" 99%%: %12.2f\n", ps[3]) l.Printf(" 99.9%%: %12.2f\n", ps[4]) - case Meter: + case *Meter: m := metric.Snapshot() l.Printf("meter %s\n", name) l.Printf(" count: %9d\n", m.Count()) @@ -62,7 +58,7 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { l.Printf(" 5-min rate: %12.2f\n", m.Rate5()) l.Printf(" 15-min rate: %12.2f\n", m.Rate15()) l.Printf(" mean rate: 
%12.2f\n", m.RateMean()) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) l.Printf("timer %s\n", name) diff --git a/metrics/meter.go b/metrics/meter.go index 432838f4ef7b..194bd1f30411 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -7,114 +7,78 @@ import ( "time" ) -type MeterSnapshot interface { - Count() int64 - Rate1() float64 - Rate5() float64 - Rate15() float64 - RateMean() float64 -} - -// Meters count events to produce exponentially-weighted moving average rates -// at one-, five-, and fifteen-minutes and a mean rate. -type Meter interface { - Mark(int64) - Snapshot() MeterSnapshot - Stop() -} - // GetOrRegisterMeter returns an existing Meter or constructs and registers a -// new StandardMeter. +// new Meter. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func GetOrRegisterMeter(name string, r Registry) Meter { - if nil == r { +func GetOrRegisterMeter(name string, r Registry) *Meter { + if r == nil { r = DefaultRegistry } - return r.GetOrRegister(name, NewMeter).(Meter) + return r.GetOrRegister(name, NewMeter).(*Meter) } -// NewMeter constructs a new StandardMeter and launches a goroutine. +// NewMeter constructs a new Meter and launches a goroutine. // Be sure to call Stop() once the meter is of no use to allow for garbage collection. -func NewMeter() Meter { - if !Enabled { - return NilMeter{} - } - m := newStandardMeter() - arbiter.Lock() - defer arbiter.Unlock() - arbiter.meters[m] = struct{}{} - if !arbiter.started { - arbiter.started = true - go arbiter.tick() - } +func NewMeter() *Meter { + m := newMeter() + arbiter.add(m) return m } // NewInactiveMeter returns a meter but does not start any goroutines. This // method is mainly intended for testing. -func NewInactiveMeter() Meter { - if !Enabled { - return NilMeter{} - } - m := newStandardMeter() - return m +func NewInactiveMeter() *Meter { + return newMeter() } -// NewRegisteredMeter constructs and registers a new StandardMeter +// NewRegisteredMeter constructs and registers a new Meter // and launches a goroutine. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func NewRegisteredMeter(name string, r Registry) Meter { +func NewRegisteredMeter(name string, r Registry) *Meter { return GetOrRegisterMeter(name, r) } -// meterSnapshot is a read-only copy of the meter's internal values. -type meterSnapshot struct { +// MeterSnapshot is a read-only copy of the meter's internal values. +type MeterSnapshot struct { count int64 rate1, rate5, rate15, rateMean float64 } // Count returns the count of events at the time the snapshot was taken. -func (m *meterSnapshot) Count() int64 { return m.count } +func (m *MeterSnapshot) Count() int64 { return m.count } // Rate1 returns the one-minute moving average rate of events per second at the // time the snapshot was taken. -func (m *meterSnapshot) Rate1() float64 { return m.rate1 } +func (m *MeterSnapshot) Rate1() float64 { return m.rate1 } // Rate5 returns the five-minute moving average rate of events per second at // the time the snapshot was taken. -func (m *meterSnapshot) Rate5() float64 { return m.rate5 } +func (m *MeterSnapshot) Rate5() float64 { return m.rate5 } // Rate15 returns the fifteen-minute moving average rate of events per second // at the time the snapshot was taken. 
-func (m *meterSnapshot) Rate15() float64 { return m.rate15 } +func (m *MeterSnapshot) Rate15() float64 { return m.rate15 } // RateMean returns the meter's mean rate of events per second at the time the // snapshot was taken. -func (m *meterSnapshot) RateMean() float64 { return m.rateMean } - -// NilMeter is a no-op Meter. -type NilMeter struct{} +func (m *MeterSnapshot) RateMean() float64 { return m.rateMean } -func (NilMeter) Count() int64 { return 0 } -func (NilMeter) Mark(n int64) {} -func (NilMeter) Snapshot() MeterSnapshot { return (*emptySnapshot)(nil) } -func (NilMeter) Stop() {} - -// StandardMeter is the standard implementation of a Meter. -type StandardMeter struct { +// Meter count events to produce exponentially-weighted moving average rates +// at one-, five-, and fifteen-minutes and a mean rate. +type Meter struct { count atomic.Int64 uncounted atomic.Int64 // not yet added to the EWMAs rateMean atomic.Uint64 - a1, a5, a15 EWMA + a1, a5, a15 *EWMA startTime time.Time stopped atomic.Bool } -func newStandardMeter() *StandardMeter { - return &StandardMeter{ +func newMeter() *Meter { + return &Meter{ a1: NewEWMA1(), a5: NewEWMA5(), a15: NewEWMA15(), @@ -123,22 +87,20 @@ func newStandardMeter() *StandardMeter { } // Stop stops the meter, Mark() will be a no-op if you use it after being stopped. -func (m *StandardMeter) Stop() { +func (m *Meter) Stop() { if stopped := m.stopped.Swap(true); !stopped { - arbiter.Lock() - delete(arbiter.meters, m) - arbiter.Unlock() + arbiter.remove(m) } } // Mark records the occurrence of n events. -func (m *StandardMeter) Mark(n int64) { +func (m *Meter) Mark(n int64) { m.uncounted.Add(n) } // Snapshot returns a read-only copy of the meter. -func (m *StandardMeter) Snapshot() MeterSnapshot { - return &meterSnapshot{ +func (m *Meter) Snapshot() *MeterSnapshot { + return &MeterSnapshot{ count: m.count.Load() + m.uncounted.Load(), rate1: m.a1.Snapshot().Rate(), rate5: m.a5.Snapshot().Rate(), @@ -147,7 +109,7 @@ func (m *StandardMeter) Snapshot() MeterSnapshot { } } -func (m *StandardMeter) tick() { +func (m *Meter) tick() { // Take the uncounted values, add to count n := m.uncounted.Swap(0) count := m.count.Add(n) @@ -157,33 +119,51 @@ func (m *StandardMeter) tick() { m.a5.Update(n) m.a15.Update(n) // And trigger them to calculate the rates - m.a1.Tick() - m.a5.Tick() - m.a15.Tick() + m.a1.tick() + m.a5.tick() + m.a15.tick() } -// meterArbiter ticks meters every 5s from a single goroutine. +var arbiter = meterTicker{meters: make(map[*Meter]struct{})} + +// meterTicker ticks meters every 5s from a single goroutine. // meters are references in a set for future stopping. -type meterArbiter struct { - sync.RWMutex +type meterTicker struct { + mu sync.RWMutex + started bool - meters map[*StandardMeter]struct{} - ticker *time.Ticker + meters map[*Meter]struct{} } -var arbiter = meterArbiter{ticker: time.NewTicker(5 * time.Second), meters: make(map[*StandardMeter]struct{})} - -// tick meters on the scheduled interval -func (ma *meterArbiter) tick() { - for range ma.ticker.C { - ma.tickMeters() +// add adds another *Meter ot the arbiter, and starts the arbiter ticker. +func (ma *meterTicker) add(m *Meter) { + ma.mu.Lock() + defer ma.mu.Unlock() + ma.meters[m] = struct{}{} + if !ma.started { + ma.started = true + go ma.loop() } } -func (ma *meterArbiter) tickMeters() { - ma.RLock() - defer ma.RUnlock() - for meter := range ma.meters { - meter.tick() +// remove removes a meter from the set of ticked meters. 
+func (ma *meterTicker) remove(m *Meter) { + ma.mu.Lock() + delete(ma.meters, m) + ma.mu.Unlock() +} + +// loop ticks meters on a 5 second interval. +func (ma *meterTicker) loop() { + ticker := time.NewTicker(5 * time.Second) + for range ticker.C { + if !metricsEnabled { + continue + } + ma.mu.RLock() + for meter := range ma.meters { + meter.tick() + } + ma.mu.RUnlock() } } diff --git a/metrics/meter_test.go b/metrics/meter_test.go index 8330ea9647f5..d62cf386402c 100644 --- a/metrics/meter_test.go +++ b/metrics/meter_test.go @@ -28,18 +28,12 @@ func TestGetOrRegisterMeter(t *testing.T) { } func TestMeterDecay(t *testing.T) { - ma := meterArbiter{ - ticker: time.NewTicker(time.Millisecond), - meters: make(map[*StandardMeter]struct{}), - } - defer ma.ticker.Stop() - m := newStandardMeter() - ma.meters[m] = struct{}{} + m := newMeter() m.Mark(1) - ma.tickMeters() + m.tick() rateMean := m.Snapshot().RateMean() time.Sleep(100 * time.Millisecond) - ma.tickMeters() + m.tick() if m.Snapshot().RateMean() >= rateMean { t.Error("m.RateMean() didn't decrease") } diff --git a/metrics/metrics.go b/metrics/metrics.go index 797001ab656e..a9d6623173ed 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -6,52 +6,29 @@ package metrics import ( - "os" "runtime/metrics" "runtime/pprof" - "strconv" - "strings" - "syscall" "time" +) - "github.com/XinFinOrg/XDPoSChain/log" +var ( + metricsEnabled = false ) -// Enabled is checked by the constructor functions for all of the -// standard metrics. If it is true, the metric returned is a stub. +// Enabled is checked by functions that are deemed 'expensive', e.g. if a +// meter-type does locking and/or non-trivial math operations during update. +func Enabled() bool { + return metricsEnabled +} + +// Enable enables the metrics system. +// The Enabled-flag is expected to be set, once, during startup, but toggling off and on +// is not supported. // -// This global kill-switch helps quantify the observer effect and makes -// for less cluttered pprof profiles. -var Enabled = false - -// enablerFlags is the CLI flag names to use to enable metrics collections. -var enablerFlags = []string{"metrics"} - -// enablerEnvVars is the env var names to use to enable metrics collections. -var enablerEnvVars = []string{"XDC_METRICS"} - -// init enables or disables the metrics system. Since we need this to run before -// any other code gets to create meters and timers, we'll actually do an ugly hack -// and peek into the command line args for the metrics flag. -func init() { - for _, enabler := range enablerEnvVars { - if val, found := syscall.Getenv(enabler); found && !Enabled { - if enable, _ := strconv.ParseBool(val); enable { // ignore error, flag parser will choke on it later - log.Info("Enabling metrics collection") - Enabled = true - } - } - } - for _, arg := range os.Args { - flag := strings.TrimLeft(arg, "-") - - for _, enabler := range enablerFlags { - if !Enabled && flag == enabler { - log.Info("Enabling metrics collection") - Enabled = true - } - } - } +// Enable is not safe to call concurrently. You need to call this as early as possible in +// the program, before any metrics collection will happen. +func Enable() { + metricsEnabled = true } var threadCreateProfile = pprof.Lookup("threadcreate") @@ -128,7 +105,7 @@ func readRuntimeStats(v *runtimeStats) { // CollectProcessMetrics periodically collects various metrics about the running process. 
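The hunks above replace the Meter interface, the NilMeter stub and the exported Enabled variable with a concrete *Meter type plus an Enable()/Enabled() pair. A minimal sketch of how a call site looks after this change, assuming the XDPoSChain metrics import path; the metric name is illustrative:

package main

import (
	"fmt"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	// Enable must be called once, early; it replaces assignments to the old
	// exported metrics.Enabled variable and toggling at runtime is not supported.
	metrics.Enable()

	// GetOrRegisterMeter now returns the concrete *metrics.Meter, so callers
	// no longer type-assert against a Meter interface.
	m := metrics.GetOrRegisterMeter("example/ingress", metrics.DefaultRegistry)
	m.Mark(42)

	// Snapshot() returns *MeterSnapshot, a plain read-only value.
	snap := m.Snapshot()
	fmt.Println("count:", snap.Count(), "1m rate:", snap.Rate1())

	// Stop unhooks the meter from the shared 5s ticker so it can be collected.
	m.Stop()
}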
func CollectProcessMetrics(refresh time.Duration) { // Short circuit if the metrics system is disabled - if !Enabled { + if !metricsEnabled { return } diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 2861d5f2caf6..dc144f2425a6 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -7,8 +7,6 @@ import ( "time" ) -const FANOUT = 128 - func TestReadRuntimeValues(t *testing.T) { var v runtimeStats readRuntimeStats(&v) @@ -16,60 +14,23 @@ func TestReadRuntimeValues(t *testing.T) { } func BenchmarkMetrics(b *testing.B) { - r := NewRegistry() - c := NewRegisteredCounter("counter", r) - cf := NewRegisteredCounterFloat64("counterfloat64", r) - g := NewRegisteredGauge("gauge", r) - gf := NewRegisteredGaugeFloat64("gaugefloat64", r) - h := NewRegisteredHistogram("histogram", r, NewUniformSample(100)) - m := NewRegisteredMeter("meter", r) - t := NewRegisteredTimer("timer", r) + var ( + r = NewRegistry() + c = NewRegisteredCounter("counter", r) + cf = NewRegisteredCounterFloat64("counterfloat64", r) + g = NewRegisteredGauge("gauge", r) + gf = NewRegisteredGaugeFloat64("gaugefloat64", r) + h = NewRegisteredHistogram("histogram", r, NewUniformSample(100)) + m = NewRegisteredMeter("meter", r) + t = NewRegisteredTimer("timer", r) + ) RegisterDebugGCStats(r) b.ResetTimer() - ch := make(chan bool) - - wgD := &sync.WaitGroup{} - /* - wgD.Add(1) - go func() { - defer wgD.Done() - //log.Println("go CaptureDebugGCStats") - for { - select { - case <-ch: - //log.Println("done CaptureDebugGCStats") - return - default: - CaptureDebugGCStatsOnce(r) - } - } - }() - //*/ - - wgW := &sync.WaitGroup{} - /* - wgW.Add(1) + var wg sync.WaitGroup + wg.Add(128) + for i := 0; i < 128; i++ { go func() { - defer wgW.Done() - //log.Println("go Write") - for { - select { - case <-ch: - //log.Println("done Write") - return - default: - WriteOnce(r, io.Discard) - } - } - }() - //*/ - - wg := &sync.WaitGroup{} - wg.Add(FANOUT) - for i := 0; i < FANOUT; i++ { - go func(i int) { defer wg.Done() - //log.Println("go", i) for i := 0; i < b.N; i++ { c.Inc(1) cf.Inc(1.0) @@ -79,13 +40,9 @@ func BenchmarkMetrics(b *testing.B) { m.Mark(1) t.Update(1) } - //log.Println("done", i) - }(i) + }() } wg.Wait() - close(ch) - wgD.Wait() - wgW.Wait() } func Example() { diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go index e81690f94340..57af3d025e67 100644 --- a/metrics/opentsdb.go +++ b/metrics/opentsdb.go @@ -64,15 +64,15 @@ func (c *OpenTSDBConfig) writeRegistry(w io.Writer, now int64, shortHostname str c.Registry.Each(func(name string, i interface{}) { switch metric := i.(type) { - case Counter: + case *Counter: fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname) - case CounterFloat64: + case *CounterFloat64: fmt.Fprintf(w, "put %s.%s.count %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname) - case Gauge: + case *Gauge: fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname) - case GaugeFloat64: + case *GaugeFloat64: fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname) - case GaugeInfo: + case *GaugeInfo: fmt.Fprintf(w, "put %s.%s.value %d %s host=%s\n", c.Prefix, name, now, metric.Snapshot().Value().String(), shortHostname) case Histogram: h := metric.Snapshot() @@ -87,14 +87,14 @@ func (c *OpenTSDBConfig) writeRegistry(w io.Writer, now int64, shortHostname str fmt.Fprintf(w, "put %s.%s.95-percentile %d 
%.2f host=%s\n", c.Prefix, name, now, ps[2], shortHostname) fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3], shortHostname) fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4], shortHostname) - case Meter: + case *Meter: m := metric.Snapshot() fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, m.Count(), shortHostname) fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate1(), shortHostname) fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate5(), shortHostname) fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate15(), shortHostname) fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, m.RateMean(), shortHostname) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, t.Count(), shortHostname) diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go index ac2c960ac775..1d49f51dc295 100644 --- a/metrics/prometheus/collector.go +++ b/metrics/prometheus/collector.go @@ -51,23 +51,23 @@ func newCollector() *collector { // metric type is not supported/known. func (c *collector) Add(name string, i any) error { switch m := i.(type) { - case metrics.Counter: + case *metrics.Counter: c.addCounter(name, m.Snapshot()) - case metrics.CounterFloat64: + case *metrics.CounterFloat64: c.addCounterFloat64(name, m.Snapshot()) - case metrics.Gauge: + case *metrics.Gauge: c.addGauge(name, m.Snapshot()) - case metrics.GaugeFloat64: + case *metrics.GaugeFloat64: c.addGaugeFloat64(name, m.Snapshot()) - case metrics.GaugeInfo: + case *metrics.GaugeInfo: c.addGaugeInfo(name, m.Snapshot()) case metrics.Histogram: c.addHistogram(name, m.Snapshot()) - case metrics.Meter: + case *metrics.Meter: c.addMeter(name, m.Snapshot()) - case metrics.Timer: + case *metrics.Timer: c.addTimer(name, m.Snapshot()) - case metrics.ResettingTimer: + case *metrics.ResettingTimer: c.addResettingTimer(name, m.Snapshot()) default: return fmt.Errorf("unknown prometheus metric type %T", i) @@ -106,11 +106,11 @@ func (c *collector) addHistogram(name string, m metrics.HistogramSnapshot) { c.buff.WriteRune('\n') } -func (c *collector) addMeter(name string, m metrics.MeterSnapshot) { +func (c *collector) addMeter(name string, m *metrics.MeterSnapshot) { c.writeGaugeCounter(name, m.Count()) } -func (c *collector) addTimer(name string, m metrics.TimerSnapshot) { +func (c *collector) addTimer(name string, m *metrics.TimerSnapshot) { pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} ps := m.Percentiles(pv) c.writeSummaryCounter(name, m.Count()) @@ -121,7 +121,7 @@ func (c *collector) addTimer(name string, m metrics.TimerSnapshot) { c.buff.WriteRune('\n') } -func (c *collector) addResettingTimer(name string, m metrics.ResettingTimerSnapshot) { +func (c *collector) addResettingTimer(name string, m *metrics.ResettingTimerSnapshot) { if m.Count() <= 0 { return } diff --git a/metrics/prometheus/collector_test.go b/metrics/prometheus/collector_test.go index 948c539ad666..49b979432fbd 100644 --- a/metrics/prometheus/collector_test.go +++ b/metrics/prometheus/collector_test.go @@ -27,7 +27,7 @@ import ( ) func TestMain(m *testing.M) { - metrics.Enabled = true + metrics.Enable() os.Exit(m.Run()) } diff --git a/metrics/registry.go b/metrics/registry.go index 8325f4e153e6..527da6238de7 100644 --- 
a/metrics/registry.go +++ b/metrics/registry.go @@ -1,6 +1,7 @@ package metrics import ( + "errors" "fmt" "reflect" "sort" @@ -8,14 +9,10 @@ import ( "sync" ) -// DuplicateMetric is the error returned by Registry. Register when a metric +// ErrDuplicateMetric is the error returned by Registry.Register when a metric // already exists. If you mean to Register that metric you must first // Unregister the existing metric. -type DuplicateMetric string - -func (err DuplicateMetric) Error() string { - return fmt.Sprintf("duplicate metric: %s", string(err)) -} +var ErrDuplicateMetric = errors.New("duplicate metric") // A Registry holds references to a set of metrics by name and can iterate // over them, calling callback functions provided by the user. @@ -114,13 +111,13 @@ func (r *StandardRegistry) GetOrRegister(name string, i interface{}) interface{} return item } -// Register the given metric under the given name. Returns a DuplicateMetric +// Register the given metric under the given name. Returns a ErrDuplicateMetric // if a metric by the given name is already registered. func (r *StandardRegistry) Register(name string, i interface{}) error { // fast path _, ok := r.metrics.Load(name) if ok { - return DuplicateMetric(name) + return fmt.Errorf("%w: %v", ErrDuplicateMetric, name) } if v := reflect.ValueOf(i); v.Kind() == reflect.Func { @@ -128,7 +125,7 @@ func (r *StandardRegistry) Register(name string, i interface{}) error { } _, loaded, _ := r.loadOrRegister(name, i) if loaded { - return DuplicateMetric(name) + return fmt.Errorf("%w: %v", ErrDuplicateMetric, name) } return nil } @@ -136,7 +133,7 @@ func (r *StandardRegistry) Register(name string, i interface{}) error { // RunHealthchecks run all registered healthchecks. func (r *StandardRegistry) RunHealthchecks() { r.metrics.Range(func(key, value any) bool { - if h, ok := value.(Healthcheck); ok { + if h, ok := value.(*Healthcheck); ok { h.Check() } return true @@ -149,15 +146,15 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { r.Each(func(name string, i interface{}) { values := make(map[string]interface{}) switch metric := i.(type) { - case Counter: + case *Counter: values["count"] = metric.Snapshot().Count() - case CounterFloat64: + case *CounterFloat64: values["count"] = metric.Snapshot().Count() - case Gauge: + case *Gauge: values["value"] = metric.Snapshot().Value() - case GaugeFloat64: + case *GaugeFloat64: values["value"] = metric.Snapshot().Value() - case Healthcheck: + case *Healthcheck: values["error"] = nil metric.Check() if err := metric.Error(); nil != err { @@ -176,14 +173,14 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { values["95%"] = ps[2] values["99%"] = ps[3] values["99.9%"] = ps[4] - case Meter: + case *Meter: m := metric.Snapshot() values["count"] = m.Count() values["1m.rate"] = m.Rate1() values["5m.rate"] = m.Rate5() values["15m.rate"] = m.Rate15() values["mean.rate"] = m.RateMean() - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) values["count"] = t.Count() @@ -214,7 +211,7 @@ func (r *StandardRegistry) Unregister(name string) { func (r *StandardRegistry) loadOrRegister(name string, i interface{}) (interface{}, bool, bool) { switch i.(type) { - case Counter, CounterFloat64, Gauge, GaugeFloat64, GaugeInfo, Healthcheck, Histogram, Meter, Timer, ResettingTimer: + case *Counter, *CounterFloat64, *Gauge, *GaugeFloat64, *GaugeInfo, *Healthcheck, Histogram, *Meter, *Timer, *ResettingTimer: default: return 
nil, false, false } @@ -345,7 +342,7 @@ func GetOrRegister(name string, i interface{}) interface{} { return DefaultRegistry.GetOrRegister(name, i) } -// Register the given metric under the given name. Returns a DuplicateMetric +// Register the given metric under the given name. Returns a ErrDuplicateMetric // if a metric by the given name is already registered. func Register(name string, i interface{}) error { return DefaultRegistry.Register(name, i) diff --git a/metrics/registry_test.go b/metrics/registry_test.go index 75012dd4ac00..bdc58fee6c74 100644 --- a/metrics/registry_test.go +++ b/metrics/registry_test.go @@ -47,7 +47,7 @@ func TestRegistry(t *testing.T) { if name != "foo" { t.Fatal(name) } - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) @@ -73,7 +73,7 @@ func TestRegistryDuplicate(t *testing.T) { i := 0 r.Each(func(name string, iface interface{}) { i++ - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) @@ -85,11 +85,11 @@ func TestRegistryDuplicate(t *testing.T) { func TestRegistryGet(t *testing.T) { r := NewRegistry() r.Register("foo", NewCounter()) - if count := r.Get("foo").(Counter).Snapshot().Count(); count != 0 { + if count := r.Get("foo").(*Counter).Snapshot().Count(); count != 0 { t.Fatal(count) } - r.Get("foo").(Counter).Inc(1) - if count := r.Get("foo").(Counter).Snapshot().Count(); count != 1 { + r.Get("foo").(*Counter).Inc(1) + if count := r.Get("foo").(*Counter).Snapshot().Count(); count != 1 { t.Fatal(count) } } @@ -100,7 +100,7 @@ func TestRegistryGetOrRegister(t *testing.T) { // First metric wins with GetOrRegister _ = r.GetOrRegister("foo", NewCounter()) m := r.GetOrRegister("foo", NewGauge()) - if _, ok := m.(Counter); !ok { + if _, ok := m.(*Counter); !ok { t.Fatal(m) } @@ -110,7 +110,7 @@ func TestRegistryGetOrRegister(t *testing.T) { if name != "foo" { t.Fatal(name) } - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) @@ -125,7 +125,7 @@ func TestRegistryGetOrRegisterWithLazyInstantiation(t *testing.T) { // First metric wins with GetOrRegister _ = r.GetOrRegister("foo", NewCounter) m := r.GetOrRegister("foo", NewGauge) - if _, ok := m.(Counter); !ok { + if _, ok := m.(*Counter); !ok { t.Fatal(m) } @@ -135,7 +135,7 @@ func TestRegistryGetOrRegisterWithLazyInstantiation(t *testing.T) { if name != "foo" { t.Fatal(name) } - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) diff --git a/metrics/resetting_sample.go b/metrics/resetting_sample.go index c38ffcd3ec32..730ef93416d5 100644 --- a/metrics/resetting_sample.go +++ b/metrics/resetting_sample.go @@ -17,7 +17,7 @@ type resettingSample struct { } // Snapshot returns a read-only copy of the sample with the original reset. 
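With the DuplicateMetric string type replaced by the wrapped sentinel ErrDuplicateMetric above, callers detect collisions with errors.Is instead of a type assertion. A minimal sketch under that assumption; the registry and metric name are illustrative:

package main

import (
	"errors"
	"fmt"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	r := metrics.NewRegistry()
	if err := r.Register("chain/head", metrics.NewCounter()); err != nil {
		fmt.Println("unexpected:", err)
	}

	// Registering the same name again returns an error wrapping ErrDuplicateMetric.
	if err := r.Register("chain/head", metrics.NewCounter()); errors.Is(err, metrics.ErrDuplicateMetric) {
		fmt.Println("duplicate registration:", err)
	}
}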
-func (rs *resettingSample) Snapshot() SampleSnapshot { +func (rs *resettingSample) Snapshot() *sampleSnapshot { s := rs.Sample.Snapshot() rs.Sample.Clear() return s diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go index 6802e3fcea98..1b3e87bc3d12 100644 --- a/metrics/resetting_timer.go +++ b/metrics/resetting_timer.go @@ -5,36 +5,17 @@ import ( "time" ) -// Initial slice capacity for the values stored in a ResettingTimer -const InitialResettingTimerSliceCap = 10 - -type ResettingTimerSnapshot interface { - Count() int - Mean() float64 - Max() int64 - Min() int64 - Percentiles([]float64) []float64 -} - -// ResettingTimer is used for storing aggregated values for timers, which are reset on every flush interval. -type ResettingTimer interface { - Snapshot() ResettingTimerSnapshot - Time(func()) - Update(time.Duration) - UpdateSince(time.Time) -} - // GetOrRegisterResettingTimer returns an existing ResettingTimer or constructs and registers a -// new StandardResettingTimer. -func GetOrRegisterResettingTimer(name string, r Registry) ResettingTimer { +// new ResettingTimer. +func GetOrRegisterResettingTimer(name string, r Registry) *ResettingTimer { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewResettingTimer).(ResettingTimer) + return r.GetOrRegister(name, NewResettingTimer).(*ResettingTimer) } -// NewRegisteredResettingTimer constructs and registers a new StandardResettingTimer. -func NewRegisteredResettingTimer(name string, r Registry) ResettingTimer { +// NewRegisteredResettingTimer constructs and registers a new ResettingTimer. +func NewRegisteredResettingTimer(name string, r Registry) *ResettingTimer { c := NewResettingTimer() if nil == r { r = DefaultRegistry @@ -43,33 +24,15 @@ func NewRegisteredResettingTimer(name string, r Registry) ResettingTimer { return c } -// NewResettingTimer constructs a new StandardResettingTimer -func NewResettingTimer() ResettingTimer { - if !Enabled { - return NilResettingTimer{} - } - return &StandardResettingTimer{ - values: make([]int64, 0, InitialResettingTimerSliceCap), +// NewResettingTimer constructs a new ResettingTimer +func NewResettingTimer() *ResettingTimer { + return &ResettingTimer{ + values: make([]int64, 0, 10), } } -// NilResettingTimer is a no-op ResettingTimer. -type NilResettingTimer struct{} - -func (NilResettingTimer) Values() []int64 { return nil } -func (n NilResettingTimer) Snapshot() ResettingTimerSnapshot { return n } -func (NilResettingTimer) Time(f func()) { f() } -func (NilResettingTimer) Update(time.Duration) {} -func (NilResettingTimer) Percentiles([]float64) []float64 { return nil } -func (NilResettingTimer) Mean() float64 { return 0.0 } -func (NilResettingTimer) Max() int64 { return 0 } -func (NilResettingTimer) Min() int64 { return 0 } -func (NilResettingTimer) UpdateSince(time.Time) {} -func (NilResettingTimer) Count() int { return 0 } - -// StandardResettingTimer is the standard implementation of a ResettingTimer. -// and Meter. -type StandardResettingTimer struct { +// ResettingTimer is used for storing aggregated values for timers, which are reset on every flush interval. +type ResettingTimer struct { values []int64 sum int64 // sum is a running count of the total sum, used later to calculate mean @@ -77,28 +40,31 @@ type StandardResettingTimer struct { } // Snapshot resets the timer and returns a read-only copy of its contents. 
-func (t *StandardResettingTimer) Snapshot() ResettingTimerSnapshot { +func (t *ResettingTimer) Snapshot() *ResettingTimerSnapshot { t.mutex.Lock() defer t.mutex.Unlock() - snapshot := &resettingTimerSnapshot{} + snapshot := &ResettingTimerSnapshot{} if len(t.values) > 0 { snapshot.mean = float64(t.sum) / float64(len(t.values)) snapshot.values = t.values - t.values = make([]int64, 0, InitialResettingTimerSliceCap) + t.values = make([]int64, 0, 10) } t.sum = 0 return snapshot } // Record the duration of the execution of the given function. -func (t *StandardResettingTimer) Time(f func()) { +func (t *ResettingTimer) Time(f func()) { ts := time.Now() f() t.Update(time.Since(ts)) } // Record the duration of an event. -func (t *StandardResettingTimer) Update(d time.Duration) { +func (t *ResettingTimer) Update(d time.Duration) { + if !metricsEnabled { + return + } t.mutex.Lock() defer t.mutex.Unlock() t.values = append(t.values, int64(d)) @@ -106,12 +72,12 @@ func (t *StandardResettingTimer) Update(d time.Duration) { } // Record the duration of an event that started at a time and ends now. -func (t *StandardResettingTimer) UpdateSince(ts time.Time) { +func (t *ResettingTimer) UpdateSince(ts time.Time) { t.Update(time.Since(ts)) } -// resettingTimerSnapshot is a point-in-time copy of another ResettingTimer. -type resettingTimerSnapshot struct { +// ResettingTimerSnapshot is a point-in-time copy of another ResettingTimer. +type ResettingTimerSnapshot struct { values []int64 mean float64 max int64 @@ -121,20 +87,20 @@ type resettingTimerSnapshot struct { } // Count return the length of the values from snapshot. -func (t *resettingTimerSnapshot) Count() int { +func (t *ResettingTimerSnapshot) Count() int { return len(t.values) } // Percentiles returns the boundaries for the input percentiles. 
// note: this method is not thread safe -func (t *resettingTimerSnapshot) Percentiles(percentiles []float64) []float64 { +func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []float64 { t.calc(percentiles) return t.thresholdBoundaries } // Mean returns the mean of the snapshotted values // note: this method is not thread safe -func (t *resettingTimerSnapshot) Mean() float64 { +func (t *ResettingTimerSnapshot) Mean() float64 { if !t.calculated { t.calc(nil) } @@ -144,7 +110,7 @@ func (t *resettingTimerSnapshot) Mean() float64 { // Max returns the max of the snapshotted values // note: this method is not thread safe -func (t *resettingTimerSnapshot) Max() int64 { +func (t *ResettingTimerSnapshot) Max() int64 { if !t.calculated { t.calc(nil) } @@ -153,14 +119,14 @@ func (t *resettingTimerSnapshot) Max() int64 { // Min returns the min of the snapshotted values // note: this method is not thread safe -func (t *resettingTimerSnapshot) Min() int64 { +func (t *ResettingTimerSnapshot) Min() int64 { if !t.calculated { t.calc(nil) } return t.min } -func (t *resettingTimerSnapshot) calc(percentiles []float64) { +func (t *ResettingTimerSnapshot) calc(percentiles []float64) { scores := CalculatePercentiles(t.values, percentiles) t.thresholdBoundaries = scores if len(t.values) == 0 { diff --git a/metrics/sample.go b/metrics/sample.go index 17b2bee28f10..dc8167809fdd 100644 --- a/metrics/sample.go +++ b/metrics/sample.go @@ -10,27 +10,123 @@ import ( const rescaleThreshold = time.Hour -type SampleSnapshot interface { - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - Size() int - StdDev() float64 - Sum() int64 - Variance() float64 -} - -// Samples maintain a statistically-significant selection of values from +// Sample maintains a statistically-significant selection of values from // a stream. type Sample interface { - Snapshot() SampleSnapshot + Snapshot() *sampleSnapshot Clear() Update(int64) } +var ( + _ Sample = (*ExpDecaySample)(nil) + _ Sample = (*UniformSample)(nil) + _ Sample = (*resettingSample)(nil) +) + +// sampleSnapshot is a read-only copy of a Sample. +type sampleSnapshot struct { + count int64 + values []int64 + + max int64 + min int64 + mean float64 + sum int64 + variance float64 +} + +// newSampleSnapshotPrecalculated creates a read-only sampleSnapShot, using +// precalculated sums to avoid iterating the values +func newSampleSnapshotPrecalculated(count int64, values []int64, min, max, sum int64) *sampleSnapshot { + if len(values) == 0 { + return &sampleSnapshot{ + count: count, + values: values, + } + } + return &sampleSnapshot{ + count: count, + values: values, + max: max, + min: min, + mean: float64(sum) / float64(len(values)), + sum: sum, + } +} + +// newSampleSnapshot creates a read-only sampleSnapShot, and calculates some +// numbers. +func newSampleSnapshot(count int64, values []int64) *sampleSnapshot { + var ( + max int64 = math.MinInt64 + min int64 = math.MaxInt64 + sum int64 + ) + for _, v := range values { + sum += v + if v > max { + max = v + } + if v < min { + min = v + } + } + return newSampleSnapshotPrecalculated(count, values, min, max, sum) +} + +// Count returns the count of inputs at the time the snapshot was taken. +func (s *sampleSnapshot) Count() int64 { return s.count } + +// Max returns the maximal value at the time the snapshot was taken. +func (s *sampleSnapshot) Max() int64 { return s.max } + +// Mean returns the mean value at the time the snapshot was taken. 
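The ResettingTimer above is now a concrete struct whose Snapshot() drains the accumulated values into a *ResettingTimerSnapshot, and whose Update() is a no-op until metrics are enabled. A minimal usage sketch; the metric name and the simulated work are illustrative:

package main

import (
	"fmt"
	"time"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	metrics.Enable()

	t := metrics.NewRegisteredResettingTimer("engine/seal", metrics.DefaultRegistry)
	for i := 0; i < 10; i++ {
		t.Time(func() { time.Sleep(time.Millisecond) })
	}

	// Snapshot resets the timer; a subsequent snapshot starts from empty.
	snap := t.Snapshot()
	ps := snap.Percentiles([]float64{0.5, 0.95})
	fmt.Println("count:", snap.Count(), "mean(ns):", snap.Mean(), "p50:", ps[0], "p95:", ps[1])
}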
+func (s *sampleSnapshot) Mean() float64 { return s.mean } + +// Min returns the minimal value at the time the snapshot was taken. +func (s *sampleSnapshot) Min() int64 { return s.min } + +// Percentile returns an arbitrary percentile of values at the time the +// snapshot was taken. +func (s *sampleSnapshot) Percentile(p float64) float64 { + return SamplePercentile(s.values, p) +} + +// Percentiles returns a slice of arbitrary percentiles of values at the time +// the snapshot was taken. +func (s *sampleSnapshot) Percentiles(ps []float64) []float64 { + return CalculatePercentiles(s.values, ps) +} + +// Size returns the size of the sample at the time the snapshot was taken. +func (s *sampleSnapshot) Size() int { return len(s.values) } + +// StdDev returns the standard deviation of values at the time the snapshot was +// taken. +func (s *sampleSnapshot) StdDev() float64 { + if s.variance == 0.0 { + s.variance = SampleVariance(s.mean, s.values) + } + return math.Sqrt(s.variance) +} + +// Sum returns the sum of values at the time the snapshot was taken. +func (s *sampleSnapshot) Sum() int64 { return s.sum } + +// Values returns a copy of the values in the sample. +func (s *sampleSnapshot) Values() []int64 { + return slices.Clone(s.values) +} + +// Variance returns the variance of values at the time the snapshot was taken. +func (s *sampleSnapshot) Variance() float64 { + if s.variance == 0.0 { + s.variance = SampleVariance(s.mean, s.values) + } + return s.variance +} + // ExpDecaySample is an exponentially-decaying sample using a forward-decaying // priority reservoir. See Cormode et al's "Forward Decay: A Practical Time // Decay Model for Streaming Systems". @@ -49,9 +145,6 @@ type ExpDecaySample struct { // NewExpDecaySample constructs a new exponentially-decaying sample with the // given reservoir size and alpha. func NewExpDecaySample(reservoirSize int, alpha float64) Sample { - if !Enabled { - return NilSample{} - } s := &ExpDecaySample{ alpha: alpha, reservoirSize: reservoirSize, @@ -79,7 +172,7 @@ func (s *ExpDecaySample) Clear() { } // Snapshot returns a read-only copy of the sample. -func (s *ExpDecaySample) Snapshot() SampleSnapshot { +func (s *ExpDecaySample) Snapshot() *sampleSnapshot { s.mutex.Lock() defer s.mutex.Unlock() var ( @@ -105,6 +198,9 @@ func (s *ExpDecaySample) Snapshot() SampleSnapshot { // Update samples a new value. func (s *ExpDecaySample) Update(v int64) { + if !metricsEnabled { + return + } s.update(time.Now(), v) } @@ -140,13 +236,6 @@ func (s *ExpDecaySample) update(t time.Time, v int64) { } } -// NilSample is a no-op Sample. -type NilSample struct{} - -func (NilSample) Clear() {} -func (NilSample) Snapshot() SampleSnapshot { return (*emptySnapshot)(nil) } -func (NilSample) Update(v int64) {} - // SamplePercentile returns an arbitrary percentile of the slice of int64. func SamplePercentile(values []int64, p float64) float64 { return CalculatePercentiles(values, []float64{p})[0] @@ -181,114 +270,6 @@ func CalculatePercentiles(values []int64, ps []float64) []float64 { return scores } -// sampleSnapshot is a read-only copy of another Sample. 
-type sampleSnapshot struct { - count int64 - values []int64 - - max int64 - min int64 - mean float64 - sum int64 - variance float64 -} - -// newSampleSnapshotPrecalculated creates a read-only sampleSnapShot, using -// precalculated sums to avoid iterating the values -func newSampleSnapshotPrecalculated(count int64, values []int64, min, max, sum int64) *sampleSnapshot { - if len(values) == 0 { - return &sampleSnapshot{ - count: count, - values: values, - } - } - return &sampleSnapshot{ - count: count, - values: values, - max: max, - min: min, - mean: float64(sum) / float64(len(values)), - sum: sum, - } -} - -// newSampleSnapshot creates a read-only sampleSnapShot, and calculates some -// numbers. -func newSampleSnapshot(count int64, values []int64) *sampleSnapshot { - var ( - max int64 = math.MinInt64 - min int64 = math.MaxInt64 - sum int64 - ) - for _, v := range values { - sum += v - if v > max { - max = v - } - if v < min { - min = v - } - } - return newSampleSnapshotPrecalculated(count, values, min, max, sum) -} - -// Count returns the count of inputs at the time the snapshot was taken. -func (s *sampleSnapshot) Count() int64 { return s.count } - -// Max returns the maximal value at the time the snapshot was taken. -func (s *sampleSnapshot) Max() int64 { return s.max } - -// Mean returns the mean value at the time the snapshot was taken. -func (s *sampleSnapshot) Mean() float64 { return s.mean } - -// Min returns the minimal value at the time the snapshot was taken. -func (s *sampleSnapshot) Min() int64 { return s.min } - -// Percentile returns an arbitrary percentile of values at the time the -// snapshot was taken. -func (s *sampleSnapshot) Percentile(p float64) float64 { - return SamplePercentile(s.values, p) -} - -// Percentiles returns a slice of arbitrary percentiles of values at the time -// the snapshot was taken. -func (s *sampleSnapshot) Percentiles(ps []float64) []float64 { - return CalculatePercentiles(s.values, ps) -} - -// Size returns the size of the sample at the time the snapshot was taken. -func (s *sampleSnapshot) Size() int { return len(s.values) } - -// Snapshot returns the snapshot. -func (s *sampleSnapshot) Snapshot() SampleSnapshot { return s } - -// StdDev returns the standard deviation of values at the time the snapshot was -// taken. -func (s *sampleSnapshot) StdDev() float64 { - if s.variance == 0.0 { - s.variance = SampleVariance(s.mean, s.values) - } - return math.Sqrt(s.variance) -} - -// Sum returns the sum of values at the time the snapshot was taken. -func (s *sampleSnapshot) Sum() int64 { return s.sum } - -// Values returns a copy of the values in the sample. -func (s *sampleSnapshot) Values() []int64 { - values := make([]int64, len(s.values)) - copy(values, s.values) - return values -} - -// Variance returns the variance of values at the time the snapshot was taken. -func (s *sampleSnapshot) Variance() float64 { - if s.variance == 0.0 { - s.variance = SampleVariance(s.mean, s.values) - } - return s.variance -} - // SampleVariance returns the variance of the slice of int64. func SampleVariance(mean float64, values []int64) float64 { if len(values) == 0 { @@ -302,7 +283,7 @@ func SampleVariance(mean float64, values []int64) float64 { return sum / float64(len(values)) } -// A uniform sample using Vitter's Algorithm R. +// UniformSample implements a uniform sample using Vitter's Algorithm R. 
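The concrete sampleSnapshot and the two retained sample implementations above (ExpDecaySample and UniformSample) are normally consumed through histograms. A minimal sketch under that assumption; the names are illustrative, and updates are dropped until metrics.Enable() has been called because the gate now lives in Update() rather than in the removed Nil* stubs:

package main

import (
	"fmt"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	metrics.Enable()
	r := metrics.NewRegistry()

	// Uniform reservoir of 100 values (Vitter's Algorithm R).
	uni := metrics.NewRegisteredHistogram("block/txs", r, metrics.NewUniformSample(100))

	// Exponentially-decaying reservoir with the same parameters the Timer uses.
	dec := metrics.NewRegisteredHistogram("block/gas", r, metrics.NewExpDecaySample(1028, 0.015))

	for i := int64(0); i < 1000; i++ {
		uni.Update(i)
		dec.Update(i)
	}
	fmt.Println("uniform p95:", uni.Snapshot().Percentiles([]float64{0.95})[0])
	fmt.Println("decaying p95:", dec.Snapshot().Percentiles([]float64{0.95})[0])
}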
// // type UniformSample struct { @@ -316,9 +297,6 @@ type UniformSample struct { // NewUniformSample constructs a new uniform sample with the given reservoir // size. func NewUniformSample(reservoirSize int) Sample { - if !Enabled { - return NilSample{} - } return &UniformSample{ reservoirSize: reservoirSize, values: make([]int64, 0, reservoirSize), @@ -336,14 +314,13 @@ func (s *UniformSample) Clear() { s.mutex.Lock() defer s.mutex.Unlock() s.count = 0 - s.values = make([]int64, 0, s.reservoirSize) + clear(s.values) } // Snapshot returns a read-only copy of the sample. -func (s *UniformSample) Snapshot() SampleSnapshot { +func (s *UniformSample) Snapshot() *sampleSnapshot { s.mutex.Lock() - values := make([]int64, len(s.values)) - copy(values, s.values) + values := slices.Clone(s.values) count := s.count s.mutex.Unlock() return newSampleSnapshot(count, values) @@ -351,21 +328,24 @@ func (s *UniformSample) Snapshot() SampleSnapshot { // Update samples a new value. func (s *UniformSample) Update(v int64) { + if !metricsEnabled { + return + } s.mutex.Lock() defer s.mutex.Unlock() s.count++ if len(s.values) < s.reservoirSize { s.values = append(s.values, v) + return + } + var r int64 + if s.rand != nil { + r = s.rand.Int63n(s.count) } else { - var r int64 - if s.rand != nil { - r = s.rand.Int63n(s.count) - } else { - r = rand.Int63n(s.count) - } - if r < int64(len(s.values)) { - s.values[int(r)] = v - } + r = rand.Int63n(s.count) + } + if r < int64(len(s.values)) { + s.values[int(r)] = v } } diff --git a/metrics/sample_test.go b/metrics/sample_test.go index c855671ae23c..6619eb1e9eb2 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -86,7 +86,7 @@ func TestExpDecaySample(t *testing.T) { if have, want := snap.Size(), min(tc.updates, tc.reservoirSize); have != want { t.Errorf("unexpected size: have %d want %d", have, want) } - values := snap.(*sampleSnapshot).values + values := snap.values if have, want := len(values), min(tc.updates, tc.reservoirSize); have != want { t.Errorf("unexpected values length: have %d want %d", have, want) } @@ -111,8 +111,7 @@ func TestExpDecaySampleNanosecondRegression(t *testing.T) { for i := 0; i < 1000; i++ { sw.Update(20) } - s := sw.Snapshot() - v := s.(*sampleSnapshot).values + v := sw.Snapshot().values avg := float64(0) for i := 0; i < len(v); i++ { avg += float64(v[i]) @@ -166,7 +165,7 @@ func TestUniformSample(t *testing.T) { if size := s.Size(); size != 100 { t.Errorf("s.Size(): 100 != %v\n", size) } - values := s.(*sampleSnapshot).values + values := s.values if l := len(values); l != 100 { t.Errorf("len(s.Values()): 100 != %v\n", l) @@ -184,8 +183,7 @@ func TestUniformSampleIncludesTail(t *testing.T) { for i := 0; i < max; i++ { sw.Update(int64(i)) } - s := sw.Snapshot() - v := s.(*sampleSnapshot).values + v := sw.Snapshot().values sum := 0 exp := (max - 1) * max / 2 for i := 0; i < len(v); i++ { @@ -220,7 +218,7 @@ func benchmarkSample(b *testing.B, s Sample) { } } -func testExpDecaySampleStatistics(t *testing.T, s SampleSnapshot) { +func testExpDecaySampleStatistics(t *testing.T, s *sampleSnapshot) { if sum := s.Sum(); sum != 496598 { t.Errorf("s.Sum(): 496598 != %v\n", sum) } @@ -251,7 +249,7 @@ func testExpDecaySampleStatistics(t *testing.T, s SampleSnapshot) { } } -func testUniformSampleStatistics(t *testing.T, s SampleSnapshot) { +func testUniformSampleStatistics(t *testing.T, s *sampleSnapshot) { if count := s.Count(); count != 10000 { t.Errorf("s.Count(): 10000 != %v\n", count) } diff --git a/metrics/syslog.go 
b/metrics/syslog.go index fd856d697316..0bc4ed0da59f 100644 --- a/metrics/syslog.go +++ b/metrics/syslog.go @@ -15,17 +15,17 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { for range time.Tick(d) { r.Each(func(name string, i interface{}) { switch metric := i.(type) { - case Counter: + case *Counter: w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Snapshot().Count())) - case CounterFloat64: + case *CounterFloat64: w.Info(fmt.Sprintf("counter %s: count: %f", name, metric.Snapshot().Count())) - case Gauge: + case *Gauge: w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Snapshot().Value())) - case GaugeFloat64: + case *GaugeFloat64: w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Snapshot().Value())) - case GaugeInfo: + case *GaugeInfo: w.Info(fmt.Sprintf("gauge %s: value: %s", name, metric.Snapshot().Value())) - case Healthcheck: + case *Healthcheck: metric.Check() w.Info(fmt.Sprintf("healthcheck %s: error: %v", name, metric.Error())) case Histogram: @@ -45,7 +45,7 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { ps[3], ps[4], )) - case Meter: + case *Meter: m := metric.Snapshot() w.Info(fmt.Sprintf( "meter %s: count: %d 1-min: %.2f 5-min: %.2f 15-min: %.2f mean: %.2f", @@ -56,7 +56,7 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { m.Rate15(), m.RateMean(), )) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) w.Info(fmt.Sprintf( diff --git a/metrics/timer.go b/metrics/timer.go index fc2a88f508bc..9df15c967aba 100644 --- a/metrics/timer.go +++ b/metrics/timer.go @@ -5,47 +5,30 @@ import ( "time" ) -type TimerSnapshot interface { - HistogramSnapshot - MeterSnapshot -} - -// Timer capture the duration and rate of events. -type Timer interface { - Snapshot() TimerSnapshot - Stop() - Time(func()) - UpdateSince(time.Time) - Update(time.Duration) -} - // GetOrRegisterTimer returns an existing Timer or constructs and registers a -// new StandardTimer. +// new Timer. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func GetOrRegisterTimer(name string, r Registry) Timer { +func GetOrRegisterTimer(name string, r Registry) *Timer { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewTimer).(Timer) + return r.GetOrRegister(name, NewTimer).(*Timer) } -// NewCustomTimer constructs a new StandardTimer from a Histogram and a Meter. +// NewCustomTimer constructs a new Timer from a Histogram and a Meter. // Be sure to call Stop() once the timer is of no use to allow for garbage collection. -func NewCustomTimer(h Histogram, m Meter) Timer { - if !Enabled { - return NilTimer{} - } - return &StandardTimer{ +func NewCustomTimer(h Histogram, m *Meter) *Timer { + return &Timer{ histogram: h, meter: m, } } -// NewRegisteredTimer constructs and registers a new StandardTimer. +// NewRegisteredTimer constructs and registers a new Timer. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func NewRegisteredTimer(name string, r Registry) Timer { +func NewRegisteredTimer(name string, r Registry) *Timer { c := NewTimer() if nil == r { r = DefaultRegistry @@ -54,60 +37,47 @@ func NewRegisteredTimer(name string, r Registry) Timer { return c } -// NewTimer constructs a new StandardTimer using an exponentially-decaying +// NewTimer constructs a new Timer using an exponentially-decaying // sample with the same reservoir size and alpha as UNIX load averages. 
// Be sure to call Stop() once the timer is of no use to allow for garbage collection. -func NewTimer() Timer { - if !Enabled { - return NilTimer{} - } - return &StandardTimer{ +func NewTimer() *Timer { + return &Timer{ histogram: NewHistogram(NewExpDecaySample(1028, 0.015)), meter: NewMeter(), } } -// NilTimer is a no-op Timer. -type NilTimer struct{} - -func (NilTimer) Snapshot() TimerSnapshot { return (*emptySnapshot)(nil) } -func (NilTimer) Stop() {} -func (NilTimer) Time(f func()) { f() } -func (NilTimer) Update(time.Duration) {} -func (NilTimer) UpdateSince(time.Time) {} - -// StandardTimer is the standard implementation of a Timer and uses a Histogram -// and Meter. -type StandardTimer struct { +// Timer captures the duration and rate of events, using a Histogram and a Meter. +type Timer struct { histogram Histogram - meter Meter + meter *Meter mutex sync.Mutex } // Snapshot returns a read-only copy of the timer. -func (t *StandardTimer) Snapshot() TimerSnapshot { +func (t *Timer) Snapshot() *TimerSnapshot { t.mutex.Lock() defer t.mutex.Unlock() - return &timerSnapshot{ + return &TimerSnapshot{ histogram: t.histogram.Snapshot(), meter: t.meter.Snapshot(), } } // Stop stops the meter. -func (t *StandardTimer) Stop() { +func (t *Timer) Stop() { t.meter.Stop() } // Time record the duration of the execution of the given function. -func (t *StandardTimer) Time(f func()) { +func (t *Timer) Time(f func()) { ts := time.Now() f() t.Update(time.Since(ts)) } // Update the duration of an event, in nanoseconds. -func (t *StandardTimer) Update(d time.Duration) { +func (t *Timer) Update(d time.Duration) { t.mutex.Lock() defer t.mutex.Unlock() t.histogram.Update(d.Nanoseconds()) @@ -116,67 +86,67 @@ func (t *StandardTimer) Update(d time.Duration) { // UpdateSince update the duration of an event that started at a time and ends now. // The record uses nanoseconds. -func (t *StandardTimer) UpdateSince(ts time.Time) { +func (t *Timer) UpdateSince(ts time.Time) { t.Update(time.Since(ts)) } -// timerSnapshot is a read-only copy of another Timer. -type timerSnapshot struct { +// TimerSnapshot is a read-only copy of another Timer. +type TimerSnapshot struct { histogram HistogramSnapshot - meter MeterSnapshot + meter *MeterSnapshot } // Count returns the number of events recorded at the time the snapshot was // taken. -func (t *timerSnapshot) Count() int64 { return t.histogram.Count() } +func (t *TimerSnapshot) Count() int64 { return t.histogram.Count() } // Max returns the maximum value at the time the snapshot was taken. -func (t *timerSnapshot) Max() int64 { return t.histogram.Max() } +func (t *TimerSnapshot) Max() int64 { return t.histogram.Max() } // Size returns the size of the sample at the time the snapshot was taken. -func (t *timerSnapshot) Size() int { return t.histogram.Size() } +func (t *TimerSnapshot) Size() int { return t.histogram.Size() } // Mean returns the mean value at the time the snapshot was taken. -func (t *timerSnapshot) Mean() float64 { return t.histogram.Mean() } +func (t *TimerSnapshot) Mean() float64 { return t.histogram.Mean() } // Min returns the minimum value at the time the snapshot was taken. -func (t *timerSnapshot) Min() int64 { return t.histogram.Min() } +func (t *TimerSnapshot) Min() int64 { return t.histogram.Min() } // Percentile returns an arbitrary percentile of sampled values at the time the // snapshot was taken. 
-func (t *timerSnapshot) Percentile(p float64) float64 { +func (t *TimerSnapshot) Percentile(p float64) float64 { return t.histogram.Percentile(p) } // Percentiles returns a slice of arbitrary percentiles of sampled values at // the time the snapshot was taken. -func (t *timerSnapshot) Percentiles(ps []float64) []float64 { +func (t *TimerSnapshot) Percentiles(ps []float64) []float64 { return t.histogram.Percentiles(ps) } // Rate1 returns the one-minute moving average rate of events per second at the // time the snapshot was taken. -func (t *timerSnapshot) Rate1() float64 { return t.meter.Rate1() } +func (t *TimerSnapshot) Rate1() float64 { return t.meter.Rate1() } // Rate5 returns the five-minute moving average rate of events per second at // the time the snapshot was taken. -func (t *timerSnapshot) Rate5() float64 { return t.meter.Rate5() } +func (t *TimerSnapshot) Rate5() float64 { return t.meter.Rate5() } // Rate15 returns the fifteen-minute moving average rate of events per second // at the time the snapshot was taken. -func (t *timerSnapshot) Rate15() float64 { return t.meter.Rate15() } +func (t *TimerSnapshot) Rate15() float64 { return t.meter.Rate15() } // RateMean returns the meter's mean rate of events per second at the time the // snapshot was taken. -func (t *timerSnapshot) RateMean() float64 { return t.meter.RateMean() } +func (t *TimerSnapshot) RateMean() float64 { return t.meter.RateMean() } // StdDev returns the standard deviation of the values at the time the snapshot // was taken. -func (t *timerSnapshot) StdDev() float64 { return t.histogram.StdDev() } +func (t *TimerSnapshot) StdDev() float64 { return t.histogram.StdDev() } // Sum returns the sum at the time the snapshot was taken. -func (t *timerSnapshot) Sum() int64 { return t.histogram.Sum() } +func (t *TimerSnapshot) Sum() int64 { return t.histogram.Sum() } // Variance returns the variance of the values at the time the snapshot was // taken. 
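Timer follows the same pattern as the metrics above: a concrete struct built from a Histogram and a *Meter, whose Snapshot() returns a *TimerSnapshot combining both views. A minimal sketch; the metric name and the timed work are illustrative:

package main

import (
	"fmt"
	"time"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func main() {
	metrics.Enable()

	t := metrics.NewRegisteredTimer("api/call", metrics.DefaultRegistry)
	for i := 0; i < 5; i++ {
		start := time.Now()
		time.Sleep(2 * time.Millisecond) // stand-in for the measured work
		t.UpdateSince(start)
	}

	// *TimerSnapshot exposes both histogram percentiles and meter rates.
	snap := t.Snapshot()
	fmt.Println("count:", snap.Count(), "p95(ns):", snap.Percentile(0.95), "1m rate:", snap.Rate1())

	// Stop the underlying meter when the timer is no longer needed.
	t.Stop()
}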
-func (t *timerSnapshot) Variance() float64 { return t.histogram.Variance() } +func (t *TimerSnapshot) Variance() float64 { return t.histogram.Variance() } diff --git a/metrics/writer.go b/metrics/writer.go index c211c5046b7d..2a41f8e1fe36 100644 --- a/metrics/writer.go +++ b/metrics/writer.go @@ -26,22 +26,22 @@ func WriteOnce(r Registry, w io.Writer) { slices.SortFunc(namedMetrics, namedMetric.cmp) for _, namedMetric := range namedMetrics { switch metric := namedMetric.m.(type) { - case Counter: + case *Counter: fmt.Fprintf(w, "counter %s\n", namedMetric.name) fmt.Fprintf(w, " count: %9d\n", metric.Snapshot().Count()) - case CounterFloat64: + case *CounterFloat64: fmt.Fprintf(w, "counter %s\n", namedMetric.name) fmt.Fprintf(w, " count: %f\n", metric.Snapshot().Count()) - case Gauge: + case *Gauge: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) fmt.Fprintf(w, " value: %9d\n", metric.Snapshot().Value()) - case GaugeFloat64: + case *GaugeFloat64: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) fmt.Fprintf(w, " value: %f\n", metric.Snapshot().Value()) - case GaugeInfo: + case *GaugeInfo: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) fmt.Fprintf(w, " value: %s\n", metric.Snapshot().Value().String()) - case Healthcheck: + case *Healthcheck: metric.Check() fmt.Fprintf(w, "healthcheck %s\n", namedMetric.name) fmt.Fprintf(w, " error: %v\n", metric.Error()) @@ -59,7 +59,7 @@ func WriteOnce(r Registry, w io.Writer) { fmt.Fprintf(w, " 95%%: %12.2f\n", ps[2]) fmt.Fprintf(w, " 99%%: %12.2f\n", ps[3]) fmt.Fprintf(w, " 99.9%%: %12.2f\n", ps[4]) - case Meter: + case *Meter: m := metric.Snapshot() fmt.Fprintf(w, "meter %s\n", namedMetric.name) fmt.Fprintf(w, " count: %9d\n", m.Count()) @@ -67,7 +67,7 @@ func WriteOnce(r Registry, w io.Writer) { fmt.Fprintf(w, " 5-min rate: %12.2f\n", m.Rate5()) fmt.Fprintf(w, " 15-min rate: %12.2f\n", m.Rate15()) fmt.Fprintf(w, " mean rate: %12.2f\n", m.RateMean()) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) fmt.Fprintf(w, "timer %s\n", namedMetric.name) diff --git a/p2p/metrics.go b/p2p/metrics.go index 0f1f42b08a48..febff6c4ece9 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -42,7 +42,7 @@ type meteredConn struct { // returns the original object. func newMeteredConn(conn net.Conn, ingress bool) net.Conn { // Short circuit if metrics are disabled - if !metrics.Enabled { + if !metrics.Enabled() { return conn } // Otherwise bump the connection counters and wrap the connection
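The same concrete-type switch recurs in every reporter touched by this patch (log, writer, opentsdb, syslog, prometheus, influxdb). A minimal sketch of a custom dump routine written against the new pointer types; the registry contents and metric names are illustrative:

package main

import (
	"fmt"

	"github.com/XinFinOrg/XDPoSChain/metrics"
)

func dumpRegistry(r metrics.Registry) {
	r.Each(func(name string, i interface{}) {
		switch m := i.(type) {
		case *metrics.Counter:
			fmt.Printf("counter %s: %d\n", name, m.Snapshot().Count())
		case *metrics.Gauge:
			fmt.Printf("gauge %s: %d\n", name, m.Snapshot().Value())
		case *metrics.Meter:
			fmt.Printf("meter %s: %.2f/s (1m)\n", name, m.Snapshot().Rate1())
		case *metrics.Timer:
			fmt.Printf("timer %s: mean %.0fns\n", name, m.Snapshot().Mean())
		}
	})
}

func main() {
	metrics.Enable()
	r := metrics.NewRegistry()
	metrics.NewRegisteredCounter("demo/counter", r).Inc(3)
	metrics.NewRegisteredMeter("demo/meter", r).Mark(7)
	dumpRegistry(r)
}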