From 208ceef33729a7b803486d94fc8a11c4580d722d Mon Sep 17 00:00:00 2001 From: Woosang Son Date: Thu, 4 Mar 2021 11:14:46 +0900 Subject: [PATCH] feat: more prometheus metrics for monitoring performance (#146) (#175) --- mempool/clist_mempool.go | 8 ++++++- mempool/metrics.go | 17 +++++++++---- state/execution.go | 33 ++++++++++++++++++------- state/metrics.go | 52 ++++++++++++++++++++++++++-------------- 4 files changed, 78 insertions(+), 32 deletions(-) diff --git a/mempool/clist_mempool.go b/mempool/clist_mempool.go index 4541f7c92..6ea78cf6b 100644 --- a/mempool/clist_mempool.go +++ b/mempool/clist_mempool.go @@ -7,6 +7,7 @@ import ( "fmt" "sync" "sync/atomic" + "time" abci "github.com/line/ostracon/abci/types" cfg "github.com/line/ostracon/config" @@ -305,7 +306,7 @@ func (mem *CListMempool) globalCb(req *abci.Request, res *abci.Response) { return } - mem.metrics.RecheckTimes.Add(1) + mem.metrics.RecheckCount.Add(1) mem.resCbRecheck(req, res) // update metrics @@ -610,6 +611,7 @@ func (mem *CListMempool) Update( // Either recheck non-committed txs to see if they became invalid // or just notify there're some txs left. + recheckStartTime := time.Now().UnixNano() if mem.Size() > 0 { if mem.config.Recheck { mem.logger.Info("Recheck txs", "numtxs", mem.Size(), "height", height) @@ -621,6 +623,10 @@ func (mem *CListMempool) Update( mem.notifyTxsAvailable() } } + recheckEndTime := time.Now().UnixNano() + + recheckTimeMs := float64(recheckEndTime-recheckStartTime) / 1000000 + mem.metrics.RecheckTime.Set(recheckTimeMs) // Update metrics mem.metrics.Size.Set(float64(mem.Size())) diff --git a/mempool/metrics.go b/mempool/metrics.go index 5e4eaf5ed..39459de69 100644 --- a/mempool/metrics.go +++ b/mempool/metrics.go @@ -23,7 +23,9 @@ type Metrics struct { // Number of failed transactions. FailedTxs metrics.Counter // Number of times transactions are rechecked in the mempool. - RecheckTimes metrics.Counter + RecheckCount metrics.Counter + // Time of recheck transactions in the mempool. + RecheckTime metrics.Gauge } // PrometheusMetrics returns Metrics build using Prometheus client library. @@ -54,12 +56,18 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "failed_txs", Help: "Number of failed transactions.", }, labels).With(labelsAndValues...), - RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ + RecheckCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, - Name: "recheck_times", + Name: "recheck_count", Help: "Number of times transactions are rechecked in the mempool.", }, labels).With(labelsAndValues...), + RecheckTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "recheck_time", + Help: "Time of recheck transactions in the mempool in ms.", + }, labels).With(labelsAndValues...), } } @@ -69,6 +77,7 @@ func NopMetrics() *Metrics { Size: discard.NewGauge(), TxSizeBytes: discard.NewHistogram(), FailedTxs: discard.NewCounter(), - RecheckTimes: discard.NewCounter(), + RecheckCount: discard.NewCounter(), + RecheckTime: discard.NewGauge(), } } diff --git a/state/execution.go b/state/execution.go index 2060147ca..586f3ecf8 100644 --- a/state/execution.go +++ b/state/execution.go @@ -133,21 +133,22 @@ func (blockExec *BlockExecutor) ApplyBlock( state State, blockID types.BlockID, block *types.Block, ) (State, int64, error) { - startTime := time.Now().UnixNano() // When doing ApplyBlock, we don't need to check whether the block.Round is same to current round, // so we just put block.Round for the current round parameter if err := blockExec.ValidateBlock(state, block.Round, block); err != nil { return state, 0, ErrInvalidBlock(err) } - endTime := time.Now().UnixNano() - blockExec.metrics.BlockVerifyingTime.Observe(float64(endTime-startTime) / 1000000) - startTime = endTime + execStartTime := time.Now().UnixNano() abciResponses, err := execBlockOnProxyApp( blockExec.logger, blockExec.proxyApp, block, blockExec.store, state.InitialHeight, state.VoterParams, ) - endTime = time.Now().UnixNano() - blockExec.metrics.BlockProcessingTime.Observe(float64(endTime-startTime) / 1000000) + execEndTime := time.Now().UnixNano() + + execTimeMs := float64(execEndTime-execStartTime) / 1000000 + blockExec.metrics.BlockProcessingTime.Observe(execTimeMs) + blockExec.metrics.BlockExecutionTime.Set(execTimeMs) + if err != nil { return state, 0, ErrProxyAppConn(err) } @@ -182,14 +183,17 @@ func (blockExec *BlockExecutor) ApplyBlock( return state, 0, fmt.Errorf("commit failed for application: %v", err) } - startTime = time.Now().UnixNano() // Lock mempool, commit app state, update mempoool. + commitStartTime := time.Now().UnixNano() appHash, retainHeight, err := blockExec.Commit(state, block, abciResponses.DeliverTxs) + commitEndTime := time.Now().UnixNano() + + commitTimeMs := float64(commitEndTime-commitStartTime) / 1000000 + blockExec.metrics.BlockCommitTime.Set(commitTimeMs) + if err != nil { return state, 0, fmt.Errorf("commit failed for application: %v", err) } - endTime = time.Now().UnixNano() - blockExec.metrics.BlockCommittingTime.Observe(float64(endTime-startTime) / 1000000) // Update evpool with the latest state. blockExec.evpool.Update(state, block.Evidence.Evidence) @@ -234,7 +238,13 @@ func (blockExec *BlockExecutor) Commit( } // Commit block, get hash back + appCommitStartTime := time.Now().UnixNano() res, err := blockExec.proxyApp.CommitSync() + appCommitEndTime := time.Now().UnixNano() + + appCommitTimeMs := float64(appCommitEndTime-appCommitStartTime) / 1000000 + blockExec.metrics.BlockAppCommitTime.Set(appCommitTimeMs) + if err != nil { blockExec.logger.Error("client error during proxyAppConn.CommitSync", "err", err) return nil, 0, err @@ -249,6 +259,7 @@ func (blockExec *BlockExecutor) Commit( ) // Update mempool. + updateMempoolStartTime := time.Now().UnixNano() err = blockExec.mempool.Update( block.Height, block.Txs, @@ -256,6 +267,10 @@ func (blockExec *BlockExecutor) Commit( TxPreCheck(state), TxPostCheck(state), ) + updateMempoolEndTime := time.Now().UnixNano() + + updateMempoolTimeMs := float64(updateMempoolEndTime-updateMempoolStartTime) / 1000000 + blockExec.metrics.BlockUpdateMempoolTime.Set(updateMempoolTimeMs) return res.Data, res.RetainHeight, err } diff --git a/state/metrics.go b/state/metrics.go index 7461936d9..8033cefa6 100644 --- a/state/metrics.go +++ b/state/metrics.go @@ -15,12 +15,16 @@ const ( // Metrics contains metrics exposed by this package. type Metrics struct { - // Time of ValidBlock - BlockVerifyingTime metrics.Histogram // Time between BeginBlock and EndBlock. BlockProcessingTime metrics.Histogram - // Time of Commit - BlockCommittingTime metrics.Histogram + // Time gauge between BeginBlock and EndBlock. + BlockExecutionTime metrics.Gauge + // Time of commit + BlockCommitTime metrics.Gauge + // Time of app commit + BlockAppCommitTime metrics.Gauge + // Time of update mempool + BlockUpdateMempoolTime metrics.Gauge } // PrometheusMetrics returns Metrics build using Prometheus client library. @@ -36,13 +40,6 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { compositeBuckets = append(compositeBuckets, stdprometheus.LinearBuckets(1000, 500, 4)...) return &Metrics{ - BlockVerifyingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ - Namespace: namespace, - Subsystem: MetricsSubsystem, - Name: "block_verifying_time", - Help: "Time of ValidBlock in ms.", - Buckets: stdprometheus.LinearBuckets(50, 50, 10), - }, labels).With(labelsAndValues...), BlockProcessingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -50,12 +47,29 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Help: "Time between BeginBlock and EndBlock in ms.", Buckets: compositeBuckets, }, labels).With(labelsAndValues...), - BlockCommittingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{ + BlockExecutionTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_execution_time", + Help: "Time between BeginBlock and EndBlock in ms.", + }, labels).With(labelsAndValues...), + BlockCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_commit_time", + Help: "Time of commit in ms.", + }, labels).With(labelsAndValues...), + BlockAppCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "block_app_commit_time", + Help: "Time of app commit in ms.", + }, labels).With(labelsAndValues...), + BlockUpdateMempoolTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, - Name: "block_committing_time", - Help: "Time of Commit in ms.", - Buckets: stdprometheus.LinearBuckets(20, 20, 10), + Name: "block_update_mempool_time", + Help: "Time of update mempool in ms.", }, labels).With(labelsAndValues...), } } @@ -63,8 +77,10 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { // NopMetrics returns no-op Metrics. func NopMetrics() *Metrics { return &Metrics{ - BlockVerifyingTime: discard.NewHistogram(), - BlockProcessingTime: discard.NewHistogram(), - BlockCommittingTime: discard.NewHistogram(), + BlockProcessingTime: discard.NewHistogram(), + BlockExecutionTime: discard.NewGauge(), + BlockCommitTime: discard.NewGauge(), + BlockAppCommitTime: discard.NewGauge(), + BlockUpdateMempoolTime: discard.NewGauge(), } }