Skip to content

Commit

Permalink
feat: more prometheus metrics for monitoring performance (#146)
Browse files Browse the repository at this point in the history
* feat: more prometheus metrics for monitoring performance

* chore: touch

* chore: remove touch
  • Loading branch information
jinsan-line authored Dec 11, 2020
1 parent 1baf670 commit 05dd210
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 9 deletions.
8 changes: 7 additions & 1 deletion mempool/clist_mempool.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"sync"
"sync/atomic"
"time"

abci "github.com/tendermint/tendermint/abci/types"
cfg "github.com/tendermint/tendermint/config"
Expand Down Expand Up @@ -305,7 +306,7 @@ func (mem *CListMempool) globalCb(req *abci.Request, res *abci.Response) {
return
}

mem.metrics.RecheckTimes.Add(1)
mem.metrics.RecheckCount.Add(1)
mem.resCbRecheck(req, res)

// update metrics
Expand Down Expand Up @@ -599,6 +600,7 @@ func (mem *CListMempool) Update(

// Either recheck non-committed txs to see if they became invalid
// or just notify there're some txs left.
recheckStartTime := time.Now().UnixNano()
if mem.Size() > 0 {
if mem.config.Recheck {
mem.logger.Info("Recheck txs", "numtxs", mem.Size(), "height", height)
Expand All @@ -610,6 +612,10 @@ func (mem *CListMempool) Update(
mem.notifyTxsAvailable()
}
}
recheckEndTime := time.Now().UnixNano()

recheckTimeMs := float64(recheckEndTime-recheckStartTime) / 1000000
mem.metrics.RecheckTime.Set(recheckTimeMs)

// Update metrics
mem.metrics.Size.Set(float64(mem.Size()))
Expand Down
17 changes: 13 additions & 4 deletions mempool/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ type Metrics struct {
// Number of failed transactions.
FailedTxs metrics.Counter
// Number of times transactions are rechecked in the mempool.
RecheckTimes metrics.Counter
RecheckCount metrics.Counter
// Time of recheck transactions in the mempool.
RecheckTime metrics.Gauge
}

// PrometheusMetrics returns Metrics build using Prometheus client library.
Expand Down Expand Up @@ -54,12 +56,18 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "failed_txs",
Help: "Number of failed transactions.",
}, labels).With(labelsAndValues...),
RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
RecheckCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "recheck_times",
Name: "recheck_count",
Help: "Number of times transactions are rechecked in the mempool.",
}, labels).With(labelsAndValues...),
RecheckTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "recheck_time",
Help: "Time of recheck transactions in the mempool in ms.",
}, labels).With(labelsAndValues...),
}
}

Expand All @@ -69,6 +77,7 @@ func NopMetrics() *Metrics {
Size: discard.NewGauge(),
TxSizeBytes: discard.NewHistogram(),
FailedTxs: discard.NewCounter(),
RecheckTimes: discard.NewCounter(),
RecheckCount: discard.NewCounter(),
RecheckTime: discard.NewGauge(),
}
}
27 changes: 24 additions & 3 deletions state/execution.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ func (blockExec *BlockExecutor) ApplyBlock(
return state, 0, ErrInvalidBlock(err)
}

startTime := time.Now().UnixNano()
execStartTime := time.Now().UnixNano()
abciResponses, err := execBlockOnProxyApp(blockExec.logger, blockExec.proxyApp, block, blockExec.db)
endTime := time.Now().UnixNano()
blockExec.metrics.BlockProcessingTime.Observe(float64(endTime-startTime) / 1000000)
execEndTime := time.Now().UnixNano()

execTimeMs := float64(execEndTime-execStartTime) / 1000000
blockExec.metrics.BlockProcessingTime.Observe(execTimeMs)
blockExec.metrics.BlockExecutionTime.Set(execTimeMs)

if err != nil {
return state, 0, ErrProxyAppConn(err)
}
Expand Down Expand Up @@ -167,7 +171,13 @@ func (blockExec *BlockExecutor) ApplyBlock(
}

// Lock mempool, commit app state, update mempoool.
commitStartTime := time.Now().UnixNano()
appHash, retainHeight, err := blockExec.Commit(state, block, abciResponses.DeliverTxs)
commitEndTime := time.Now().UnixNano()

commitTimeMs := float64(commitEndTime-commitStartTime) / 1000000
blockExec.metrics.BlockCommitTime.Set(commitTimeMs)

if err != nil {
return state, 0, fmt.Errorf("commit failed for application: %v", err)
}
Expand Down Expand Up @@ -213,7 +223,13 @@ func (blockExec *BlockExecutor) Commit(
}

// Commit block, get hash back
appCommitStartTime := time.Now().UnixNano()
res, err := blockExec.proxyApp.CommitSync()
appCommitEndTime := time.Now().UnixNano()

appCommitTimeMs := float64(appCommitEndTime-appCommitStartTime) / 1000000
blockExec.metrics.BlockAppCommitTime.Set(appCommitTimeMs)

if err != nil {
blockExec.logger.Error(
"Client error during proxyAppConn.CommitSync",
Expand All @@ -231,13 +247,18 @@ func (blockExec *BlockExecutor) Commit(
)

// Update mempool.
updateMempoolStartTime := time.Now().UnixNano()
err = blockExec.mempool.Update(
block.Height,
block.Txs,
deliverTxResponses,
TxPreCheck(state),
TxPostCheck(state),
)
updateMempoolEndTime := time.Now().UnixNano()

updateMempoolTimeMs := float64(updateMempoolEndTime-updateMempoolStartTime) / 1000000
blockExec.metrics.BlockUpdateMempoolTime.Set(updateMempoolTimeMs)

return res.Data, res.RetainHeight, err
}
Expand Down
38 changes: 37 additions & 1 deletion state/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@ const (
type Metrics struct {
// Time between BeginBlock and EndBlock.
BlockProcessingTime metrics.Histogram
// Time gauge between BeginBlock and EndBlock.
BlockExecutionTime metrics.Gauge
// Time of commit
BlockCommitTime metrics.Gauge
// Time of app commit
BlockAppCommitTime metrics.Gauge
// Time of update mempool
BlockUpdateMempoolTime metrics.Gauge
}

// PrometheusMetrics returns Metrics build using Prometheus client library.
Expand All @@ -35,12 +43,40 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Help: "Time between BeginBlock and EndBlock in ms.",
Buckets: stdprometheus.LinearBuckets(1, 10, 10),
}, labels).With(labelsAndValues...),
BlockExecutionTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_execution_time",
Help: "Time between BeginBlock and EndBlock in ms.",
}, labels).With(labelsAndValues...),
BlockCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_commit_time",
Help: "Time of commit in ms.",
}, labels).With(labelsAndValues...),
BlockAppCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_app_commit_time",
Help: "Time of app commit in ms.",
}, labels).With(labelsAndValues...),
BlockUpdateMempoolTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_update_mempool_time",
Help: "Time of update mempool in ms.",
}, labels).With(labelsAndValues...),
}
}

// NopMetrics returns no-op Metrics.
func NopMetrics() *Metrics {
return &Metrics{
BlockProcessingTime: discard.NewHistogram(),
BlockProcessingTime: discard.NewHistogram(),
BlockExecutionTime: discard.NewGauge(),
BlockCommitTime: discard.NewGauge(),
BlockAppCommitTime: discard.NewGauge(),
BlockUpdateMempoolTime: discard.NewGauge(),
}
}

0 comments on commit 05dd210

Please sign in to comment.