Skip to content

Commit

Permalink
feat: more prometheus metrics for monitoring performance (#146) (#175)
Browse files Browse the repository at this point in the history
  • Loading branch information
Woosang Son committed Jul 7, 2021
1 parent a727fe1 commit 208ceef
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 32 deletions.
8 changes: 7 additions & 1 deletion mempool/clist_mempool.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"sync"
"sync/atomic"
"time"

abci "github.com/line/ostracon/abci/types"
cfg "github.com/line/ostracon/config"
Expand Down Expand Up @@ -305,7 +306,7 @@ func (mem *CListMempool) globalCb(req *abci.Request, res *abci.Response) {
return
}

mem.metrics.RecheckTimes.Add(1)
mem.metrics.RecheckCount.Add(1)
mem.resCbRecheck(req, res)

// update metrics
Expand Down Expand Up @@ -610,6 +611,7 @@ func (mem *CListMempool) Update(

// Either recheck non-committed txs to see if they became invalid
// or just notify there're some txs left.
recheckStartTime := time.Now().UnixNano()
if mem.Size() > 0 {
if mem.config.Recheck {
mem.logger.Info("Recheck txs", "numtxs", mem.Size(), "height", height)
Expand All @@ -621,6 +623,10 @@ func (mem *CListMempool) Update(
mem.notifyTxsAvailable()
}
}
recheckEndTime := time.Now().UnixNano()

recheckTimeMs := float64(recheckEndTime-recheckStartTime) / 1000000
mem.metrics.RecheckTime.Set(recheckTimeMs)

// Update metrics
mem.metrics.Size.Set(float64(mem.Size()))
Expand Down
17 changes: 13 additions & 4 deletions mempool/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ type Metrics struct {
// Number of failed transactions.
FailedTxs metrics.Counter
// Number of times transactions are rechecked in the mempool.
RecheckTimes metrics.Counter
RecheckCount metrics.Counter
// Time of recheck transactions in the mempool.
RecheckTime metrics.Gauge
}

// PrometheusMetrics returns Metrics build using Prometheus client library.
Expand Down Expand Up @@ -54,12 +56,18 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "failed_txs",
Help: "Number of failed transactions.",
}, labels).With(labelsAndValues...),
RecheckTimes: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
RecheckCount: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "recheck_times",
Name: "recheck_count",
Help: "Number of times transactions are rechecked in the mempool.",
}, labels).With(labelsAndValues...),
RecheckTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "recheck_time",
Help: "Time of recheck transactions in the mempool in ms.",
}, labels).With(labelsAndValues...),
}
}

Expand All @@ -69,6 +77,7 @@ func NopMetrics() *Metrics {
Size: discard.NewGauge(),
TxSizeBytes: discard.NewHistogram(),
FailedTxs: discard.NewCounter(),
RecheckTimes: discard.NewCounter(),
RecheckCount: discard.NewCounter(),
RecheckTime: discard.NewGauge(),
}
}
33 changes: 24 additions & 9 deletions state/execution.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,21 +133,22 @@ func (blockExec *BlockExecutor) ApplyBlock(
state State, blockID types.BlockID, block *types.Block,
) (State, int64, error) {

startTime := time.Now().UnixNano()
// When doing ApplyBlock, we don't need to check whether the block.Round is same to current round,
// so we just put block.Round for the current round parameter
if err := blockExec.ValidateBlock(state, block.Round, block); err != nil {
return state, 0, ErrInvalidBlock(err)
}
endTime := time.Now().UnixNano()
blockExec.metrics.BlockVerifyingTime.Observe(float64(endTime-startTime) / 1000000)

startTime = endTime
execStartTime := time.Now().UnixNano()
abciResponses, err := execBlockOnProxyApp(
blockExec.logger, blockExec.proxyApp, block, blockExec.store, state.InitialHeight, state.VoterParams,
)
endTime = time.Now().UnixNano()
blockExec.metrics.BlockProcessingTime.Observe(float64(endTime-startTime) / 1000000)
execEndTime := time.Now().UnixNano()

execTimeMs := float64(execEndTime-execStartTime) / 1000000
blockExec.metrics.BlockProcessingTime.Observe(execTimeMs)
blockExec.metrics.BlockExecutionTime.Set(execTimeMs)

if err != nil {
return state, 0, ErrProxyAppConn(err)
}
Expand Down Expand Up @@ -182,14 +183,17 @@ func (blockExec *BlockExecutor) ApplyBlock(
return state, 0, fmt.Errorf("commit failed for application: %v", err)
}

startTime = time.Now().UnixNano()
// Lock mempool, commit app state, update mempoool.
commitStartTime := time.Now().UnixNano()
appHash, retainHeight, err := blockExec.Commit(state, block, abciResponses.DeliverTxs)
commitEndTime := time.Now().UnixNano()

commitTimeMs := float64(commitEndTime-commitStartTime) / 1000000
blockExec.metrics.BlockCommitTime.Set(commitTimeMs)

if err != nil {
return state, 0, fmt.Errorf("commit failed for application: %v", err)
}
endTime = time.Now().UnixNano()
blockExec.metrics.BlockCommittingTime.Observe(float64(endTime-startTime) / 1000000)

// Update evpool with the latest state.
blockExec.evpool.Update(state, block.Evidence.Evidence)
Expand Down Expand Up @@ -234,7 +238,13 @@ func (blockExec *BlockExecutor) Commit(
}

// Commit block, get hash back
appCommitStartTime := time.Now().UnixNano()
res, err := blockExec.proxyApp.CommitSync()
appCommitEndTime := time.Now().UnixNano()

appCommitTimeMs := float64(appCommitEndTime-appCommitStartTime) / 1000000
blockExec.metrics.BlockAppCommitTime.Set(appCommitTimeMs)

if err != nil {
blockExec.logger.Error("client error during proxyAppConn.CommitSync", "err", err)
return nil, 0, err
Expand All @@ -249,13 +259,18 @@ func (blockExec *BlockExecutor) Commit(
)

// Update mempool.
updateMempoolStartTime := time.Now().UnixNano()
err = blockExec.mempool.Update(
block.Height,
block.Txs,
deliverTxResponses,
TxPreCheck(state),
TxPostCheck(state),
)
updateMempoolEndTime := time.Now().UnixNano()

updateMempoolTimeMs := float64(updateMempoolEndTime-updateMempoolStartTime) / 1000000
blockExec.metrics.BlockUpdateMempoolTime.Set(updateMempoolTimeMs)

return res.Data, res.RetainHeight, err
}
Expand Down
52 changes: 34 additions & 18 deletions state/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@ const (

// Metrics contains metrics exposed by this package.
type Metrics struct {
// Time of ValidBlock
BlockVerifyingTime metrics.Histogram
// Time between BeginBlock and EndBlock.
BlockProcessingTime metrics.Histogram
// Time of Commit
BlockCommittingTime metrics.Histogram
// Time gauge between BeginBlock and EndBlock.
BlockExecutionTime metrics.Gauge
// Time of commit
BlockCommitTime metrics.Gauge
// Time of app commit
BlockAppCommitTime metrics.Gauge
// Time of update mempool
BlockUpdateMempoolTime metrics.Gauge
}

// PrometheusMetrics returns Metrics build using Prometheus client library.
Expand All @@ -36,35 +40,47 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
compositeBuckets = append(compositeBuckets, stdprometheus.LinearBuckets(1000, 500, 4)...)

return &Metrics{
BlockVerifyingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_verifying_time",
Help: "Time of ValidBlock in ms.",
Buckets: stdprometheus.LinearBuckets(50, 50, 10),
}, labels).With(labelsAndValues...),
BlockProcessingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_processing_time",
Help: "Time between BeginBlock and EndBlock in ms.",
Buckets: compositeBuckets,
}, labels).With(labelsAndValues...),
BlockCommittingTime: prometheus.NewHistogramFrom(stdprometheus.HistogramOpts{
BlockExecutionTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_execution_time",
Help: "Time between BeginBlock and EndBlock in ms.",
}, labels).With(labelsAndValues...),
BlockCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_commit_time",
Help: "Time of commit in ms.",
}, labels).With(labelsAndValues...),
BlockAppCommitTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_app_commit_time",
Help: "Time of app commit in ms.",
}, labels).With(labelsAndValues...),
BlockUpdateMempoolTime: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "block_committing_time",
Help: "Time of Commit in ms.",
Buckets: stdprometheus.LinearBuckets(20, 20, 10),
Name: "block_update_mempool_time",
Help: "Time of update mempool in ms.",
}, labels).With(labelsAndValues...),
}
}

// NopMetrics returns no-op Metrics.
func NopMetrics() *Metrics {
return &Metrics{
BlockVerifyingTime: discard.NewHistogram(),
BlockProcessingTime: discard.NewHistogram(),
BlockCommittingTime: discard.NewHistogram(),
BlockProcessingTime: discard.NewHistogram(),
BlockExecutionTime: discard.NewGauge(),
BlockCommitTime: discard.NewGauge(),
BlockAppCommitTime: discard.NewGauge(),
BlockUpdateMempoolTime: discard.NewGauge(),
}
}

0 comments on commit 208ceef

Please sign in to comment.