Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added processDebugger component #4288

Merged
merged 17 commits into from
Sep 14, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions cmd/node/config/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,11 @@
[Debug.EpochStart]
GoRoutineAnalyserEnabled = true
ProcessDataTrieOnCommitEpoch = true
[Debug.Process]
Enabled = true
DebuggingLogLevel = "*:DEBUG,p2p:TRACE,debug:DEBUG,process:TRACE,intercept:TRACE"
GoRoutinesDump = true
PollingTimeInSeconds = 240 # 4 minutes

[Health]
IntervalVerifyMemoryInSeconds = 5
Expand Down
9 changes: 9 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ type DebugConfig struct {
Antiflood AntifloodDebugConfig
ShuffleOut ShuffleOutDebugConfig
EpochStart EpochStartDebugConfig
Process ProcessDebugConfig
}

// HealthServiceConfig will hold health service (monitoring) configuration
Expand Down Expand Up @@ -512,6 +513,14 @@ type EpochStartDebugConfig struct {
ProcessDataTrieOnCommitEpoch bool
}

// ProcessDebugConfig will hold the process debug configuration
type ProcessDebugConfig struct {
Enabled bool
GoRoutinesDump bool
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

variables with the same type should stay together (optime memory usage)
GoRoutinesDump can be moved near Enabled field

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

DebuggingLogLevel string
PollingTimeInSeconds int
}

// ApiRoutesConfig holds the configuration related to Rest API routes
type ApiRoutesConfig struct {
Logging ApiLoggingConfig
Expand Down
7 changes: 7 additions & 0 deletions debug/factory/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,10 @@ type InterceptorResolverDebugHandler interface {
Close() error
IsInterfaceNil() bool
}

// ProcessDebugger defines what a process debugger implementation should do
type ProcessDebugger interface {
SetLastCommittedBlockRound(round uint64)
Close() error
IsInterfaceNil() bool
}
15 changes: 15 additions & 0 deletions debug/factory/processDebuggerFactory.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package factory

import (
"github.com/ElrondNetwork/elrond-go/config"
"github.com/ElrondNetwork/elrond-go/debug/process"
)

// CreateProcessDebugger creates a new instance of type ProcessDebugger
func CreateProcessDebugger(configs config.ProcessDebugConfig) (ProcessDebugger, error) {
if !configs.Enabled {
return process.NewDisabledDebugger(), nil
}

return process.NewProcessDebugger(configs)
}
50 changes: 50 additions & 0 deletions debug/factory/processDebuggerFactory_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package factory

import (
"fmt"
"testing"

"github.com/ElrondNetwork/elrond-go-core/core/check"
"github.com/ElrondNetwork/elrond-go/config"
"github.com/stretchr/testify/assert"
)

func TestCreateProcessDebugger(t *testing.T) {
t.Parallel()

t.Run("create disabled process debugger", func(t *testing.T) {
t.Parallel()

configs := config.ProcessDebugConfig{
Enabled: false,
}
debugger, err := CreateProcessDebugger(configs)
assert.Nil(t, err)
assert.False(t, check.IfNil(debugger))
assert.Equal(t, "*process.disabledDebugger", fmt.Sprintf("%T", debugger))
})
t.Run("create real process debugger", func(t *testing.T) {
t.Parallel()

configs := config.ProcessDebugConfig{
Enabled: true,
LogLevelChanger: "",
GoRoutinesDump: false,
PollingTimeInSeconds: 1,
}
debugger, err := CreateProcessDebugger(configs)
assert.Nil(t, err)
assert.False(t, check.IfNil(debugger))
assert.Equal(t, "*process.processDebugger", fmt.Sprintf("%T", debugger))
})
t.Run("create real process debugger errors", func(t *testing.T) {
t.Parallel()

configs := config.ProcessDebugConfig{
Enabled: true,
}
debugger, err := CreateProcessDebugger(configs)
assert.NotNil(t, err)
assert.True(t, check.IfNil(debugger))
})
}
170 changes: 170 additions & 0 deletions debug/process/debugger.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package process

import (
"context"
"fmt"
"runtime"
"sync"
"time"

logger "github.com/ElrondNetwork/elrond-go-logger"
"github.com/ElrondNetwork/elrond-go/config"
)

const (
minAcceptedValue = 1
buffSize = 100 * 1024 * 1024 // 100MB
)

var log = logger.GetOrCreate("debug/process")

type processDebugger struct {
timer *time.Timer
mut sync.RWMutex
lastCheckedBlockRound int64
lastCommittedBlockRound int64
cancel func()
goRoutinesDumpHandler func()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

goRoutinesDumpHandler and logChangeHandler can be removed and call the functions directly?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no, can not properly test the component otherwise

logChangeHandler func()
pollingTime time.Duration
debuggingLogLevel string
dumpGoRoutines bool
}

// NewProcessDebugger creates a new debugger instance used to monitor the block process flow
func NewProcessDebugger(config config.ProcessDebugConfig) (*processDebugger, error) {
err := checkConfigs(config)
if err != nil {
return nil, err
}

pollingTime := time.Duration(config.PollingTimeInSeconds) * time.Second
d := &processDebugger{
timer: time.NewTimer(pollingTime),

pollingTime: pollingTime,
debuggingLogLevel: config.DebuggingLogLevel,
dumpGoRoutines: config.GoRoutinesDump,
}

ctx, cancel := context.WithCancel(context.Background())
d.cancel = cancel
d.goRoutinesDumpHandler = dumpGoRoutines
d.logChangeHandler = d.changeLog

go d.processLoop(ctx)

return d, nil
}

func checkConfigs(config config.ProcessDebugConfig) error {
if config.PollingTimeInSeconds < minAcceptedValue {
return fmt.Errorf("%w for PollingTimeInSeconds, minimum %d, got %d",
errInvalidValue, minAcceptedValue, config.PollingTimeInSeconds)
}

return nil
}

func (debugger *processDebugger) processLoop(ctx context.Context) {
log.Debug("processor debugger processLoop is starting...")

defer debugger.timer.Stop()

for {
debugger.timer.Reset(debugger.pollingTime)

select {
case <-ctx.Done():
log.Debug("processor debugger processLoop is closing...")
return
case <-debugger.timer.C:
debugger.checkRounds()
}
}
}

func (debugger *processDebugger) checkRounds() {
if debugger.shouldTriggerUpdatingLastCheckedRound() {
debugger.trigger()
}
}

func (debugger *processDebugger) shouldTriggerUpdatingLastCheckedRound() bool {
debugger.mut.Lock()
defer debugger.mut.Unlock()

isNodeStarting := debugger.lastCheckedBlockRound == 0 && debugger.lastCommittedBlockRound <= 0
if isNodeStarting {
log.Debug("processor debugger: node is starting")
return false
}

defer func() {
// update the last checked round
debugger.lastCheckedBlockRound = debugger.lastCommittedBlockRound
}()

isFirstCommit := debugger.lastCheckedBlockRound == 0 && debugger.lastCommittedBlockRound > 0
if isFirstCommit {
log.Debug("processor debugger: first committed block", "round", debugger.lastCommittedBlockRound)
return false
}

isNodeRunning := debugger.lastCheckedBlockRound < debugger.lastCommittedBlockRound
if isNodeRunning {
log.Debug("processor debugger: node is running, nothing to do", "round", debugger.lastCommittedBlockRound)
return false
}

return true
}

func (debugger *processDebugger) trigger() {
debugger.mut.RLock()
lastCommittedBlockRound := debugger.lastCommittedBlockRound
debugger.mut.RUnlock()

log.Warn("processor debugger: node is stuck",
"last committed round", lastCommittedBlockRound)

debugger.logChangeHandler()

if debugger.dumpGoRoutines {
debugger.goRoutinesDumpHandler()
}
}

// SetLastCommittedBlockRound sets the last committed block's round
func (debugger *processDebugger) SetLastCommittedBlockRound(round uint64) {
debugger.mut.Lock()
defer debugger.mut.Unlock()

log.Debug("processor debugger: updated last committed block round", "round", round)
debugger.lastCommittedBlockRound = int64(round)
}

// Close stops any started go routines
func (debugger *processDebugger) Close() error {
debugger.cancel()

return nil
}

func dumpGoRoutines() {
buff := make([]byte, buffSize)
numBytes := runtime.Stack(buff, true)
log.Debug(string(buff[:numBytes]))
}

func (debugger *processDebugger) changeLog() {
errSetLogLevel := logger.SetLogLevel(debugger.debuggingLogLevel)
if errSetLogLevel != nil {
log.Error("debugger.changeLog: cannot change log level", "error", errSetLogLevel)
}
}

// IsInterfaceNil returns true if there is no value under the interface
func (debugger *processDebugger) IsInterfaceNil() bool {
return debugger == nil
}
Loading