From 74d9e4ef65d088501286ae6aa85e799bbc289386 Mon Sep 17 00:00:00 2001 From: Peter Argue <89119817+peterargue@users.noreply.github.com> Date: Fri, 3 Nov 2023 15:32:22 -0700 Subject: [PATCH] [Access] Improve script exec compare logging --- engine/access/rpc/backend/backend_scripts.go | 80 ++++++++++++++------ 1 file changed, 57 insertions(+), 23 deletions(-) diff --git a/engine/access/rpc/backend/backend_scripts.go b/engine/access/rpc/backend/backend_scripts.go index 6d4c70bb3d5..1d838562a1f 100644 --- a/engine/access/rpc/backend/backend_scripts.go +++ b/engine/access/rpc/backend/backend_scripts.go @@ -102,33 +102,35 @@ func (b *backendScripts) executeScript( switch b.scriptExecMode { case ScriptExecutionModeExecutionNodesOnly: - return b.executeScriptOnAvailableExecutionNodes(ctx, blockID, script, arguments, insecureScriptHash) + result, _, err := b.executeScriptOnAvailableExecutionNodes(ctx, blockID, script, arguments, insecureScriptHash) + return result, err case ScriptExecutionModeLocalOnly: - return b.executeScriptLocally(ctx, blockID, height, script, arguments, insecureScriptHash) + result, _, err := b.executeScriptLocally(ctx, blockID, height, script, arguments, insecureScriptHash) + return result, err case ScriptExecutionModeFailover: - localResult, localErr := b.executeScriptLocally(ctx, blockID, height, script, arguments, insecureScriptHash) + localResult, localDuration, localErr := b.executeScriptLocally(ctx, blockID, height, script, arguments, insecureScriptHash) if localErr == nil || isInvalidArgumentError(localErr) { return localResult, localErr } - execResult, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, blockID, script, arguments, insecureScriptHash) + execResult, execDuration, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, blockID, script, arguments, insecureScriptHash) - b.compareScriptExecutionResults(execResult, execErr, localResult, localErr, blockID, script, insecureScriptHash) + b.compareScriptExecutionResults(execResult, execDuration, execErr, localResult, localDuration, localErr, blockID, script, arguments, insecureScriptHash) return execResult, execErr case ScriptExecutionModeCompare: - execResult, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, blockID, script, arguments, insecureScriptHash) + execResult, execDuration, execErr := b.executeScriptOnAvailableExecutionNodes(ctx, blockID, script, arguments, insecureScriptHash) // we can only compare the results if there were either no errors or a cadence error // since we cannot distinguish the EN error as caused by the block being pruned or some other reason, // which may produce a valid RN output but an error for the EN if execErr != nil && !isInvalidArgumentError(execErr) { return nil, execErr } - localResult, localErr := b.executeScriptLocally(ctx, blockID, height, script, arguments, insecureScriptHash) + localResult, localDuration, localErr := b.executeScriptLocally(ctx, blockID, height, script, arguments, insecureScriptHash) - b.compareScriptExecutionResults(execResult, execErr, localResult, localErr, blockID, script, insecureScriptHash) + b.compareScriptExecutionResults(execResult, execDuration, execErr, localResult, localDuration, localErr, blockID, script, arguments, insecureScriptHash) // always return EN results return execResult, execErr @@ -146,7 +148,7 @@ func (b *backendScripts) executeScriptLocally( script []byte, arguments [][]byte, insecureScriptHash [md5.Size]byte, -) ([]byte, error) { +) ([]byte, time.Duration, error) { execStartTime := time.Now() result, err := b.scriptExecutor.ExecuteAtBlockHeight(ctx, script, arguments, height) @@ -174,7 +176,7 @@ func (b *backendScripts) executeScriptLocally( b.metrics.ScriptExecutionErrorLocal() } - return nil, convertedErr + return nil, execDuration, convertedErr } if b.log.GetLevel() == zerolog.DebugLevel && b.shouldLogScript(execEndTime, insecureScriptHash) { @@ -187,7 +189,7 @@ func (b *backendScripts) executeScriptLocally( // log execution time b.metrics.ScriptExecuted(execDuration, len(script)) - return result, nil + return result, execDuration, nil } // executeScriptOnAvailableExecutionNodes executes the provided script using available execution nodes. @@ -197,11 +199,11 @@ func (b *backendScripts) executeScriptOnAvailableExecutionNodes( script []byte, arguments [][]byte, insecureScriptHash [md5.Size]byte, -) ([]byte, error) { +) ([]byte, time.Duration, error) { // find few execution nodes which have executed the block earlier and provided an execution receipt for it executors, err := executionNodesForBlockID(ctx, blockID, b.executionReceipts, b.state, b.log) if err != nil { - return nil, status.Errorf(codes.Internal, "failed to find script executors at blockId %v: %v", blockID.String(), err) + return nil, 0, status.Errorf(codes.Internal, "failed to find script executors at blockId %v: %v", blockID.String(), err) } lg := b.log.With(). @@ -210,21 +212,27 @@ func (b *backendScripts) executeScriptOnAvailableExecutionNodes( Logger() var result []byte + var execDuration time.Duration errToReturn := b.nodeCommunicator.CallAvailableNode( executors, func(node *flow.Identity) error { execStartTime := time.Now() + result, err = b.tryExecuteScriptOnExecutionNode(ctx, node.Address, blockID, script, arguments) + + executionTime := time.Now() + execDuration = executionTime.Sub(execStartTime) + if err != nil { return err } if b.log.GetLevel() == zerolog.DebugLevel { - executionTime := time.Now() if b.shouldLogScript(executionTime, insecureScriptHash) { lg.Debug(). Str("script_executor_addr", node.Address). Str("script", string(script)). + Dur("execution_dur_ms", execDuration). Msg("Successfully executed script") b.loggedScripts.Add(insecureScriptHash, executionTime) } @@ -252,10 +260,10 @@ func (b *backendScripts) executeScriptOnAvailableExecutionNodes( b.metrics.ScriptExecutionErrorOnExecutionNode() b.log.Error().Err(errToReturn).Msg("script execution failed for execution node internal reasons") } - return nil, rpc.ConvertError(errToReturn, "failed to execute script on execution nodes", codes.Internal) + return nil, execDuration, rpc.ConvertError(errToReturn, "failed to execute script on execution nodes", codes.Internal) } - return result, nil + return result, execDuration, nil } // tryExecuteScriptOnExecutionNode attempts to execute the script on the given execution node. @@ -291,17 +299,20 @@ func isInvalidArgumentError(scriptExecutionErr error) bool { func (b *backendScripts) compareScriptExecutionResults( execNodeResult []byte, + execDuration time.Duration, execErr error, localResult []byte, + localDuration time.Duration, localErr error, blockID flow.Identifier, script []byte, + arguments [][]byte, insecureScriptHash [md5.Size]byte, ) { // record errors caused by missing local data if localErr != nil && status.Code(localErr) == codes.OutOfRange { b.metrics.ScriptExecutionNotIndexed() - b.logScriptExecutionComparison(execNodeResult, execErr, localResult, localErr, blockID, script, insecureScriptHash, + b.logScriptExecutionComparison(execNodeResult, execDuration, execErr, localResult, localDuration, localErr, blockID, script, arguments, insecureScriptHash, "script execution results do not match EN because data is not indexed yet") return } @@ -322,14 +333,26 @@ func (b *backendScripts) compareScriptExecutionResults( // should match. execParts := strings.Split(execErr.Error(), "failed to execute script at block") localParts := strings.Split(localErr.Error(), "failed to execute script at block") - if len(execParts) == 2 && len(localParts) == 2 && execParts[1] == localParts[1] { - b.metrics.ScriptExecutionErrorMatch() - return + if len(execParts) == 2 && len(localParts) == 2 { + if execParts[1] == localParts[1] { + b.metrics.ScriptExecutionErrorMatch() + return + } + + // by default ENs are configured with longer script error size limits. check if the + // non-truncated parts match + localSubParts := strings.Split(localParts[1], " ... ") + if len(localSubParts) == 2 && + strings.HasPrefix(execParts[1], localSubParts[0]) && + strings.HasSuffix(execParts[1], localSubParts[1]) { + b.metrics.ScriptExecutionErrorMatch() + return + } } } b.metrics.ScriptExecutionErrorMismatch() - b.logScriptExecutionComparison(execNodeResult, execErr, localResult, localErr, blockID, script, insecureScriptHash, + b.logScriptExecutionComparison(execNodeResult, execDuration, execErr, localResult, localDuration, localErr, blockID, script, arguments, insecureScriptHash, "cadence errors from local execution do not match and EN") return } @@ -340,37 +363,48 @@ func (b *backendScripts) compareScriptExecutionResults( } b.metrics.ScriptExecutionResultMismatch() - b.logScriptExecutionComparison(execNodeResult, execErr, localResult, localErr, blockID, script, insecureScriptHash, + b.logScriptExecutionComparison(execNodeResult, execDuration, execErr, localResult, localDuration, localErr, blockID, script, arguments, insecureScriptHash, "script execution results from local execution do not match EN") } // logScriptExecutionComparison logs the script execution comparison between local execution and execution node func (b *backendScripts) logScriptExecutionComparison( execNodeResult []byte, + execDuration time.Duration, execErr error, localResult []byte, + localDuration time.Duration, localErr error, blockID flow.Identifier, script []byte, + arguments [][]byte, insecureScriptHash [md5.Size]byte, msg string, ) { + args := make([]string, len(arguments)) + for i, arg := range arguments { + args[i] = string(arg) + } + lgCtx := b.log.With(). Hex("block_id", blockID[:]). + Hex("script_hash", insecureScriptHash[:]). Str("script", string(script)). - Hex("script_hash", insecureScriptHash[:]) + Strs("args", args) if execErr != nil { lgCtx = lgCtx.AnErr("execution_node_error", execErr) } else { lgCtx = lgCtx.Hex("execution_node_result", execNodeResult) } + lgCtx = lgCtx.Dur("execution_node_duration_ms", execDuration) if localErr != nil { lgCtx = lgCtx.AnErr("local_error", localErr) } else { lgCtx = lgCtx.Hex("local_result", localResult) } + lgCtx = lgCtx.Dur("local_duration_ms", localDuration) lg := lgCtx.Logger() lg.Debug().Msg(msg)