diff --git a/admin/README.md b/admin/README.md
index 2d003798b0f..9da63d0831e 100644
--- a/admin/README.md
+++ b/admin/README.md
@@ -109,3 +109,9 @@ curl localhost:9002/admin/run_command -H 'Content-Type: application/json' -d '{"
 curl localhost:9002/admin/run_command -H 'Content-Type: application/json' -d '{"commandName": "ingest-tx-rate-limit", "data": { "command": "get_config" }}'
 curl localhost:9002/admin/run_command -H 'Content-Type: application/json' -d '{"commandName": "ingest-tx-rate-limit", "data": { "command": "set_config", "limit": 1, "burst": 1 }}'
 ```
+
+### To create a protocol snapshot for the latest checkpoint (execution node only)
+```
+curl localhost:9002/admin/run_command -H 'Content-Type: application/json' -d '{"commandName": "protocol-snapshot"}'
+curl localhost:9002/admin/run_command -H 'Content-Type: application/json' -d '{"commandName": "protocol-snapshot", "data": { "blocks-to-skip": 10 }}'
+```
diff --git a/admin/commands/storage/read_protocol_snapshot.go b/admin/commands/storage/read_protocol_snapshot.go
new file mode 100644
index 00000000000..8b87164f230
--- /dev/null
+++ b/admin/commands/storage/read_protocol_snapshot.go
@@ -0,0 +1,112 @@
+package storage
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/rs/zerolog"
+
+	"github.com/onflow/flow-go/admin"
+	"github.com/onflow/flow-go/admin/commands"
+	"github.com/onflow/flow-go/cmd/util/common"
+	"github.com/onflow/flow-go/state/protocol"
+	"github.com/onflow/flow-go/state/protocol/inmem"
+	"github.com/onflow/flow-go/storage"
+	"github.com/onflow/flow-go/utils/logging"
+)
+
+var _ commands.AdminCommand = (*ProtocolSnapshotCommand)(nil)
+
+type protocolSnapshotData struct {
+	blocksToSkip uint
+}
+
+// ProtocolSnapshotCommand is a command that generates a protocol snapshot for a checkpoint (usually the latest checkpoint).
+// This command is only available on execution nodes.
+type ProtocolSnapshotCommand struct {
+	logger        zerolog.Logger
+	state         protocol.State
+	headers       storage.Headers
+	seals         storage.Seals
+	checkpointDir string // the directory where the checkpoint is stored
+}
+
+func NewProtocolSnapshotCommand(
+	logger zerolog.Logger,
+	state protocol.State,
+	headers storage.Headers,
+	seals storage.Seals,
+	checkpointDir string,
+) *ProtocolSnapshotCommand {
+	return &ProtocolSnapshotCommand{
+		logger:        logger,
+		state:         state,
+		headers:       headers,
+		seals:         seals,
+		checkpointDir: checkpointDir,
+	}
+}
+
+func (s *ProtocolSnapshotCommand) Handler(_ context.Context, req *admin.CommandRequest) (interface{}, error) {
+	validated, ok := req.ValidatorData.(*protocolSnapshotData)
+	if !ok {
+		return nil, fmt.Errorf("failed to parse validator data")
+	}
+
+	blocksToSkip := validated.blocksToSkip
+
+	s.logger.Info().Uint("blocksToSkip", blocksToSkip).Msgf("admintool: generating protocol snapshot")
+
+	snapshot, sealedHeight, commit, err := common.GenerateProtocolSnapshotForCheckpoint(
+		s.logger, s.state, s.headers, s.seals, s.checkpointDir, blocksToSkip)
+	if err != nil {
+		return nil, fmt.Errorf("could not generate protocol snapshot for checkpoint, checkpointDir %v: %w",
+			s.checkpointDir, err)
+	}
+
+	header, err := snapshot.Head()
+	if err != nil {
+		return nil, fmt.Errorf("could not get header from snapshot: %w", err)
+	}
+
+	serializable, err := inmem.FromSnapshot(snapshot)
+	if err != nil {
+		return nil, fmt.Errorf("could not convert snapshot to serializable: %w", err)
+	}
+
+	s.logger.Info().
+		Uint64("finalized_height", header.Height). // finalized height
+		Hex("finalized_block_id", logging.Entity(header)).
+ Uint64("sealed_height", sealedHeight). + Hex("sealed_commit", commit[:]). // not the commit for the finalized height, but for the sealed height + Uint("blocks_to_skip", blocksToSkip). + Msgf("admintool: protocol snapshot generated successfully") + + return commands.ConvertToMap(serializable.Encodable()) +} + +func (s *ProtocolSnapshotCommand) Validator(req *admin.CommandRequest) error { + // blocksToSkip is the number of blocks to skip when iterating the sealed heights to find the state commitment + // in the checkpoint file. + // default is 0 + validated := &protocolSnapshotData{ + blocksToSkip: uint(0), + } + + input, ok := req.Data.(map[string]interface{}) + if ok { + data, ok := input["blocks-to-skip"] + + if ok { + n, ok := data.(float64) + if !ok { + return fmt.Errorf("could not parse blocks-to-skip: %v", data) + } + validated.blocksToSkip = uint(n) + } + } + + req.ValidatorData = validated + + return nil +} diff --git a/cmd/execution_builder.go b/cmd/execution_builder.go index 666afb4002b..301ee0acca5 100644 --- a/cmd/execution_builder.go +++ b/cmd/execution_builder.go @@ -192,6 +192,15 @@ func (builder *ExecutionNodeBuilder) LoadComponentsAndModules() { AdminCommand("get-transactions", func(conf *NodeConfig) commands.AdminCommand { return storageCommands.NewGetTransactionsCommand(conf.State, conf.Storage.Payloads, conf.Storage.Collections) }). + AdminCommand("protocol-snapshot", func(conf *NodeConfig) commands.AdminCommand { + return storageCommands.NewProtocolSnapshotCommand( + conf.Logger, + conf.State, + conf.Storage.Headers, + conf.Storage.Seals, + exeNode.exeConf.triedir, + ) + }). Module("mutable follower state", exeNode.LoadMutableFollowerState). Module("system specs", exeNode.LoadSystemSpecs). Module("execution metrics", exeNode.LoadExecutionMetrics). 
diff --git a/cmd/util/cmd/read-protocol-state/cmd/snapshot.go b/cmd/util/cmd/read-protocol-state/cmd/snapshot.go index 13386195ab3..77a9d77777f 100644 --- a/cmd/util/cmd/read-protocol-state/cmd/snapshot.go +++ b/cmd/util/cmd/read-protocol-state/cmd/snapshot.go @@ -5,10 +5,16 @@ import ( "github.com/spf13/cobra" "github.com/onflow/flow-go/cmd/util/cmd/common" + commonFuncs "github.com/onflow/flow-go/cmd/util/common" + "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/state/protocol" "github.com/onflow/flow-go/state/protocol/inmem" ) +var flagCheckpointDir string +var flagCheckpointScanStep uint +var flagCheckpointScanEndHeight int64 + var SnapshotCmd = &cobra.Command{ Use: "snapshot", Short: "Read snapshot from protocol state", @@ -26,6 +32,15 @@ func init() { SnapshotCmd.Flags().BoolVar(&flagSealed, "sealed", false, "get sealed block") + + SnapshotCmd.Flags().StringVar(&flagCheckpointDir, "checkpoint-dir", "", + "(execution node only) get snapshot from the latest checkpoint file in the given checkpoint directory") + + SnapshotCmd.Flags().UintVar(&flagCheckpointScanStep, "checkpoint-scan-step", 0, + "(execution node only) scan step for finding sealed height by checkpoint (use with --checkpoint-dir flag)") + + SnapshotCmd.Flags().Int64Var(&flagCheckpointScanEndHeight, "checkpoint-scan-end-height", -1, + "(execution node only) scan end height for finding sealed height by checkpoint (use with --checkpoint-dir flag)") } func runSnapshot(*cobra.Command, []string) { @@ -49,6 +64,28 @@ func runSnapshot(*cobra.Command, []string) { } else if flagSealed { log.Info().Msgf("get last sealed snapshot") snapshot = state.Sealed() + } else if flagCheckpointDir != "" { + log.Info().Msgf("get snapshot for latest checkpoint in directory %v (step: %v, endHeight: %v)", + flagCheckpointDir, flagCheckpointScanStep, flagCheckpointScanEndHeight) + var protocolSnapshot protocol.Snapshot + var sealedHeight uint64 + var sealedCommit flow.StateCommitment + if flagCheckpointScanEndHeight < 0 { + // using default end height which is the last sealed height + protocolSnapshot, sealedHeight, sealedCommit, err = commonFuncs.GenerateProtocolSnapshotForCheckpoint( + log.Logger, state, storages.Headers, storages.Seals, flagCheckpointDir, flagCheckpointScanStep) + } else { + // using customized end height + protocolSnapshot, sealedHeight, sealedCommit, err = commonFuncs.GenerateProtocolSnapshotForCheckpointWithHeights( + log.Logger, state, storages.Headers, storages.Seals, flagCheckpointDir, flagCheckpointScanStep, uint64(flagCheckpointScanEndHeight)) + } + + if err != nil { + log.Fatal().Err(err).Msgf("could not generate protocol snapshot for checkpoint in dir: %v", flagCheckpointDir) + } + + snapshot = protocolSnapshot + log.Info().Msgf("snapshot found, sealed height %v, commit %x", sealedHeight, sealedCommit) } head, err := snapshot.Head() diff --git a/cmd/util/common/checkpoint.go b/cmd/util/common/checkpoint.go new file mode 100644 index 00000000000..bddcead9e49 --- /dev/null +++ b/cmd/util/common/checkpoint.go @@ -0,0 +1,202 @@ +package common + +import ( + "fmt" + "path/filepath" + + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" + + "github.com/onflow/flow-go/ledger" + "github.com/onflow/flow-go/ledger/complete/wal" + "github.com/onflow/flow-go/model/flow" + "github.com/onflow/flow-go/state/protocol" + "github.com/onflow/flow-go/state/protocol/snapshots" + "github.com/onflow/flow-go/storage" +) + +// FindHeightsByCheckpoints finds the sealed height that produces the state commitment included in the 
checkpoint file.
+func FindHeightsByCheckpoints(
+	logger zerolog.Logger,
+	headers storage.Headers,
+	seals storage.Seals,
+	checkpointFilePath string,
+	blocksToSkip uint,
+	startHeight uint64,
+	endHeight uint64,
+) (
+	uint64, // sealed height that produces the state commitment included in the checkpoint file
+	flow.StateCommitment, // the state commitment that matches the sealed height
+	uint64, // the finalized height that seals the sealed height
+	error,
+) {
+
+	// find all trie root hashes in the checkpoint file
+	dir, fileName := filepath.Split(checkpointFilePath)
+	hashes, err := wal.ReadTriesRootHash(logger, dir, fileName)
+	if err != nil {
+		return 0, flow.DummyStateCommitment, 0,
+			fmt.Errorf("could not read trie root hashes from checkpoint file %v: %w",
+				checkpointFilePath, err)
+	}
+
+	// convert all trie root hashes to state commitments
+	commitments := hashesToCommits(hashes)
+
+	commitMap := make(map[flow.StateCommitment]struct{}, len(commitments))
+	for _, commit := range commitments {
+		commitMap[commit] = struct{}{}
+	}
+
+	// Iterate backwards from the end height to the start height to find a block
+	// that produces one of the state commitments in the given set.
+	// It is safe to skip blocks in this linear search because we expect the checkpoint's
+	// state commitments to cover a contiguous range of blocks (for correct operation we
+	// assume `blocksToSkip` is smaller than this range).
+	// Note: endHeight must be the height of a sealed block.
+	step := blocksToSkip + 1
+	for height := endHeight; height >= startHeight; height -= uint64(step) {
+		finalizedID, err := headers.BlockIDByHeight(height)
+		if err != nil {
+			return 0, flow.DummyStateCommitment, 0,
+				fmt.Errorf("could not find block by height %v: %w", height, err)
+		}
+
+		// since height is a sealed block height, we must be able to find the seal for this block
+		finalizedSeal, err := seals.HighestInFork(finalizedID)
+		if err != nil {
+			return 0, flow.DummyStateCommitment, 0,
+				fmt.Errorf("could not find seal for block %v at height %v: %w", finalizedID, height, err)
+		}
+
+		commit := finalizedSeal.FinalState
+
+		_, ok := commitMap[commit]
+		if ok {
+			sealedBlock, err := headers.ByBlockID(finalizedSeal.BlockID)
+			if err != nil {
+				return 0, flow.DummyStateCommitment, 0,
+					fmt.Errorf("could not find block by ID %v: %w", finalizedSeal.BlockID, err)
+			}
+
+			log.Info().Msgf("successfully found block %v (%v) that seals block %v (%v) for commit %x in checkpoint file %v",
+				height, finalizedID,
+				sealedBlock.Height, finalizedSeal.BlockID,
+				commit, checkpointFilePath)
+
+			return sealedBlock.Height, commit, height, nil
+		}
+
+		if height < uint64(step) {
+			break
+		}
+	}
+
+	return 0, flow.DummyStateCommitment, 0,
+		fmt.Errorf("could not find commit within height range [%v,%v]", startHeight, endHeight)
+}
+
+// GenerateProtocolSnapshotForCheckpoint finds a sealed block that produces the state commitment contained in the latest
+// checkpoint file, and returns a protocol snapshot for the finalized block that seals that sealed block.
+// The returned protocol snapshot can be used to dynamically bootstrap an execution node along with the latest checkpoint file.
+//
+// To find the sealed block, it iterates backwards through the sealed heights, starting from the last sealed height,
+// and checks whether the state commitment matches one of the state commitments contained in the checkpoint file.
+// However, this iteration could be slow; to speed it up, we can skip some blocks on each step.
+// A checkpoint file usually contains 500 tries, which might cover around 250 blocks (assuming 2 tries per block),
+// so skipping 10 blocks at a time still lets us find the sealed block without stepping over the heights covered
+// by the checkpoint file.
+// The blocksToSkip parameter controls how many blocks are skipped on each step of the iteration.
+func GenerateProtocolSnapshotForCheckpoint(
+	logger zerolog.Logger,
+	state protocol.State,
+	headers storage.Headers,
+	seals storage.Seals,
+	checkpointDir string,
+	blocksToSkip uint,
+) (protocol.Snapshot, uint64, flow.StateCommitment, error) {
+	// skip X blocks (e.g. 10) at a time to find the block that produces the state commitment in the checkpoint file;
+	// since a checkpoint file contains 500 tries, this allows us to find the block more efficiently
+	sealed, err := state.Sealed().Head()
+	if err != nil {
+		return nil, 0, flow.DummyStateCommitment, err
+	}
+	endHeight := sealed.Height
+
+	return GenerateProtocolSnapshotForCheckpointWithHeights(logger, state, headers, seals,
+		checkpointDir,
+		blocksToSkip,
+		endHeight,
+	)
+}
+
+// findLatestCheckpointFilePath finds the latest checkpoint file in the given directory and
+// returns the full path of its header file. If the directory contains no numbered checkpoint,
+// it falls back to the root checkpoint file.
+func findLatestCheckpointFilePath(checkpointDir string) (string, error) {
+	_, last, err := wal.ListCheckpoints(checkpointDir)
+	if err != nil {
+		return "", fmt.Errorf("could not list checkpoints in directory %v: %w", checkpointDir, err)
+	}
+
+	fileName := wal.NumberToFilename(last)
+	if last < 0 {
+		fileName = "root.checkpoint"
+	}
+
+	checkpointFilePath := filepath.Join(checkpointDir, fileName)
+	return checkpointFilePath, nil
+}
+
+// GenerateProtocolSnapshotForCheckpointWithHeights does the same thing as GenerateProtocolSnapshotForCheckpoint
+// except that it allows the caller to specify the end height of the sealed block that we iterate backwards from.
+func GenerateProtocolSnapshotForCheckpointWithHeights(
+	logger zerolog.Logger,
+	state protocol.State,
+	headers storage.Headers,
+	seals storage.Seals,
+	checkpointDir string,
+	blocksToSkip uint,
+	endHeight uint64,
+) (protocol.Snapshot, uint64, flow.StateCommitment, error) {
+	// Stop searching after 10,000 iterations or upon reaching the minimum height, whichever comes first.
+	startHeight := uint64(0)
+	// prevent the startHeight computation from underflowing (heights are unsigned)
+	length := uint64(blocksToSkip+1) * 10000
+	if endHeight > length {
+		startHeight = endHeight - length
+	}
+
+	checkpointFilePath, err := findLatestCheckpointFilePath(checkpointDir)
+	if err != nil {
+		return nil, 0, flow.DummyStateCommitment, fmt.Errorf("could not find latest checkpoint file in directory %v: %w", checkpointDir, err)
+	}
+
+	log.Info().
+		Uint64("start_height", startHeight).
+		Uint64("end_height", endHeight).
+		Uint("blocksToSkip", blocksToSkip).
+ Msgf("generating protocol snapshot for checkpoint file %v", checkpointFilePath) + // find the height of the finalized block that produces the state commitment contained in the checkpoint file + sealedHeight, commit, finalizedHeight, err := FindHeightsByCheckpoints(logger, headers, seals, checkpointFilePath, blocksToSkip, startHeight, endHeight) + if err != nil { + return nil, 0, flow.DummyStateCommitment, fmt.Errorf("could not find sealed height in range [%v:%v] (blocksToSkip: %v) by checkpoints: %w", + startHeight, endHeight, blocksToSkip, + err) + } + + snapshot := state.AtHeight(finalizedHeight) + validSnapshot, err := snapshots.GetDynamicBootstrapSnapshot(state, snapshot) + if err != nil { + return nil, 0, flow.DummyStateCommitment, fmt.Errorf("could not get dynamic bootstrap snapshot: %w", err) + } + + return validSnapshot, sealedHeight, commit, nil +} + +// hashesToCommits converts a list of ledger.RootHash to a list of flow.StateCommitment +func hashesToCommits(hashes []ledger.RootHash) []flow.StateCommitment { + commits := make([]flow.StateCommitment, len(hashes)) + for i, h := range hashes { + commits[i] = flow.StateCommitment(h) + } + return commits +} diff --git a/engine/access/rpc/backend/backend_network.go b/engine/access/rpc/backend/backend_network.go index 8405d03200a..6c195197f60 100644 --- a/engine/access/rpc/backend/backend_network.go +++ b/engine/access/rpc/backend/backend_network.go @@ -3,7 +3,6 @@ package backend import ( "context" "errors" - "fmt" "github.com/onflow/flow-go/state" @@ -15,11 +14,10 @@ import ( "github.com/onflow/flow-go/engine/common/rpc/convert" "github.com/onflow/flow-go/model/flow" "github.com/onflow/flow-go/state/protocol" + "github.com/onflow/flow-go/state/protocol/snapshots" "github.com/onflow/flow-go/storage" ) -var SnapshotHistoryLimitErr = fmt.Errorf("reached the snapshot history limit") - type backendNetwork struct { state protocol.State chainID flow.ChainID @@ -78,7 +76,7 @@ func (b *backendNetwork) GetNodeVersionInfo(ctx context.Context) (*access.NodeVe func (b *backendNetwork) GetLatestProtocolStateSnapshot(_ context.Context) ([]byte, error) { snapshot := b.state.Final() - validSnapshot, err := b.getValidSnapshot(snapshot, 0, true) + validSnapshot, err := snapshots.GetClosestDynamicBootstrapSnapshot(b.state, snapshot, b.snapshotHistoryLimit) if err != nil { return nil, err } @@ -133,9 +131,9 @@ func (b *backendNetwork) GetProtocolStateSnapshotByBlockID(_ context.Context, bl "failed to retrieve snapshot for block: block not finalized and is below finalized height") } - validSnapshot, err := b.getValidSnapshot(snapshot, 0, false) + validSnapshot, err := snapshots.GetDynamicBootstrapSnapshot(b.state, snapshot) if err != nil { - if errors.Is(err, ErrSnapshotPhaseMismatch) { + if errors.Is(err, snapshots.ErrSnapshotPhaseMismatch) { return nil, status.Errorf(codes.InvalidArgument, "failed to retrieve snapshot for block, try again with different block: "+ "%v", err) @@ -169,9 +167,9 @@ func (b *backendNetwork) GetProtocolStateSnapshotByHeight(_ context.Context, blo return nil, status.Errorf(codes.Internal, "failed to get a valid snapshot: %v", err) } - validSnapshot, err := b.getValidSnapshot(snapshot, 0, false) + validSnapshot, err := snapshots.GetDynamicBootstrapSnapshot(b.state, snapshot) if err != nil { - if errors.Is(err, ErrSnapshotPhaseMismatch) { + if errors.Is(err, snapshots.ErrSnapshotPhaseMismatch) { return nil, status.Errorf(codes.InvalidArgument, "failed to retrieve snapshot for block, try again with different block: "+ "%v", err) 
@@ -186,84 +184,3 @@ func (b *backendNetwork) GetProtocolStateSnapshotByHeight(_ context.Context, blo return data, nil } - -func (b *backendNetwork) isEpochOrPhaseDifferent(counter1, counter2 uint64, phase1, phase2 flow.EpochPhase) bool { - return counter1 != counter2 || phase1 != phase2 -} - -// getValidSnapshot will return a valid snapshot that has a sealing segment which -// 1. does not contain any blocks that span an epoch transition -// 2. does not contain any blocks that span an epoch phase transition -// If a snapshot does contain an invalid sealing segment query the state -// by height of each block in the segment and return a snapshot at the point -// where the transition happens. -// Expected error returns during normal operations: -// * ErrSnapshotPhaseMismatch - snapshot does not contain a valid sealing segment -// All other errors should be treated as exceptions. -func (b *backendNetwork) getValidSnapshot(snapshot protocol.Snapshot, blocksVisited int, findNextValidSnapshot bool) (protocol.Snapshot, error) { - segment, err := snapshot.SealingSegment() - if err != nil { - return nil, fmt.Errorf("failed to get sealing segment: %w", err) - } - - counterAtHighest, phaseAtHighest, err := b.getCounterAndPhase(segment.Highest().Header.Height) - if err != nil { - return nil, fmt.Errorf("failed to get counter and phase at highest block in the segment: %w", err) - } - - counterAtLowest, phaseAtLowest, err := b.getCounterAndPhase(segment.Sealed().Header.Height) - if err != nil { - return nil, fmt.Errorf("failed to get counter and phase at lowest block in the segment: %w", err) - } - - // Check if the counters and phase are different this indicates that the sealing segment - // of the snapshot requested spans either an epoch transition or phase transition. - if b.isEpochOrPhaseDifferent(counterAtHighest, counterAtLowest, phaseAtHighest, phaseAtLowest) { - if !findNextValidSnapshot { - return nil, ErrSnapshotPhaseMismatch - } - - // Visit each node in strict order of decreasing height starting at head - // to find the block that straddles the transition boundary. - for i := len(segment.Blocks) - 1; i >= 0; i-- { - blocksVisited++ - - // NOTE: Check if we have reached our history limit, in edge cases - // where the sealing segment is abnormally long we want to short circuit - // the recursive calls and return an error. The API caller can retry. - if blocksVisited > b.snapshotHistoryLimit { - return nil, fmt.Errorf("%w: (%d)", SnapshotHistoryLimitErr, b.snapshotHistoryLimit) - } - - counterAtBlock, phaseAtBlock, err := b.getCounterAndPhase(segment.Blocks[i].Header.Height) - if err != nil { - return nil, fmt.Errorf("failed to get epoch counter and phase for snapshot at block %s: %w", segment.Blocks[i].ID(), err) - } - - // Check if this block straddles the transition boundary, if it does return the snapshot - // at that block height. 
- if b.isEpochOrPhaseDifferent(counterAtHighest, counterAtBlock, phaseAtHighest, phaseAtBlock) { - return b.getValidSnapshot(b.state.AtHeight(segment.Blocks[i].Header.Height), blocksVisited, true) - } - } - } - - return snapshot, nil -} - -// getCounterAndPhase will return the epoch counter and phase at the specified height in state -func (b *backendNetwork) getCounterAndPhase(height uint64) (uint64, flow.EpochPhase, error) { - snapshot := b.state.AtHeight(height) - - counter, err := snapshot.Epochs().Current().Counter() - if err != nil { - return 0, 0, fmt.Errorf("failed to get counter for block (height=%d): %w", height, err) - } - - phase, err := snapshot.Phase() - if err != nil { - return 0, 0, fmt.Errorf("failed to get phase for block (height=%d): %w", height, err) - } - - return counter, phase, nil -} diff --git a/engine/access/rpc/backend/backend_test.go b/engine/access/rpc/backend/backend_test.go index 03715036e0e..d0810192700 100644 --- a/engine/access/rpc/backend/backend_test.go +++ b/engine/access/rpc/backend/backend_test.go @@ -35,6 +35,7 @@ import ( bprotocol "github.com/onflow/flow-go/state/protocol/badger" "github.com/onflow/flow-go/state/protocol/invalid" protocol "github.com/onflow/flow-go/state/protocol/mock" + "github.com/onflow/flow-go/state/protocol/snapshots" "github.com/onflow/flow-go/state/protocol/util" "github.com/onflow/flow-go/storage" storagemock "github.com/onflow/flow-go/storage/mock" @@ -403,7 +404,7 @@ func (suite *Suite) TestGetLatestProtocolStateSnapshot_HistoryLimit() { // the handler should return a snapshot history limit error _, err = backend.GetLatestProtocolStateSnapshot(context.Background()) - suite.Require().ErrorIs(err, SnapshotHistoryLimitErr) + suite.Require().ErrorIs(err, snapshots.ErrSnapshotHistoryLimit) }) } @@ -677,7 +678,7 @@ func (suite *Suite) TestGetProtocolStateSnapshotByBlockID_InvalidSegment() { suite.Require().Error(err) suite.Require().Empty(bytes) suite.Require().Equal(status.Errorf(codes.InvalidArgument, "failed to retrieve snapshot for block, try again with different block: %v", - ErrSnapshotPhaseMismatch).Error(), + snapshots.ErrSnapshotPhaseMismatch).Error(), err.Error()) }) @@ -698,7 +699,7 @@ func (suite *Suite) TestGetProtocolStateSnapshotByBlockID_InvalidSegment() { suite.Require().Error(err) suite.Require().Empty(bytes) suite.Require().Equal(status.Errorf(codes.InvalidArgument, "failed to retrieve snapshot for block, try again with different block: %v", - ErrSnapshotPhaseMismatch).Error(), + snapshots.ErrSnapshotPhaseMismatch).Error(), err.Error()) }) }) @@ -815,7 +816,7 @@ func (suite *Suite) TestGetProtocolStateSnapshotByHeight_InvalidSegment() { suite.Require().Error(err) suite.Require().Equal(status.Errorf(codes.InvalidArgument, "failed to retrieve snapshot for block, try "+ "again with different block: %v", - ErrSnapshotPhaseMismatch).Error(), + snapshots.ErrSnapshotPhaseMismatch).Error(), err.Error()) }) } diff --git a/engine/access/rpc/backend/errors.go b/engine/access/rpc/backend/errors.go index 38a9f84aec7..4752c6563ce 100644 --- a/engine/access/rpc/backend/errors.go +++ b/engine/access/rpc/backend/errors.go @@ -7,10 +7,6 @@ import ( "github.com/onflow/flow-go/model/flow" ) -// ErrSnapshotPhaseMismatch indicates that a valid sealing segment cannot be build for a snapshot because -// the snapshot requested spans either an epoch transition or phase transition. 
-var ErrSnapshotPhaseMismatch = errors.New("snapshot does not contain a valid sealing segment")
-
 // InsufficientExecutionReceipts indicates that no execution receipt were found for a given block ID
 type InsufficientExecutionReceipts struct {
 	blockID   flow.Identifier
diff --git a/fvm/environment/block_info.go b/fvm/environment/block_info.go
index 9e55a67c649..928859c21f2 100644
--- a/fvm/environment/block_info.go
+++ b/fvm/environment/block_info.go
@@ -142,6 +142,10 @@ func (info *blockInfo) GetBlockAtHeight(
 		return runtimeBlockFromHeader(info.blockHeader), true, nil
 	}
 
+	if height+uint64(flow.DefaultTransactionExpiry) < info.blockHeader.Height {
+		return runtime.Block{}, false, errors.NewBlockHeightOutOfRangeError(height)
+	}
+
 	header, err := info.blocks.ByHeightFrom(height, info.blockHeader)
 	// TODO (ramtin): remove dependency on storage and move this if condition
 	// to blockfinder
diff --git a/fvm/errors/codes.go b/fvm/errors/codes.go
index 9e8069e3a10..3d51e444c07 100644
--- a/fvm/errors/codes.go
+++ b/fvm/errors/codes.go
@@ -59,6 +59,7 @@ const (
 	ErrCodeAccountAuthorizationError   ErrorCode = 1055
 	ErrCodeOperationAuthorizationError ErrorCode = 1056
 	ErrCodeOperationNotSupportedError  ErrorCode = 1057
+	ErrCodeBlockHeightOutOfRangeError  ErrorCode = 1058
 
 	// execution errors 1100 - 1200
 	// Deprecated: No longer used.
diff --git a/fvm/errors/execution.go b/fvm/errors/execution.go
index 38a78bc8d21..b484c805f93 100644
--- a/fvm/errors/execution.go
+++ b/fvm/errors/execution.go
@@ -237,6 +237,17 @@ func IsOperationNotSupportedError(err error) bool {
 	return HasErrorCode(err, ErrCodeOperationNotSupportedError)
 }
 
+func NewBlockHeightOutOfRangeError(height uint64) CodedError {
+	return NewCodedError(
+		ErrCodeBlockHeightOutOfRangeError,
+		"block height (%v) is out of queryable range",
+		height)
+}
+
+func IsBlockHeightOutOfRangeError(err error) bool {
+	return HasErrorCode(err, ErrCodeBlockHeightOutOfRangeError)
+}
+
 // NewScriptExecutionCancelledError construct a new CodedError which indicates
 // that Cadence Script execution has been cancelled (e.g. request connection
 // has been droped)
diff --git a/ledger/complete/wal/checkpoint_v6_reader.go b/ledger/complete/wal/checkpoint_v6_reader.go
index c2703261d24..460343c49b4 100644
--- a/ledger/complete/wal/checkpoint_v6_reader.go
+++ b/ledger/complete/wal/checkpoint_v6_reader.go
@@ -20,6 +20,7 @@ import (
 // ErrEOFNotReached for indicating end of file not reached error
 var ErrEOFNotReached = errors.New("expect to reach EOF, but actually didn't")
 
+// TODO: validate the header file and the sub file that contains the root hashes
 var ReadTriesRootHash = readTriesRootHash
 var CheckpointHasRootHash = checkpointHasRootHash
 
diff --git a/model/flow/sealing_segment.go b/model/flow/sealing_segment.go
index dffd5a9cef5..590f423f2e8 100644
--- a/model/flow/sealing_segment.go
+++ b/model/flow/sealing_segment.go
@@ -33,7 +33,8 @@ type SealingSegment struct {
 	// (see sealing_segment.md for details):
 	//  (ii) All blocks that are sealed by `head`. This is relevant if `head` contains _multiple_ seals.
 	// (iii) The sealing segment holds the history of all non-expired collection guarantees, i.e.
-	//       limitHeight := max(head.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
+	//       limitHeight := max(blockSealedAtHead.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
+	//       where blockSealedAtHead is the block sealed by the `head` block.
// (Potentially longer history is permitted) ExtraBlocks []*Block diff --git a/model/flow/sealing_segment.md b/model/flow/sealing_segment.md index 1cc6544ec37..cc956754d85 100644 --- a/model/flow/sealing_segment.md +++ b/model/flow/sealing_segment.md @@ -2,15 +2,15 @@ The `SealingSegment` is a section of the finalized chain. It is part of the data need to initialize a new node to join the network. Informally, the `SealingSegment` is continuous section -of recently finalized blocks that is long enough for the new node to execute its business logic. +of recently finalized blocks that is long enough for the new node to execute its business logic. -## History length covered by the Sealing Segment +## History length covered by the Sealing Segment The `SealingSegment` is created from a `protocol.Snapshot` via the method `SealingSegment`. -Lets denote the block that the `protocol.Snapshot` refers to as `head`. Per convention, -`head` must be a finalized block. +Lets denote the block that the `protocol.Snapshot` refers to as `head`. Per convention, +`head` must be a finalized block. -### Part 1: from `head` back to the latest sealed block +### Part 1: from `head` back to the latest sealed block The SealingSegment is a chain segment such that the last block (greatest height) is this snapshot's reference block (i.e. `head`) and the first (least height) is the most @@ -40,10 +40,10 @@ type SealingSegment struct { Blocks []*Block ⋮ -} +} ``` -**Minimality Requirement for `SealingSegment.Blocks`**: +**Minimality Requirement for `SealingSegment.Blocks`**: In example 3, note that block `B` is the highest sealed block as of `E`. Therefore, the lowest block in `SealingSegment.Blocks` must be `B`. Essentially, this is a minimality requirement for the history: it shouldn't be longer than necessary. So @@ -68,10 +68,10 @@ ExtraBlocks []*Block ``` **In case `head` contains multiple seals, we need _all_ the sealed blocks**, for the following reason: -* All nodes locally maintain a copy of the protocol state. A service event may change the state of the protocol state. +* All nodes locally maintain a copy of the protocol state. A service event may change the state of the protocol state. * For Byzantine resilience, we don't want protocol-state changes to take effect immediately. Therefore, we process system events only after receiving a QC for the block. - + Now let us consider the situation where a newly initialized node comes online and processes the first child of `head`. Lets reuse the example from above, where our head was block `E` and we are now processing the child `X` ``` @@ -80,14 +80,14 @@ ExtraBlocks []*Block ExtraBlocks Blocks block ``` `X` carries the QC for `E`, hence the protocol-state changes in `E` take effect for `X`. Therefore, when processing `X`, - we go through the seals in `E` and look through the sealed execution results for service events. + we go through the seals in `E` and look through the sealed execution results for service events. * As the service events are order-sensitive, we need to process the seals in the correct order, which is by increasing height - of the sealed block. The seals don't contain the block's height information, hence we need to resolve the block. + of the sealed block. The seals don't contain the block's height information, hence we need to resolve the block. **Extended history to check for duplicated collection guarantees in blocks** is required by nodes that _validate_ block payloads (e.g. consensus nodes). Also Access Nodes require these blocks. 
 Collections expire after `flow.DefaultTransactionExpiry` blocks. Hence, we desire a history
 of `flow.DefaultTransactionExpiry` blocks. However, there is the edge case of a recent spork (or genesis),
-where the history is simply less that `flow.DefaultTransactionExpiry`.
+where the history is simply less than `flow.DefaultTransactionExpiry`.
 
 ### Formal definition
 
@@ -98,10 +98,11 @@ The descriptions from the previous section can be formalized as follows
 * (ii) All blocks that are sealed by `head`. This is relevant if `head` contains _multiple_ seals.
 * (iii) The sealing segment should contain the history back to (including):
   ```
-  limitHeight := max(head.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
+  limitHeight := max(blockSealedAtHead.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
   ```
+  where blockSealedAtHead is the block sealed by the `head` block.
 Note that all three conditions have to be satisfied by a sealing segment. Therefore, it must contain the longest history
-required by any of the three conditions. The 'Spork Root Block' is the cutoff.
+required by any of the three conditions. The 'Spork Root Block' is the cutoff.
 Per convention, we include the blocks for (i) in the `SealingSegment.Blocks`, while the
 additional blocks for (ii) and optionally (iii) are contained in as `SealingSegment.ExtraBlocks`.
 
@@ -147,5 +148,5 @@ In its current state, the sealing segment has been evolving driven by different
 and other improvements. However, an important aspect of the sealing segment is to allow newly-joining nodes to build an
 internal representation of the protocol state, in particular the identity table. There are large changes coming around
 when we move to the dynamic identity table. Therefore, we accept that the Sealing Segment currently has some technical
 debt and unnecessary complexity. Once we have implemented the
-dynamic identity table, we will have a much more solidified understanding of the data in the sealing segment.
+dynamic identity table, we will have a much more solidified understanding of the data in the sealing segment.
diff --git a/state/protocol/badger/snapshot.go b/state/protocol/badger/snapshot.go
index 90cdebc6db9..6dbba18b09f 100644
--- a/state/protocol/badger/snapshot.go
+++ b/state/protocol/badger/snapshot.go
@@ -231,7 +231,7 @@ func (s *Snapshot) SealingSegment() (*flow.SealingSegment, error) {
 	//      This is relevant if `head` does not contain any seals.
 	//  (ii) All blocks that are sealed by `head`. This is relevant if head` contains _multiple_ seals.
 	// (iii) The sealing segment should contain the history back to (including):
-	//           limitHeight := max(head.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
+	//           limitHeight := max(blockSealedAtHead.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
 	// Per convention, we include the blocks for (i) in the `SealingSegment.Blocks`, while the
 	// additional blocks for (ii) and optionally (iii) are contained in as `SealingSegment.ExtraBlocks`.
 	head, err := s.state.blocks.ByID(s.blockID)
@@ -299,10 +299,10 @@ func (s *Snapshot) SealingSegment() (*flow.SealingSegment, error) {
 	}
 
 	// STEP (iii): extended history to allow checking for duplicated collections, i.e.
-	//           limitHeight = max(head.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
+	//           limitHeight = max(blockSealedAtHead.Height - flow.DefaultTransactionExpiry, SporkRootBlockHeight)
 	limitHeight := s.state.sporkRootBlockHeight
-	if head.Header.Height > s.state.sporkRootBlockHeight+flow.DefaultTransactionExpiry {
-		limitHeight = head.Header.Height - flow.DefaultTransactionExpiry
+	if blockSealedAtHead.Height > s.state.sporkRootBlockHeight+flow.DefaultTransactionExpiry {
+		limitHeight = blockSealedAtHead.Height - flow.DefaultTransactionExpiry
 	}
 
 	// As we have to satisfy (ii) _and_ (iii), we have to take the longest history, i.e. the lowest height.
diff --git a/state/protocol/badger/snapshot_test.go b/state/protocol/badger/snapshot_test.go
index 54f1522b2b8..9b6f783ce0e 100644
--- a/state/protocol/badger/snapshot_test.go
+++ b/state/protocol/badger/snapshot_test.go
@@ -588,10 +588,63 @@ func TestSealingSegment(t *testing.T) {
 			assertSealingSegmentBlocksQueryableAfterBootstrap(t, snapshot)
 		})
 	})
+
+	// Root <- B1 <- B2 <- ... <- B700(Seal_B699)
+	// Expected sealing segment: [B699, B700], Extra blocks: [B99, B100, ..., B698]
+	// where DefaultTransactionExpiry = 600
+	t.Run("test extra blocks contain exactly DefaultTransactionExpiry number of blocks below the sealed block", func(t *testing.T) {
+		util.RunWithFollowerProtocolState(t, rootSnapshot, func(db *badger.DB, state *bprotocol.FollowerState) {
+			root := unittest.BlockWithParentFixture(head)
+			buildFinalizedBlock(t, state, root)
+
+			blocks := make([]*flow.Block, 0, flow.DefaultTransactionExpiry+3)
+			parent := root
+			for i := 0; i < flow.DefaultTransactionExpiry+1; i++ {
+				next := unittest.BlockWithParentFixture(parent.Header)
+				next.Header.View = next.Header.Height + 1 // set view so we are still in the same epoch
+				buildFinalizedBlock(t, state, next)
+				blocks = append(blocks, next)
+				parent = next
+			}
+
+			// last sealed block
+			lastSealedBlock := parent
+			lastReceipt, lastSeal := unittest.ReceiptAndSealForBlock(lastSealedBlock)
+			prevLastBlock := unittest.BlockWithParentFixture(lastSealedBlock.Header)
+			prevLastBlock.SetPayload(unittest.PayloadFixture(
+				unittest.WithReceipts(lastReceipt),
+			))
+			buildFinalizedBlock(t, state, prevLastBlock)
+
+			// last finalized block
+			lastBlock := unittest.BlockWithParentFixture(prevLastBlock.Header)
+			lastBlock.SetPayload(unittest.PayloadFixture(
+				unittest.WithSeals(lastSeal),
+			))
+			buildFinalizedBlock(t, state, lastBlock)
+
+			// build a valid child to ensure we have a QC
+			buildFinalizedBlock(t, state, unittest.BlockWithParentFixture(lastBlock.Header))
+
+			snapshot := state.AtBlockID(lastBlock.ID())
+			segment, err := snapshot.SealingSegment()
+			require.NoError(t, err)
+
+			assert.Equal(t, lastBlock.Header, segment.Highest().Header)
+			assert.Equal(t, lastBlock.Header, segment.Finalized().Header)
+			assert.Equal(t, lastSealedBlock.Header, segment.Sealed().Header)
+
+			// ExtraBlocks should contain exactly DefaultTransactionExpiry blocks below the sealed block
+			unittest.AssertEqualBlocksLenAndOrder(t, blocks[:flow.DefaultTransactionExpiry], segment.ExtraBlocks)
+			assert.Len(t, segment.ExtraBlocks, flow.DefaultTransactionExpiry)
+			assertSealingSegmentBlocksQueryableAfterBootstrap(t, snapshot)
+
+		})
+	})
 	// Test the case where the reference block of the snapshot contains seals for blocks that are lower than the lowest sealing segment's block.
 	// This test case specifically checks if sealing segment includes both highest and lowest block sealed by head.
 	// ROOT <- B1 <- B2 <- B3(Seal_B1) <- B4 <- ... <- LastBlock(Seal_B2, Seal_B3, Seal_B4)
-	// Expected sealing segment: [B4, ..., B5], Extra blocks: [B2, B3]
+	// Expected sealing segment: [B4, ..., LastBlock], Extra blocks: [Root, B1, B2, B3]
 	t.Run("highest block seals outside segment", func(t *testing.T) {
 		util.RunWithFollowerProtocolState(t, rootSnapshot, func(db *badger.DB, state *bprotocol.FollowerState) {
 			// build a block to seal
@@ -637,7 +690,8 @@ func TestSealingSegment(t *testing.T) {
 			require.NoError(t, err)
 			assert.Equal(t, lastBlock.Header, segment.Highest().Header)
 			assert.Equal(t, block4.Header, segment.Sealed().Header)
-			unittest.AssertEqualBlocksLenAndOrder(t, []*flow.Block{block2, block3}, segment.ExtraBlocks)
+			root := rootSnapshot.Encodable().SealingSegment.Sealed()
+			unittest.AssertEqualBlocksLenAndOrder(t, []*flow.Block{root, block1, block2, block3}, segment.ExtraBlocks)
 			assert.Len(t, segment.ExecutionResults, 2)
 			assertSealingSegmentBlocksQueryableAfterBootstrap(t, snapshot)
 
diff --git a/state/protocol/snapshots/dynamic_bootstrap.go b/state/protocol/snapshots/dynamic_bootstrap.go
new file mode 100644
index 00000000000..6647fa101bf
--- /dev/null
+++ b/state/protocol/snapshots/dynamic_bootstrap.go
@@ -0,0 +1,122 @@
+package snapshots
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/onflow/flow-go/model/flow"
+	"github.com/onflow/flow-go/state/protocol"
+)
+
+var ErrSnapshotPhaseMismatch = errors.New("snapshot does not contain a valid sealing segment")
+var ErrSnapshotHistoryLimit = errors.New("reached the snapshot history limit")
+
+// GetDynamicBootstrapSnapshot returns `refSnapshot` if it is valid for use in dynamic bootstrapping.
+// Otherwise returns an error. (Effectively this validates that the input snapshot can be used in dynamic bootstrapping.)
+// Expected error returns during normal operations:
+// * ErrSnapshotPhaseMismatch - snapshot does not contain a valid sealing segment
+// All other errors should be treated as exceptions.
+func GetDynamicBootstrapSnapshot(state protocol.State, refSnapshot protocol.Snapshot) (protocol.Snapshot, error) {
+	return getValidSnapshot(state, refSnapshot, 0, false, 0)
+}
+
+// GetClosestDynamicBootstrapSnapshot will return a valid snapshot for dynamic bootstrapping.
+// If the given snapshot contains an invalid sealing segment, it queries the state by height
+// for each block in the segment and returns a snapshot at the point where the transition happens.
+// Expected error returns during normal operations:
+// * ErrSnapshotPhaseMismatch - snapshot does not contain a valid sealing segment
+// * ErrSnapshotHistoryLimit - reached the snapshot history limit
+// All other errors should be treated as exceptions.
+func GetClosestDynamicBootstrapSnapshot(state protocol.State, refSnapshot protocol.Snapshot, snapshotHistoryLimit int) (protocol.Snapshot, error) {
+	return getValidSnapshot(state, refSnapshot, 0, true, snapshotHistoryLimit)
+}
+
+// GetCounterAndPhase returns the current epoch counter and phase at `height`.
+// No errors are expected during normal operation.
+func GetCounterAndPhase(state protocol.State, height uint64) (uint64, flow.EpochPhase, error) {
+	snapshot := state.AtHeight(height)
+
+	counter, err := snapshot.Epochs().Current().Counter()
+	if err != nil {
+		return 0, 0, fmt.Errorf("failed to get counter for block (height=%d): %w", height, err)
+	}
+
+	phase, err := snapshot.Phase()
+	if err != nil {
+		return 0, 0, fmt.Errorf("failed to get phase for block (height=%d): %w", height, err)
+	}
+
+	return counter, phase, nil
+}
+
+func IsEpochOrPhaseDifferent(counter1, counter2 uint64, phase1, phase2 flow.EpochPhase) bool {
+	return counter1 != counter2 || phase1 != phase2
+}
+
+// getValidSnapshot will return a valid snapshot that has a sealing segment which
+// 1. does not contain any blocks that span an epoch transition
+// 2. does not contain any blocks that span an epoch phase transition
+// If the given snapshot contains an invalid sealing segment and findNextValidSnapshot is true,
+// it queries the state by height for each block in the segment and returns a snapshot at the
+// point where the transition happens.
+// Expected error returns during normal operations:
+// * ErrSnapshotPhaseMismatch - snapshot does not contain a valid sealing segment
+// * ErrSnapshotHistoryLimit - failed to find a valid snapshot after checking `snapshotHistoryLimit` blocks
+// All other errors should be treated as exceptions.
+func getValidSnapshot(
+	state protocol.State,
+	snapshot protocol.Snapshot,
+	blocksVisited int,
+	findNextValidSnapshot bool,
+	snapshotHistoryLimit int,
+) (protocol.Snapshot, error) {
+	segment, err := snapshot.SealingSegment()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get sealing segment: %w", err)
+	}
+
+	counterAtHighest, phaseAtHighest, err := GetCounterAndPhase(state, segment.Highest().Header.Height)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get counter and phase at highest block in the segment: %w", err)
+	}
+
+	counterAtLowest, phaseAtLowest, err := GetCounterAndPhase(state, segment.Sealed().Header.Height)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get counter and phase at lowest block in the segment: %w", err)
+	}
+
+	// Check if the counters and phases are different; this indicates that the sealing segment
+	// of the requested snapshot spans either an epoch transition or a phase transition.
+	if IsEpochOrPhaseDifferent(counterAtHighest, counterAtLowest, phaseAtHighest, phaseAtLowest) {
+		if !findNextValidSnapshot {
+			return nil, ErrSnapshotPhaseMismatch
+		}
+
+		// Visit each node in strict order of decreasing height starting at head
+		// to find the block that straddles the transition boundary.
+		for i := len(segment.Blocks) - 1; i >= 0; i-- {
+			blocksVisited++
+
+			// NOTE: Check if we have reached our history limit, in edge cases
+			// where the sealing segment is abnormally long we want to short circuit
+			// the recursive calls and return an error. The API caller can retry.
+			if blocksVisited > snapshotHistoryLimit {
+				return nil, fmt.Errorf("%w: (%d)", ErrSnapshotHistoryLimit, snapshotHistoryLimit)
+			}
+
+			counterAtBlock, phaseAtBlock, err := GetCounterAndPhase(state, segment.Blocks[i].Header.Height)
+			if err != nil {
+				return nil, fmt.Errorf("failed to get epoch counter and phase for snapshot at block %s: %w", segment.Blocks[i].ID(), err)
+			}
+
+			// Check if this block straddles the transition boundary; if it does, return the snapshot
+			// at that block height.
+ if IsEpochOrPhaseDifferent(counterAtHighest, counterAtBlock, phaseAtHighest, phaseAtBlock) { + return getValidSnapshot(state, state.AtHeight(segment.Blocks[i].Header.Height), blocksVisited, true, snapshotHistoryLimit) + } + } + } + + return snapshot, nil +}
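
For context, a hedged usage sketch of the two entry points added in the new `snapshots` package; the caller function here is hypothetical and not part of this diff:

```go
package example

import (
	"errors"
	"fmt"

	"github.com/onflow/flow-go/state/protocol"
	"github.com/onflow/flow-go/state/protocol/snapshots"
)

// bootstrapSnapshot (hypothetical caller) tries the strict variant first and
// falls back to searching backwards when the sealing segment spans an epoch
// or phase transition.
func bootstrapSnapshot(state protocol.State, historyLimit int) (protocol.Snapshot, error) {
	snap, err := snapshots.GetDynamicBootstrapSnapshot(state, state.Final())
	if errors.Is(err, snapshots.ErrSnapshotPhaseMismatch) {
		// The segment spans a transition; walk back to the closest valid snapshot,
		// giving up after historyLimit blocks (ErrSnapshotHistoryLimit).
		return snapshots.GetClosestDynamicBootstrapSnapshot(state, state.Final(), historyLimit)
	}
	if err != nil {
		return nil, fmt.Errorf("could not get dynamic bootstrap snapshot: %w", err)
	}
	return snap, nil
}
```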