From 4a878d1c67725f4f1c5646245f0df8ddb8288379 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 18 Jan 2021 12:04:26 +0100 Subject: [PATCH 01/22] eth/protocols/snap: make timeout configurable --- eth/protocols/snap/sync.go | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index d6f0eb54724e..448fce2b269e 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -73,10 +73,6 @@ const ( // waste bandwidth. maxTrieRequestCount = 512 - // requestTimeout is the maximum time a peer is allowed to spend on serving - // a single network request. - requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync? - // accountConcurrency is the number of chunks to split the account trie into // to allow concurrent retrievals. accountConcurrency = 16 @@ -86,6 +82,12 @@ const ( storageConcurrency = 16 ) +var ( + // requestTimeout is the maximum time a peer is allowed to spend on serving + // a single network request. + requestTimeout = 10 * time.Second // TODO(karalabe): Make it dynamic ala fast-sync? +) + // accountRequest tracks a pending account range request to ensure responses are // to actual requests and to validate any security constraints. // From 9d8994c39e03d4d5579f8f9aed62997e440774d5 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 27 May 2020 11:08:31 +0200 Subject: [PATCH 02/22] eth/protocols/snap: snap sync testing --- eth/protocols/snap/peer.go | 11 + eth/protocols/snap/sync.go | 91 ++--- eth/protocols/snap/sync_test.go | 658 ++++++++++++++++++++++++++++++++ 3 files changed, 715 insertions(+), 45 deletions(-) diff --git a/eth/protocols/snap/peer.go b/eth/protocols/snap/peer.go index 73eaaadd0929..4c5e2bdfc87e 100644 --- a/eth/protocols/snap/peer.go +++ b/eth/protocols/snap/peer.go @@ -22,6 +22,17 @@ import ( "github.com/ethereum/go-ethereum/p2p" ) +// PeerIF exists so we can mock peers in testing, and not deal with +// the actual protocol marshalling +type PeerIF interface { + ID() string + RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error + RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error + RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error + RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error + Log() log.Logger +} + // Peer is a collection of relevant information we have about a `snap` peer. 
type Peer struct { id string // Unique ID for the peer, cached diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 448fce2b269e..04d65fe65d1a 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -353,9 +353,9 @@ type Syncer struct { healer *healTask // Current state healing task being executed update chan struct{} // Notification channel for possible sync progression - peers map[string]*Peer // Currently active peers to download from - peerJoin *event.Feed // Event feed to react to peers joining - peerDrop *event.Feed // Event feed to react to peers dropping + peers map[string]PeerIF // Currently active peers to download from + peerJoin *event.Feed // Event feed to react to peers joining + peerDrop *event.Feed // Event feed to react to peers dropping // Request tracking during syncing phase statelessPeers map[string]struct{} // Peers that failed to deliver state data @@ -417,7 +417,7 @@ func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { db: db, bloom: bloom, - peers: make(map[string]*Peer), + peers: make(map[string]PeerIF), peerJoin: new(event.Feed), peerDrop: new(event.Feed), update: make(chan struct{}, 1), @@ -449,27 +449,28 @@ func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { } // Register injects a new data source into the syncer's peerset. -func (s *Syncer) Register(peer *Peer) error { +func (s *Syncer) Register(peer PeerIF) error { // Make sure the peer is not registered yet s.lock.Lock() - if _, ok := s.peers[peer.id]; ok { - log.Error("Snap peer already registered", "id", peer.id) + pId := peer.ID() + if _, ok := s.peers[pId]; ok { + log.Error("Snap peer already registered", "id", pId) s.lock.Unlock() return errors.New("already registered") } - s.peers[peer.id] = peer + s.peers[pId] = peer // Mark the peer as idle, even if no sync is running - s.accountIdlers[peer.id] = struct{}{} - s.storageIdlers[peer.id] = struct{}{} - s.bytecodeIdlers[peer.id] = struct{}{} - s.trienodeHealIdlers[peer.id] = struct{}{} - s.bytecodeHealIdlers[peer.id] = struct{}{} + s.accountIdlers[pId] = struct{}{} + s.storageIdlers[pId] = struct{}{} + s.bytecodeIdlers[pId] = struct{}{} + s.trienodeHealIdlers[pId] = struct{}{} + s.bytecodeHealIdlers[pId] = struct{}{} s.lock.Unlock() // Notify any active syncs that a new peer can be assigned data - s.peerJoin.Send(peer.id) + s.peerJoin.Send(pId) return nil } @@ -800,7 +801,7 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) { delete(s.accountIdlers, idle) s.pend.Add(1) - go func(peer *Peer, root common.Hash) { + go func(peer PeerIF, root common.Hash) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -887,7 +888,7 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { delete(s.bytecodeIdlers, idle) s.pend.Add(1) - go func(peer *Peer) { + go func(peer PeerIF) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -1012,7 +1013,7 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) { delete(s.storageIdlers, idle) s.pend.Add(1) - go func(peer *Peer, root common.Hash) { + go func(peer PeerIF, root common.Hash) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -1127,7 +1128,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { delete(s.trienodeHealIdlers, idle) s.pend.Add(1) - go func(peer *Peer, root common.Hash) { + go func(peer PeerIF, root common.Hash) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails 
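A minimal sketch of the PeerIF abstraction introduced above (hypothetical illustration, not part of the patch series): any test double implementing the six methods can be handed to Syncer.Register, which is what lets the tests further down bypass the real protocol marshalling. The sketch assumes it lives in package snap so that TrieNodePathSet is in scope; the actual tests use the richer testPeer type added later in this patch.

package snap

import (
	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/log"
)

// nullPeer is an illustrative no-op test double: it satisfies PeerIF but
// accepts every request without ever delivering a response.
type nullPeer struct{ id string }

func (p *nullPeer) ID() string      { return p.id }
func (p *nullPeer) Log() log.Logger { return log.New("id", p.id) }

func (p *nullPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error {
	return nil // pretend the request was sent; no response will follow
}

func (p *nullPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error {
	return nil
}

func (p *nullPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error {
	return nil
}

func (p *nullPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error {
	return nil
}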
@@ -1225,7 +1226,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { delete(s.bytecodeHealIdlers, idle) s.pend.Add(1) - go func(peer *Peer) { + go func(peer PeerIF) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -1943,7 +1944,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // OnAccounts is a callback method to invoke when a range of accounts are // received from a remote peer. -func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { +func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { size := common.StorageSize(len(hashes) * common.HashLength) for _, account := range accounts { size += common.StorageSize(len(account)) @@ -1951,15 +1952,15 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account for _, node := range proof { size += common.StorageSize(len(node)) } - logger := peer.logger.New("reqid", id) + logger := peer.Log().New("reqid", peer.ID()) logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size) // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. s.lock.Lock() - if _, ok := s.peers[peer.id]; ok { - s.accountIdlers[peer.id] = struct{}{} + if _, ok := s.peers[peer.ID()]; ok { + s.accountIdlers[peer.ID()] = struct{}{} } select { case s.update <- struct{}{}: @@ -1985,7 +1986,7 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account // synced to our head. if len(hashes) == 0 && len(accounts) == 0 && len(proof) == 0 { logger.Debug("Peer rejected account range request", "root", s.root) - s.statelessPeers[peer.id] = struct{}{} + s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() // Signal this request as failed, and ready for rescheduling @@ -2052,7 +2053,7 @@ func (s *Syncer) OnAccounts(peer *Peer, id uint64, hashes []common.Hash, account // OnByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer. -func (s *Syncer) OnByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error { +func (s *Syncer) OnByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { s.lock.RLock() syncing := len(s.tasks) > 0 s.lock.RUnlock() @@ -2065,20 +2066,20 @@ func (s *Syncer) OnByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error { // onByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer in the syncing phase. -func (s *Syncer) onByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error { +func (s *Syncer) onByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { var size common.StorageSize for _, code := range bytecodes { size += common.StorageSize(len(code)) } - logger := peer.logger.New("reqid", id) + logger := peer.Log().New("reqid", id) logger.Trace("Delivering set of bytecodes", "bytecodes", len(bytecodes), "bytes", size) // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. 
s.lock.Lock() - if _, ok := s.peers[peer.id]; ok { - s.bytecodeIdlers[peer.id] = struct{}{} + if _, ok := s.peers[peer.ID()]; ok { + s.bytecodeIdlers[peer.ID()] = struct{}{} } select { case s.update <- struct{}{}: @@ -2103,7 +2104,7 @@ func (s *Syncer) onByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error { // yet synced. if len(bytecodes) == 0 { logger.Debug("Peer rejected bytecode request") - s.statelessPeers[peer.id] = struct{}{} + s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() // Signal this request as failed, and ready for rescheduling @@ -2152,7 +2153,7 @@ func (s *Syncer) onByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error { // OnStorage is a callback method to invoke when ranges of storage slots // are received from a remote peer. -func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { +func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { // Gather some trace stats to aid in debugging issues var ( hashCount int @@ -2172,15 +2173,15 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots for _, node := range proof { size += common.StorageSize(len(node)) } - logger := peer.logger.New("reqid", id) + logger := peer.Log().New("reqid", id) logger.Trace("Delivering ranges of storage slots", "accounts", len(hashes), "hashes", hashCount, "slots", slotCount, "proofs", len(proof), "size", size) // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. s.lock.Lock() - if _, ok := s.peers[peer.id]; ok { - s.storageIdlers[peer.id] = struct{}{} + if _, ok := s.peers[peer.ID()]; ok { + s.storageIdlers[peer.ID()] = struct{}{} } select { case s.update <- struct{}{}: @@ -2218,7 +2219,7 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots // synced to our head. if len(hashes) == 0 { logger.Debug("Peer rejected storage request") - s.statelessPeers[peer.id] = struct{}{} + s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() // Signal this request as failed, and ready for rescheduling @@ -2304,20 +2305,20 @@ func (s *Syncer) OnStorage(peer *Peer, id uint64, hashes [][]common.Hash, slots // OnTrieNodes is a callback method to invoke when a batch of trie nodes // are received from a remote peer. -func (s *Syncer) OnTrieNodes(peer *Peer, id uint64, trienodes [][]byte) error { +func (s *Syncer) OnTrieNodes(peer PeerIF, id uint64, trienodes [][]byte) error { var size common.StorageSize for _, node := range trienodes { size += common.StorageSize(len(node)) } - logger := peer.logger.New("reqid", id) + logger := peer.Log().New("reqid", id) logger.Trace("Delivering set of healing trienodes", "trienodes", len(trienodes), "bytes", size) // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. s.lock.Lock() - if _, ok := s.peers[peer.id]; ok { - s.trienodeHealIdlers[peer.id] = struct{}{} + if _, ok := s.peers[peer.ID()]; ok { + s.trienodeHealIdlers[peer.ID()] = struct{}{} } select { case s.update <- struct{}{}: @@ -2342,7 +2343,7 @@ func (s *Syncer) OnTrieNodes(peer *Peer, id uint64, trienodes [][]byte) error { // yet synced. 
if len(trienodes) == 0 { logger.Debug("Peer rejected trienode heal request") - s.statelessPeers[peer.id] = struct{}{} + s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() // Signal this request as failed, and ready for rescheduling @@ -2392,20 +2393,20 @@ func (s *Syncer) OnTrieNodes(peer *Peer, id uint64, trienodes [][]byte) error { // onHealByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer in the healing phase. -func (s *Syncer) onHealByteCodes(peer *Peer, id uint64, bytecodes [][]byte) error { +func (s *Syncer) onHealByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { var size common.StorageSize for _, code := range bytecodes { size += common.StorageSize(len(code)) } - logger := peer.logger.New("reqid", id) + logger := peer.Log().New("reqid", id) logger.Trace("Delivering set of healing bytecodes", "bytecodes", len(bytecodes), "bytes", size) // Whether or not the response is valid, we can mark the peer as idle and // notify the scheduler to assign a new task. If the response is invalid, // we'll drop the peer in a bit. s.lock.Lock() - if _, ok := s.peers[peer.id]; ok { - s.bytecodeHealIdlers[peer.id] = struct{}{} + if _, ok := s.peers[peer.ID()]; ok { + s.bytecodeHealIdlers[peer.ID()] = struct{}{} } select { case s.update <- struct{}{}: @@ -2430,7 +2431,7 @@ func (s *Syncer) onHealByteCodes(peer *Peer, id uint64, bytecodes [][]byte) erro // yet synced. if len(bytecodes) == 0 { logger.Debug("Peer rejected bytecode heal request") - s.statelessPeers[peer.id] = struct{}{} + s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() // Signal this request as failed, and ready for rescheduling diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index 4f28b99bfe0b..f96344e1910a 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -17,11 +17,24 @@ package snap import ( + "bytes" "crypto/rand" + "encoding/binary" "fmt" + "math/big" + "os" + "sort" "testing" + "time" + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/core/rawdb" + "github.com/ethereum/go-ethereum/core/state" "github.com/ethereum/go-ethereum/crypto" + "github.com/ethereum/go-ethereum/light" + "github.com/ethereum/go-ethereum/log" + "github.com/ethereum/go-ethereum/rlp" + "github.com/ethereum/go-ethereum/trie" "golang.org/x/crypto/sha3" ) @@ -96,3 +109,648 @@ func BenchmarkHashing(b *testing.B) { } }) } + +type testPeer struct { + id string + test *testing.T + remote *Syncer + log log.Logger + accountTrie *trie.Trie + accountValues entrySlice + storageTries map[common.Hash]*trie.Trie + storageValues map[common.Hash]entrySlice + + accountRequestHandler func(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error + storageRequestHandler func(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error + trieRequestHandler func(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error + codeRequestHandler func(t *testPeer, id uint64, hashes []common.Hash, max uint64) error + cancelCh chan struct{} +} + +func newTestPeer(id string, t *testing.T, cancelCh chan struct{}) *testPeer { + peer := &testPeer{ + id: id, + test: t, + log: log.New("id", id), + accountRequestHandler: defaultAccountRequestHandler, + trieRequestHandler: defaultTrieRequestHandler, + storageRequestHandler: defaultStorageRequestHandler, + codeRequestHandler: defaultCodeReqeustHandler, + 
cancelCh: cancelCh, + } + stdoutHandler := log.StreamHandler(os.Stdout, log.TerminalFormat(true)) + peer.log.SetHandler(stdoutHandler) + return peer + +} + +func (t *testPeer) ID() string { + return t.id +} + +func (t *testPeer) Log() log.Logger { + return t.log +} + +func (t *testPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, cap uint64) error { + t.Log().Info("<- AccRangeReq", "id", id, "root", root, "origin", origin, "limit", limit, "max", cap) + go t.accountRequestHandler(t, id, root, origin, cap) + return nil +} + +func (t *testPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error { + t.Log().Info("<- TrieNodeReq", "id", id, "root", root, "paths", len(paths), "limit", cap) + go t.trieRequestHandler(t, id, root, paths, cap) + return nil +} + +func (t *testPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { + t.Log().Info("<- StorRangeReq", "id", id, "root", root, "account[0]", accounts[0], + "origin", fmt.Sprintf("%x", origin), "limit", fmt.Sprintf("%x", limit), "max", max) + go t.storageRequestHandler(t, id, root, accounts, origin, limit, max) + return nil +} + +func (t *testPeer) RequestByteCodes(id uint64, hashes []common.Hash, max uint64) error { + t.Log().Info("<- CodeReq", "id", id, "#hashes", len(hashes), "max", max) + go t.codeRequestHandler(t, id, hashes, max) + return nil +} + +// defaultTrieRequestHandler is a well-behaving handler for trie healing requests +func defaultTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error { + // Pass the response + var nodes [][]byte + for _, pathset := range paths { + switch len(pathset) { + case 1: + blob, _, err := t.accountTrie.TryGetNode(pathset[0]) + if err != nil { + t.Log().Info("Error handling req", "error", err) + break + } + nodes = append(nodes, blob) + default: + account := t.storageTries[(common.BytesToHash(pathset[0]))] + for _, path := range pathset[1:] { + blob, _, err := account.TryGetNode(path) + if err != nil { + t.Log().Info("Error handling req", "error", err) + break + } + nodes = append(nodes, blob) + } + } + } + t.remote.OnTrieNodes(t, requestId, nodes) + return nil +} + +// defaultAccountRequestHandler is a well-behaving handler for AccountRangeRequests +func defaultAccountRequestHandler(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { + var ( + proofs [][]byte + keys []common.Hash + vals [][]byte + size uint64 + ) + for _, entry := range t.accountValues { + if size > cap { + break + } + if bytes.Compare(origin[:], entry.k) <= 0 { + keys = append(keys, common.BytesToHash(entry.k)) + vals = append(vals, entry.v) + size += uint64(32 + len(entry.v)) + } + } + // Unless we send the entire trie, we need to supply proofs + // Actually, we need to supply proofs either way! 
This seems tob be an implementation + // quirk in go-ethereum + proof := light.NewNodeSet() + if err := t.accountTrie.Prove(origin[:], 0, proof); err != nil { + t.log.Error("Could not prove inexistence of origin", "origin", origin, + "error", err) + } + if len(keys) > 0 { + lastK := (keys[len(keys)-1])[:] + if err := t.accountTrie.Prove(lastK, 0, proof); err != nil { + t.log.Error("Could not prove last item", + "error", err) + } + } + for _, blob := range proof.NodeList() { + proofs = append(proofs, blob) + } + if err := t.remote.OnAccounts(t, requestId, keys, vals, proofs); err != nil { + t.log.Error("remote error on delivery", "error", err) + t.test.Errorf("Remote side rejected our delivery: %v", err) + t.cancelCh <- struct{}{} + return err + } + return nil +} + +// defaultStorageRequestHandler is a well-behaving storage request handler +func defaultStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, bOrigin, bLimit []byte, max uint64) error { + hashes, slots, proofs := createStorageRequestResponse(t, root, accounts, bOrigin, bLimit, max) + if err := t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { + t.log.Error("remote error on delivery", "error", err) + t.test.Errorf("Remote side rejected our delivery: %v", err) + t.cancelCh <- struct{}{} + } + return nil +} + +func createStorageRequestResponse(t *testPeer, root common.Hash, accounts []common.Hash, bOrigin, bLimit []byte, max uint64) (hashes [][]common.Hash, slots [][][]byte, proofs [][]byte) { + var ( + size uint64 + limit = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") + ) + if len(bLimit) > 0 { + limit = common.BytesToHash(bLimit) + } + var origin common.Hash + if len(bOrigin) > 0 { + origin = common.BytesToHash(bOrigin) + } + + var limitExceeded bool + var incomplete bool + for _, account := range accounts { + t.Log().Info("Adding account", "account", account.Hex()) + + var keys []common.Hash + var vals [][]byte + for _, entry := range t.storageValues[account] { + if limitExceeded { + incomplete = true + break + } + if bytes.Compare(entry.k, origin[:]) < 0 { + incomplete = true + continue + } + keys = append(keys, common.BytesToHash(entry.k)) + vals = append(vals, entry.v) + size += uint64(32 + len(entry.v)) + if bytes.Compare(entry.k, limit[:]) >= 0 { + t.Log().Info("key outside of limit", "limit", fmt.Sprintf("%x", limit), "key", fmt.Sprintf("%x", entry.k)) + limitExceeded = true + } + if size > max { + limitExceeded = true + } + } + hashes = append(hashes, keys) + slots = append(slots, vals) + + if incomplete { + // If we're aborting, we need to prove the first and last item + // This terminates the response (and thus the loop) + proof := light.NewNodeSet() + stTrie := t.storageTries[account] + + // Here's a potential gotcha: when constructing the proof, we cannot + // use the 'origin' slice directly, but must use the full 32-byte + // hash form. 
+ if err := stTrie.Prove(origin[:], 0, proof); err != nil { + t.log.Error("Could not prove inexistence of origin", "origin", origin, + "error", err) + } + if len(keys) > 0 { + lastK := (keys[len(keys)-1])[:] + if err := stTrie.Prove(lastK, 0, proof); err != nil { + t.log.Error("Could not prove last item", "error", err) + } + } + for _, blob := range proof.NodeList() { + proofs = append(proofs, blob) + } + break + } + } + return hashes, slots, proofs +} + +func defaultCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { + panic("TODO implement me") +} + +// emptyRequestAccountRangeFn is a rejects AccountRangeRequests +func emptyRequestAccountRangeFn(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { + var proofs [][]byte + var keys []common.Hash + var vals [][]byte + t.remote.OnAccounts(t, requestId, keys, vals, proofs) + return nil +} + +func emptyTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error { + var nodes [][]byte + t.remote.OnTrieNodes(t, requestId, nodes) + return nil +} + +func emptyStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { + var hashes [][]common.Hash + var slots [][][]byte + var proofs [][]byte + t.remote.OnStorage(t, requestId, hashes, slots, proofs) + return nil +} + +func emptyCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { + var bytecodes [][]byte + t.remote.OnByteCodes(t, id, bytecodes) + return nil +} + +// starvingStorageRequestHandler is somewhat well-behaving storage handler, but it caps the returned results to be very small +func starvingStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { + return defaultStorageRequestHandler(t, requestId, root, accounts, origin, limit, 500) +} + +func starvingAccountRequestHandler(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { + return defaultAccountRequestHandler(t, requestId, root, origin, 500) +} + +// corruptStorageRequestHandler doesn't provide good proofs +func corruptStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { + hashes, slots, proofs := createStorageRequestResponse(t, root, accounts, origin, limit, max) + if len(proofs) > 0 { + proofs = proofs[1:] + } + if err := t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { + t.log.Info("remote error on delivery (as expected)", "error", err) + } + return nil +} + +// TestSync tests a basic sync +func TestSync(t *testing.T) { + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + + sourceAccountTrie, elems := makeAccountTrieNoStorage(trieBackend, 100) + cancel := make(chan struct{}) + source := newTestPeer("source", t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + syncer := setupSyncer(source) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestSyncWithStorage tests basic sync using accounts + storage +func TestSyncWithStorage(t *testing.T) { + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 3, 3000) + cancel := make(chan struct{}) + source := newTestPeer("source", t, cancel) + 
source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + syncer := setupSyncer(source) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestSyncBloatedProof tests a scenario where we provide only _one_ value, but +// also ship the entire trie inside the proof. If the attack is successfull, +// the remote side does not do any follow-up requests +func TestSyncBloatedProof(t *testing.T) { + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems := makeAccountTrieNoStorage(trieBackend, 100) + cancel := make(chan struct{}) + source := newTestPeer("source", t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + + source.accountRequestHandler = func(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { + var proofs [][]byte + var keys []common.Hash + var vals [][]byte + + // The values + for _, entry := range t.accountValues { + if bytes.Compare(origin[:], entry.k) <= 0 { + keys = append(keys, common.BytesToHash(entry.k)) + vals = append(vals, entry.v) + } + } + // The proofs + proof := light.NewNodeSet() + if err := t.accountTrie.Prove(origin[:], 0, proof); err != nil { + t.log.Error("Could not prove origin", "origin", origin, "error", err) + } + // The bloat: add proof of every single element + for _, entry := range t.accountValues { + if err := t.accountTrie.Prove(entry.k, 0, proof); err != nil { + t.log.Error("Could not prove item", "error", err) + } + } + // And remove one item from the elements + if len(keys) > 2 { + keys = append(keys[:1], keys[2:]...) + vals = append(vals[:1], vals[2:]...) + } + for _, blob := range proof.NodeList() { + proofs = append(proofs, blob) + } + if err := t.remote.OnAccounts(t, requestId, keys, vals, proofs); err != nil { + t.log.Error("remote error on delivery", "error", err) + // This is actually correct, signal to exit the test successfully + t.cancelCh <- struct{}{} + } + return nil + } + syncer := setupSyncer(source) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Logf("sync failed: %v", err) + } else { + // TODO @karalabe, @holiman: + // A cancel, which aborts the sync before completion, should probably + // return an error from Sync(..) ? 
+ t.Fatal("No error returned from incomplete/cancelled sync") + } +} + +func setupSyncer(peers ...*testPeer) *Syncer { + stateDb := rawdb.NewMemoryDatabase() + syncer := NewSyncer(stateDb, trie.NewSyncBloom(1, stateDb)) + for _, peer := range peers { + syncer.Register(peer) + peer.remote = syncer + } + return syncer +} + +// TestMultiSync tests a basic sync with multiple peers +func TestMultiSync(t *testing.T) { + cancel := make(chan struct{}) + sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 100) + + sourceA := newTestPeer("sourceA", t, cancel) + sourceA.accountTrie = sourceAccountTrie + sourceA.accountValues = elems + + sourceB := newTestPeer("sourceB", t, cancel) + sourceB.accountTrie = sourceAccountTrie + sourceB.accountValues = elems + + syncer := setupSyncer(sourceA, sourceB) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestMultiSyncManyUseless contains one good peer, and many which doesn't return anything valuable at all +func TestMultiSyncManyUseless(t *testing.T) { + cancel := make(chan struct{}) + + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + + mkSource := func(name string, a, b, c bool) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + + if !a { + source.accountRequestHandler = emptyRequestAccountRangeFn + } + if !b { + source.storageRequestHandler = emptyStorageRequestHandler + } + if !c { + source.trieRequestHandler = emptyTrieRequestHandler + } + return source + } + + syncer := setupSyncer( + mkSource("full", true, true, true), + mkSource("noAccounts", false, true, true), + mkSource("noStorage", true, false, true), + mkSource("noTrie", true, true, false), + ) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestSyncNoStorageAndOneCappedPeer tests sync using accounts and no storage, where one peer is +// consistently returning very small results +func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { + cancel := make(chan struct{}) + + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems := makeAccountTrieNoStorage(trieBackend, 3000) + + mkSource := func(name string, slow bool) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + + if slow { + source.accountRequestHandler = starvingAccountRequestHandler + } + return source + } + + syncer := setupSyncer( + mkSource("nice-a", false), + mkSource("nice-b", false), + mkSource("nice-c", false), + mkSource("capped", true), + ) + go func() { + select { + case <-time.After(5 * time.Second): + t.Errorf("Sync stalled") + cancel <- struct{}{} + } + }() + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestSyncWithStorageAndOneCappedPeer tests sync using accounts + storage, where one peer is +// consistently returning very small results +func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { + cancel := make(chan struct{}) + + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + + 
mkSource := func(name string, slow bool) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + + if slow { + source.storageRequestHandler = starvingStorageRequestHandler + } + return source + } + + syncer := setupSyncer( + mkSource("nice-a", false), + //mkSource("nice-b", false), + //mkSource("nice-c", false), + mkSource("slow", true), + ) + go func() { + select { + case <-time.After(5 * time.Second): + t.Errorf("Sync stalled") + cancel <- struct{}{} + } + }() + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestSyncWithStorageAndCorruptPeer tests sync using accounts + storage, where one peer is +// sometimes sending bad proofs +func TestSyncWithStorageAndCorruptPeer(t *testing.T) { + cancel := make(chan struct{}) + + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + + mkSource := func(name string, corrupt bool) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + + if corrupt { + source.storageRequestHandler = corruptStorageRequestHandler + } + return source + } + + syncer := setupSyncer( + mkSource("nice-a", false), + mkSource("nice-b", false), + mkSource("nice-c", false), + mkSource("corrupt", true), + ) + go func() { + select { + case <-time.After(5 * time.Second): + t.Errorf("Sync stalled") + cancel <- struct{}{} + } + }() + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +type kv struct { + k, v []byte + t bool +} + +// Some helpers for sorting +type entrySlice []*kv + +func (p entrySlice) Len() int { return len(p) } +func (p entrySlice) Less(i, j int) bool { return bytes.Compare(p[i].k, p[j].k) < 0 } +func (p entrySlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } + +// makeAccountTrieNoStorage spits out a trie, along with the leafs +func makeAccountTrieNoStorage(db *trie.Database, n int) (*trie.Trie, entrySlice) { + accTrie, _ := trie.New(common.Hash{}, db) + var entries entrySlice + for i := uint64(0); i < uint64(n); i++ { + value, _ := rlp.EncodeToBytes(state.Account{ + Nonce: i, + Balance: big.NewInt(int64(i)), + Root: emptyRoot, + CodeHash: emptyCode[:], + }) + key := key32(i) + elem := &kv{key, value, false} + accTrie.Update(elem.k, elem.v) + entries = append(entries, elem) + } + sort.Sort(entries) + // Push to disk layer + accTrie.Commit(nil) + return accTrie, entries +} + +func key32(i uint64) []byte { + key := make([]byte, 32) + binary.LittleEndian.PutUint64(key, i) + return key +} + +// makeAccountTrieWithStorage spits out a trie, along with the leafs +func makeAccountTrieWithStorage(db *trie.Database, accounts, slots int) (*trie.Trie, entrySlice, + map[common.Hash]*trie.Trie, map[common.Hash]entrySlice) { + + var ( + accTrie, _ = trie.New(common.Hash{}, db) + entries entrySlice + storageTries = make(map[common.Hash]*trie.Trie) + storageEntries = make(map[common.Hash]entrySlice) + ) + + // Make a storage trie which we reuse for the whole lot + stTrie, stEntries := makeStorageTrie(slots, db) + stRoot := stTrie.Hash() + // Create n accounts in the trie + for i := uint64(1); i <= uint64(accounts); i++ { + key := key32(i) + value, _ 
:= rlp.EncodeToBytes(state.Account{ + Nonce: i, + Balance: big.NewInt(int64(i)), + Root: stRoot, + CodeHash: emptyCode[:], + }) + elem := &kv{key, value, false} + accTrie.Update(elem.k, elem.v) + entries = append(entries, elem) + // we reuse the same one for all accounts + storageTries[common.BytesToHash(key)] = stTrie + storageEntries[common.BytesToHash(key)] = stEntries + } + stTrie.Commit(nil) + accTrie.Commit(nil) + return accTrie, entries, storageTries, storageEntries +} + +// makeStorageTrie fills a storage trie with n items, returning the +// not-yet-committed trie and the sorted entries +func makeStorageTrie(n int, db *trie.Database) (*trie.Trie, entrySlice) { + trie, _ := trie.New(common.Hash{}, db) + var entries entrySlice + for i := uint64(1); i <= uint64(n); i++ { + // store 'i' at slot 'i' + slotValue := key32(i) + rlpSlotValue, _ := rlp.EncodeToBytes(common.TrimLeftZeroes(slotValue[:])) + + slotKey := key32(i) + key := crypto.Keccak256Hash(slotKey[:]) + + elem := &kv{key[:], rlpSlotValue, false} + trie.Update(elem.k, elem.v) + entries = append(entries, elem) + } + sort.Sort(entries) + return trie, entries +} From 935c001814dd3dbd70c692ee4079c1289e1b1a22 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 18 Jan 2021 12:04:46 +0100 Subject: [PATCH 03/22] eth/protocols/snap: test to trigger panic --- eth/protocols/snap/sync_test.go | 109 +++++++++++++++++++++++++++++--- 1 file changed, 101 insertions(+), 8 deletions(-) diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index f96344e1910a..b17863be63f6 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -346,12 +346,20 @@ func emptyRequestAccountRangeFn(t *testPeer, requestId uint64, root common.Hash, return nil } +func nonResponsiveRequestAccountRangeFn(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { + return nil +} + func emptyTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error { var nodes [][]byte t.remote.OnTrieNodes(t, requestId, nodes) return nil } +func nonResponsiveTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error { + return nil +} + func emptyStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { var hashes [][]common.Hash var slots [][][]byte @@ -360,6 +368,10 @@ func emptyStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, return nil } +func nonResponsiveStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { + return nil +} + func emptyCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { var bytecodes [][]byte t.remote.OnByteCodes(t, id, bytecodes) @@ -375,6 +387,10 @@ func starvingAccountRequestHandler(t *testPeer, requestId uint64, root common.Ha return defaultAccountRequestHandler(t, requestId, root, origin, 500) } +func misdeliveringAccountRequestHandler(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { + return defaultAccountRequestHandler(t, requestId-1, root, origin, 500) +} + // corruptStorageRequestHandler doesn't provide good proofs func corruptStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { hashes, slots, proofs := 
createStorageRequestResponse(t, root, accounts, origin, limit, max) @@ -461,19 +477,14 @@ func TestSyncBloatedProof(t *testing.T) { proofs = append(proofs, blob) } if err := t.remote.OnAccounts(t, requestId, keys, vals, proofs); err != nil { - t.log.Error("remote error on delivery", "error", err) + t.log.Info("remote error on delivery", "error", err) // This is actually correct, signal to exit the test successfully t.cancelCh <- struct{}{} } return nil } syncer := setupSyncer(source) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { - t.Logf("sync failed: %v", err) - } else { - // TODO @karalabe, @holiman: - // A cancel, which aborts the sync before completion, should probably - // return an error from Sync(..) ? + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err == nil { t.Fatal("No error returned from incomplete/cancelled sync") } } @@ -544,6 +555,88 @@ func TestMultiSyncManyUseless(t *testing.T) { } } +// TestMultiSyncManyUseless contains one good peer, and many which doesn't return anything valuable at all +func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { + // We're setting the timeout to very low, to increase the chance of the timeout + // being triggered. This was previously a cause of panic, when a response + // arrived simultaneously as a timeout was triggered. + old := requestTimeout + requestTimeout = 1 * time.Millisecond + defer func() { + requestTimeout = old + }() + cancel := make(chan struct{}) + + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + + mkSource := func(name string, a, b, c bool) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + + if !a { + source.accountRequestHandler = emptyRequestAccountRangeFn + } + if !b { + source.storageRequestHandler = emptyStorageRequestHandler + } + if !c { + source.trieRequestHandler = emptyTrieRequestHandler + } + return source + } + + syncer := setupSyncer( + mkSource("full", true, true, true), + mkSource("noAccounts", false, true, true), + mkSource("noStorage", true, false, true), + mkSource("noTrie", true, true, false), + ) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestMultiSyncManyUnresponsive contains one good peer, and many which doesn't respond at all +func TestMultiSyncManyUnresponsive(t *testing.T) { + cancel := make(chan struct{}) + + trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + + mkSource := func(name string, a, b, c bool) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + + if !a { + source.accountRequestHandler = nonResponsiveRequestAccountRangeFn + } + if !b { + source.storageRequestHandler = nonResponsiveStorageRequestHandler + } + if !c { + source.trieRequestHandler = nonResponsiveTrieRequestHandler + } + return source + } + + syncer := setupSyncer( + mkSource("full", true, true, true), + mkSource("noAccounts", false, true, true), + mkSource("noStorage", true, false, true), + mkSource("noTrie", true, true, false), + ) + if err := 
syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + // TestSyncNoStorageAndOneCappedPeer tests sync using accounts and no storage, where one peer is // consistently returning very small results func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { @@ -572,7 +665,7 @@ func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { go func() { select { case <-time.After(5 * time.Second): - t.Errorf("Sync stalled") + t.Log("Sync stalled") cancel <- struct{}{} } }() From 375588b2be9e7a601d82e9962c2a07e57bbbd659 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 18 Jan 2021 12:58:41 +0100 Subject: [PATCH 04/22] eth/protocols/snap: fix race condition on timeouts --- eth/protocols/snap/sync.go | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 04d65fe65d1a..26eb4bd90884 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1978,7 +1978,11 @@ func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accoun // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - req.timeout.Stop() + if !req.timeout.Stop(){ + // The timeout is already triggered, and this request will be reverted+rescheduled + s.lock.Unlock() + return nil + } // Response is valid, but check if peer is signalling that it does not have // the requested data. For account range queries that means the state being @@ -2199,7 +2203,11 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - req.timeout.Stop() + if !req.timeout.Stop(){ + // The timeout is already triggered, and this request will be reverted+rescheduled + s.lock.Unlock() + return nil + } // Reject the response if the hash sets and slot sets don't match, or if the // peer sent more data than requested. @@ -2336,7 +2344,11 @@ func (s *Syncer) OnTrieNodes(peer PeerIF, id uint64, trienodes [][]byte) error { // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - req.timeout.Stop() + if !req.timeout.Stop(){ + // The timeout is already triggered, and this request will be reverted+rescheduled + s.lock.Unlock() + return nil + } // Response is valid, but check if peer is signalling that it does not have // the requested data. For bytecode range queries that means the peer is not @@ -2424,7 +2436,11 @@ func (s *Syncer) onHealByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) err // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - req.timeout.Stop() + if !req.timeout.Stop(){ + // The timeout is already triggered, and this request will be reverted+rescheduled + s.lock.Unlock() + return nil + } // Response is valid, but check if peer is signalling that it does not have // the requested data. 
For bytecode range queries that means the peer is not From 404b2996ce3864bee13b52dfb5f7878161082279 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 18 Jan 2021 13:05:55 +0100 Subject: [PATCH 05/22] eth/protocols/snap: return error on cancelled sync --- eth/protocols/snap/protocol.go | 1 + eth/protocols/snap/sync.go | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/eth/protocols/snap/protocol.go b/eth/protocols/snap/protocol.go index a1e4349691b4..f1a25a2066a2 100644 --- a/eth/protocols/snap/protocol.go +++ b/eth/protocols/snap/protocol.go @@ -61,6 +61,7 @@ var ( errDecode = errors.New("invalid message") errInvalidMsgCode = errors.New("invalid message code") errBadRequest = errors.New("bad request") + errCancelled = errors.New("sync cancelled") ) // Packet represents a p2p message in the `snap` protocol. diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 26eb4bd90884..c07445c3949b 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -583,7 +583,7 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { case id := <-peerDrop: s.revertRequests(id) case <-cancel: - return nil + return errCancelled case req := <-s.accountReqFails: s.revertAccountRequest(req) @@ -1978,7 +1978,7 @@ func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accoun // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - if !req.timeout.Stop(){ + if !req.timeout.Stop() { // The timeout is already triggered, and this request will be reverted+rescheduled s.lock.Unlock() return nil @@ -2203,7 +2203,7 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - if !req.timeout.Stop(){ + if !req.timeout.Stop() { // The timeout is already triggered, and this request will be reverted+rescheduled s.lock.Unlock() return nil @@ -2344,7 +2344,7 @@ func (s *Syncer) OnTrieNodes(peer PeerIF, id uint64, trienodes [][]byte) error { // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - if !req.timeout.Stop(){ + if !req.timeout.Stop() { // The timeout is already triggered, and this request will be reverted+rescheduled s.lock.Unlock() return nil @@ -2436,7 +2436,7 @@ func (s *Syncer) onHealByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) err // Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - if !req.timeout.Stop(){ + if !req.timeout.Stop() { // The timeout is already triggered, and this request will be reverted+rescheduled s.lock.Unlock() return nil From f1918485562c0900f489aba2b90b035af93a44d8 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 19 Jan 2021 10:22:39 +0100 Subject: [PATCH 06/22] squashme: updates + test causing panic + properly serve accounts in order --- eth/protocols/snap/sync_test.go | 504 ++++++++++++++++++++++++-------- 1 file changed, 382 insertions(+), 122 deletions(-) diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index b17863be63f6..f69ee9807c3c 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -110,6 +110,11 @@ func BenchmarkHashing(b *testing.B) { }) } +type storageHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error 
+type accountHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error +type trieHandlerFunc func(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error +type codeHandlerFunc func(t *testPeer, id uint64, hashes []common.Hash, max uint64) error + type testPeer struct { id string test *testing.T @@ -120,10 +125,10 @@ type testPeer struct { storageTries map[common.Hash]*trie.Trie storageValues map[common.Hash]entrySlice - accountRequestHandler func(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error - storageRequestHandler func(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error - trieRequestHandler func(t *testPeer, requestId uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error - codeRequestHandler func(t *testPeer, id uint64, hashes []common.Hash, max uint64) error + accountRequestHandler accountHandlerFunc + storageRequestHandler storageHandlerFunc + trieRequestHandler trieHandlerFunc + codeRequestHandler codeHandlerFunc cancelCh chan struct{} } @@ -152,28 +157,31 @@ func (t *testPeer) Log() log.Logger { return t.log } -func (t *testPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, cap uint64) error { - t.Log().Info("<- AccRangeReq", "id", id, "root", root, "origin", origin, "limit", limit, "max", cap) - go t.accountRequestHandler(t, id, root, origin, cap) +func (t *testPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error { + t.log.Trace("Fetching range of accounts", "reqid", id, "root", root, "origin", origin, "limit", limit, "bytes", common.StorageSize(bytes)) + go t.accountRequestHandler(t, id, root, origin, bytes) return nil } -func (t *testPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, cap uint64) error { - t.Log().Info("<- TrieNodeReq", "id", id, "root", root, "paths", len(paths), "limit", cap) - go t.trieRequestHandler(t, id, root, paths, cap) +func (t *testPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error { + t.log.Trace("Fetching set of trie nodes", "reqid", id, "root", root, "pathsets", len(paths), "bytes", common.StorageSize(bytes)) + go t.trieRequestHandler(t, id, root, paths, bytes) return nil } -func (t *testPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { - t.Log().Info("<- StorRangeReq", "id", id, "root", root, "account[0]", accounts[0], - "origin", fmt.Sprintf("%x", origin), "limit", fmt.Sprintf("%x", limit), "max", max) - go t.storageRequestHandler(t, id, root, accounts, origin, limit, max) +func (t *testPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error { + if len(accounts) == 1 && origin != nil { + t.log.Trace("Fetching range of large storage slots", "reqid", id, "root", root, "account", accounts[0], "origin", common.BytesToHash(origin), "limit", common.BytesToHash(limit), "bytes", common.StorageSize(bytes)) + } else { + t.log.Trace("Fetching ranges of small storage slots", "reqid", id, "root", root, "accounts", len(accounts), "first", accounts[0], "bytes", common.StorageSize(bytes)) + } + go t.storageRequestHandler(t, id, root, accounts, origin, limit, bytes) return nil } -func (t *testPeer) RequestByteCodes(id uint64, hashes []common.Hash, max uint64) error { - t.Log().Info("<- CodeReq", "id", id, 
"#hashes", len(hashes), "max", max) - go t.codeRequestHandler(t, id, hashes, max) +func (t *testPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error { + t.log.Trace("Fetching set of byte codes", "reqid", id, "hashes", len(hashes), "bytes", common.StorageSize(bytes)) + go t.codeRequestHandler(t, id, hashes, bytes) return nil } @@ -207,13 +215,20 @@ func defaultTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, } // defaultAccountRequestHandler is a well-behaving handler for AccountRangeRequests -func defaultAccountRequestHandler(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { - var ( - proofs [][]byte - keys []common.Hash - vals [][]byte - size uint64 - ) +func defaultAccountRequestHandler(t *testPeer, id uint64, root common.Hash, origin common.Hash, cap uint64) error { + keys, vals, proofs := createAccountRequestResponse(t, root, origin, cap) + if err := t.remote.OnAccounts(t, id, keys, vals, proofs); err != nil { + t.log.Error("remote error on delivery", "error", err) + t.test.Errorf("Remote side rejected our delivery: %v", err) + t.remote.Unregister(t.id) + t.cancelCh <- struct{}{} + return err + } + return nil +} + +func createAccountRequestResponse(t *testPeer, root common.Hash, origin common.Hash, cap uint64) (keys []common.Hash, vals [][]byte, proofs [][]byte) { + var size uint64 for _, entry := range t.accountValues { if size > cap { break @@ -242,19 +257,26 @@ func defaultAccountRequestHandler(t *testPeer, requestId uint64, root common.Has for _, blob := range proof.NodeList() { proofs = append(proofs, blob) } - if err := t.remote.OnAccounts(t, requestId, keys, vals, proofs); err != nil { + return keys, vals, proofs +} + +// defaultStorageRequestHandler is a well-behaving storage request handler +func defaultStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, bOrigin, bLimit []byte, max uint64) error { + hashes, slots, proofs := createStorageRequestResponse(t, root, accounts, bOrigin, bLimit, max) + if err := t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { t.log.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) t.cancelCh <- struct{}{} - return err } return nil } -// defaultStorageRequestHandler is a well-behaving storage request handler -func defaultStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, bOrigin, bLimit []byte, max uint64) error { - hashes, slots, proofs := createStorageRequestResponse(t, root, accounts, bOrigin, bLimit, max) - if err := t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { +func defaultCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { + var bytecodes [][]byte + for _, h := range hashes { + bytecodes = append(bytecodes, getCode(h)) + } + if err := t.remote.OnByteCodes(t, id, bytecodes); err != nil { t.log.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) t.cancelCh <- struct{}{} @@ -278,7 +300,6 @@ func createStorageRequestResponse(t *testPeer, root common.Hash, accounts []comm var limitExceeded bool var incomplete bool for _, account := range accounts { - t.Log().Info("Adding account", "account", account.Hex()) var keys []common.Hash var vals [][]byte @@ -295,7 +316,6 @@ func createStorageRequestResponse(t *testPeer, root common.Hash, accounts []comm vals = append(vals, entry.v) size += uint64(32 + 
len(entry.v)) if bytes.Compare(entry.k, limit[:]) >= 0 { - t.Log().Info("key outside of limit", "limit", fmt.Sprintf("%x", limit), "key", fmt.Sprintf("%x", entry.k)) limitExceeded = true } if size > max { @@ -333,10 +353,6 @@ func createStorageRequestResponse(t *testPeer, root common.Hash, accounts []comm return hashes, slots, proofs } -func defaultCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { - panic("TODO implement me") -} - // emptyRequestAccountRangeFn is a rejects AccountRangeRequests func emptyRequestAccountRangeFn(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { var proofs [][]byte @@ -372,9 +388,36 @@ func nonResponsiveStorageRequestHandler(t *testPeer, requestId uint64, root comm return nil } -func emptyCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { +//func emptyCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { +// var bytecodes [][]byte +// t.remote.OnByteCodes(t, id, bytecodes) +// return nil +//} + +func corruptCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { var bytecodes [][]byte - t.remote.OnByteCodes(t, id, bytecodes) + for _, h := range hashes { + // Send back the hashes + bytecodes = append(bytecodes, h[:]) + } + if err := t.remote.OnByteCodes(t, id, bytecodes); err != nil { + t.log.Error("remote error on delivery", "error", err) + // Mimic the real-life handler, which drops a peer on errors + t.remote.Unregister(t.id) + } + return nil +} + +func cappedCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { + var bytecodes [][]byte + for _, h := range hashes[:1] { + bytecodes = append(bytecodes, getCode(h)) + } + if err := t.remote.OnByteCodes(t, id, bytecodes); err != nil { + t.log.Error("remote error on delivery", "error", err) + // Mimic the real-life handler, which drops a peer on errors + t.remote.Unregister(t.id) + } return nil } @@ -387,8 +430,21 @@ func starvingAccountRequestHandler(t *testPeer, requestId uint64, root common.Ha return defaultAccountRequestHandler(t, requestId, root, origin, 500) } -func misdeliveringAccountRequestHandler(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { - return defaultAccountRequestHandler(t, requestId-1, root, origin, 500) +//func misdeliveringAccountRequestHandler(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { +// return defaultAccountRequestHandler(t, requestId-1, root, origin, 500) +//} + +func corruptAccountRequestHandler(t *testPeer, requestId uint64, root common.Hash, origin common.Hash, cap uint64) error { + hashes, accounts, proofs := createAccountRequestResponse(t, root, origin, cap) + if len(proofs) > 0 { + proofs = proofs[1:] + } + if err := t.remote.OnAccounts(t, requestId, hashes, accounts, proofs); err != nil { + t.log.Info("remote error on delivery (as expected)", "error", err) + // Mimic the real-life handler, which drops a peer on errors + t.remote.Unregister(t.id) + } + return nil } // corruptStorageRequestHandler doesn't provide good proofs @@ -399,43 +455,24 @@ func corruptStorageRequestHandler(t *testPeer, requestId uint64, root common.Has } if err := t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { t.log.Info("remote error on delivery (as expected)", "error", err) + // Mimic the real-life handler, which drops a peer on errors + t.remote.Unregister(t.id) } return nil } -// TestSync tests a basic 
sync -func TestSync(t *testing.T) { - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - - sourceAccountTrie, elems := makeAccountTrieNoStorage(trieBackend, 100) - cancel := make(chan struct{}) - source := newTestPeer("source", t, cancel) - source.accountTrie = sourceAccountTrie - source.accountValues = elems - syncer := setupSyncer(source) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { - t.Fatalf("sync failed: %v", err) - } -} - -// TestSyncWithStorage tests basic sync using accounts + storage -func TestSyncWithStorage(t *testing.T) { - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 3, 3000) - cancel := make(chan struct{}) - source := newTestPeer("source", t, cancel) - source.accountTrie = sourceAccountTrie - source.accountValues = elems - source.storageTries = storageTries - source.storageValues = storageElems - syncer := setupSyncer(source) - if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { - t.Fatalf("sync failed: %v", err) +func noProofStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { + hashes, slots, _ := createStorageRequestResponse(t, root, accounts, origin, limit, max) + if err := t.remote.OnStorage(t, requestId, hashes, slots, nil); err != nil { + t.log.Info("remote error on delivery (as expected)", "error", err) + // Mimic the real-life handler, which drops a peer on errors + t.remote.Unregister(t.id) } + return nil } // TestSyncBloatedProof tests a scenario where we provide only _one_ value, but -// also ship the entire trie inside the proof. If the attack is successfull, +// also ship the entire trie inside the proof. If the attack is successful, // the remote side does not do any follow-up requests func TestSyncBloatedProof(t *testing.T) { trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) @@ -499,20 +536,86 @@ func setupSyncer(peers ...*testPeer) *Syncer { return syncer } +// TestSync tests a basic sync with one peer +func TestSync(t *testing.T) { + cancel := make(chan struct{}) + sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 100) + + mkSource := func(name string) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + return source + } + + syncer := setupSyncer(mkSource("sourceA")) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestSyncTinyTriePanic tests a basic sync with one peer, and a tiny trie. 
This caused a +// panic within the prover +func TestSyncTinyTriePanic(t *testing.T) { + cancel := make(chan struct{}) + + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(1, 1, false) + + mkSource := func(name string, slow bool) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + + if slow { + source.storageRequestHandler = starvingStorageRequestHandler + } + return source + } + + syncer := setupSyncer( + mkSource("nice-a", false), + ) + done := checkStall(t, cancel) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } + close(done) +} + // TestMultiSync tests a basic sync with multiple peers func TestMultiSync(t *testing.T) { cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 100) - sourceA := newTestPeer("sourceA", t, cancel) - sourceA.accountTrie = sourceAccountTrie - sourceA.accountValues = elems + mkSource := func(name string) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + return source + } - sourceB := newTestPeer("sourceB", t, cancel) - sourceB.accountTrie = sourceAccountTrie - sourceB.accountValues = elems + syncer := setupSyncer(mkSource("sourceA"), mkSource("sourceB")) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } +} + +// TestSyncWithStorage tests basic sync using accounts + storage + code +func TestSyncWithStorage(t *testing.T) { + cancel := make(chan struct{}) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(3, 3000, true) - syncer := setupSyncer(sourceA, sourceB) + mkSource := func(name string) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + return source + } + syncer := setupSyncer(mkSource("sourceA")) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } @@ -522,8 +625,7 @@ func TestMultiSync(t *testing.T) { func TestMultiSyncManyUseless(t *testing.T) { cancel := make(chan struct{}) - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) mkSource := func(name string, a, b, c bool) *testPeer { source := newTestPeer(name, t, cancel) @@ -567,8 +669,7 @@ func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { }() cancel := make(chan struct{}) - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) mkSource := func(name string, a, b, c bool) *testPeer { source := newTestPeer(name, t, cancel) @@ -602,10 +703,16 @@ func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { // TestMultiSyncManyUnresponsive contains one good peer, and many which doesn't respond at all func TestMultiSyncManyUnresponsive(t *testing.T) { + // We're setting the timeout to very low, 
to make the test run a bit faster + old := requestTimeout + requestTimeout = 1 * time.Millisecond + defer func() { + requestTimeout = old + }() + cancel := make(chan struct{}) - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) mkSource := func(name string, a, b, c bool) *testPeer { source := newTestPeer(name, t, cancel) @@ -637,13 +744,26 @@ func TestMultiSyncManyUnresponsive(t *testing.T) { } } +func checkStall(t *testing.T, cancel chan (struct{})) chan struct{} { + testDone := make(chan (struct{})) + go func() { + select { + case <-time.After(5 * time.Second): + t.Log("Sync stalled") + cancel <- struct{}{} + case <-testDone: + return + } + }() + return testDone +} + // TestSyncNoStorageAndOneCappedPeer tests sync using accounts and no storage, where one peer is // consistently returning very small results func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { cancel := make(chan struct{}) - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems := makeAccountTrieNoStorage(trieBackend, 3000) + sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) mkSource := func(name string, slow bool) *testPeer { source := newTestPeer(name, t, cancel) @@ -662,25 +782,115 @@ func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { mkSource("nice-c", false), mkSource("capped", true), ) - go func() { - select { - case <-time.After(5 * time.Second): - t.Log("Sync stalled") - cancel <- struct{}{} - } - }() + done := checkStall(t, cancel) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } + close(done) +} + +// TestSyncNoStorageAndOneCodeCorruptPeer has one peer which doesn't deliver +// code requests properly. +func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { + cancel := make(chan struct{}) + + sourceAccountTrie, elems := makeAccountTrieNoStorage( + trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) + + mkSource := func(name string, codeFn codeHandlerFunc) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.codeRequestHandler = codeFn + return source + } + // One is capped, one is corrupt. If we don't use a capped one, there's a 50% + // chance that the full set of codes requested are sent only to the + // non-corrupt peer, which delivers everything in one go, and makes the + // test moot + syncer := setupSyncer( + mkSource("capped", cappedCodeReqeustHandler), + mkSource("corrupt", corruptCodeReqeustHandler), + ) + done := checkStall(t, cancel) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } + close(done) +} + +func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { + cancel := make(chan struct{}) + + sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) + + mkSource := func(name string, accFn accountHandlerFunc) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.accountRequestHandler = accFn + return source + } + // One is capped, one is corrupt. 
If we don't use a capped one, there's a 50% + // chance that the full set of codes requested are sent only to the + // non-corrupt peer, which delivers everything in one go, and makes the + // test moot + syncer := setupSyncer( + mkSource("capped", defaultAccountRequestHandler), + mkSource("corrupt", corruptAccountRequestHandler), + ) + done := checkStall(t, cancel) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } + close(done) +} + +// TestSyncNoStorageAndOneCodeCappedPeer has one peer which delivers code hashes +// one by one +func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { + cancel := make(chan struct{}) + + sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) + + mkSource := func(name string, codeFn codeHandlerFunc) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.codeRequestHandler = codeFn + return source + } + // Count how many times it's invoked. Remember, there are only 8 unique hashes, + // so it shouldn't be more than that + var counter int + syncer := setupSyncer( + mkSource("capped", func(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { + counter++ + return cappedCodeReqeustHandler(t, id, hashes, max) + }), + ) + done := checkStall(t, cancel) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } + close(done) + // There are only 8 unique hashes, and 3K accounts. However, the code + // deduplication is per request batch. If it were a perfect global dedup, + // we would expect only 8 requests. If there were no dedup, there would be + // 3k requests. + // We expect somewhere below 100 requests for these 8 unique hashes. 
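	// The "capped" handler above only ever returns the first requested
	// bytecode (cappedCodeReqeustHandler slices hashes[:1]), so counter ends
	// up tracking how many code-request batches the syncer issued rather than
	// how many unique bytecodes were fetched.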
+ if threshold := 100; counter > threshold { + t.Fatalf("Error, expected < %d invocations, got %d", threshold, counter) + } } // TestSyncWithStorageAndOneCappedPeer tests sync using accounts + storage, where one peer is // consistently returning very small results func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { + log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) cancel := make(chan struct{}) - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(300, 1000, false) mkSource := func(name string, slow bool) *testPeer { source := newTestPeer(name, t, cancel) @@ -697,20 +907,13 @@ func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { syncer := setupSyncer( mkSource("nice-a", false), - //mkSource("nice-b", false), - //mkSource("nice-c", false), mkSource("slow", true), ) - go func() { - select { - case <-time.After(5 * time.Second): - t.Errorf("Sync stalled") - cancel <- struct{}{} - } - }() + done := checkStall(t, cancel) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } + close(done) } // TestSyncWithStorageAndCorruptPeer tests sync using accounts + storage, where one peer is @@ -718,38 +921,57 @@ func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { func TestSyncWithStorageAndCorruptPeer(t *testing.T) { cancel := make(chan struct{}) - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(trieBackend, 100, 3000) + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) - mkSource := func(name string, corrupt bool) *testPeer { + mkSource := func(name string, handler storageHandlerFunc) *testPeer { source := newTestPeer(name, t, cancel) source.accountTrie = sourceAccountTrie source.accountValues = elems source.storageTries = storageTries source.storageValues = storageElems + source.storageRequestHandler = handler + return source + } - if corrupt { - source.storageRequestHandler = corruptStorageRequestHandler - } + syncer := setupSyncer( + mkSource("nice-a", defaultStorageRequestHandler), + mkSource("nice-b", defaultStorageRequestHandler), + mkSource("nice-c", defaultStorageRequestHandler), + mkSource("corrupt", corruptStorageRequestHandler), + ) + done := checkStall(t, cancel) + if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { + t.Fatalf("sync failed: %v", err) + } + close(done) +} + +func TestSyncWithStorageAndNonProvingPeer(t *testing.T) { + cancel := make(chan struct{}) + + sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) + + mkSource := func(name string, handler storageHandlerFunc) *testPeer { + source := newTestPeer(name, t, cancel) + source.accountTrie = sourceAccountTrie + source.accountValues = elems + source.storageTries = storageTries + source.storageValues = storageElems + source.storageRequestHandler = handler return source } syncer := setupSyncer( - mkSource("nice-a", false), - mkSource("nice-b", false), - mkSource("nice-c", false), - mkSource("corrupt", true), + mkSource("nice-a", defaultStorageRequestHandler), + mkSource("nice-b", defaultStorageRequestHandler), + mkSource("nice-c", defaultStorageRequestHandler), + mkSource("corrupt", 
noProofStorageRequestHandler), ) - go func() { - select { - case <-time.After(5 * time.Second): - t.Errorf("Sync stalled") - cancel <- struct{}{} - } - }() + done := checkStall(t, cancel) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { t.Fatalf("sync failed: %v", err) } + close(done) } type kv struct { @@ -773,7 +995,7 @@ func makeAccountTrieNoStorage(db *trie.Database, n int) (*trie.Trie, entrySlice) Nonce: i, Balance: big.NewInt(int64(i)), Root: emptyRoot, - CodeHash: emptyCode[:], + CodeHash: getACodeHash(i), }) key := key32(i) elem := &kv{key, value, false} @@ -792,11 +1014,44 @@ func key32(i uint64) []byte { return key } +var ( + codehashes = []common.Hash{ + crypto.Keccak256Hash([]byte{0}), + crypto.Keccak256Hash([]byte{1}), + crypto.Keccak256Hash([]byte{2}), + crypto.Keccak256Hash([]byte{3}), + crypto.Keccak256Hash([]byte{4}), + crypto.Keccak256Hash([]byte{5}), + crypto.Keccak256Hash([]byte{6}), + crypto.Keccak256Hash([]byte{7}), + } +) + +// getACodeHash returns a pseudo-random code hash +func getACodeHash(i uint64) []byte { + h := codehashes[int(i)%len(codehashes)] + return h[:] +} + +// convenience function to lookup the code from the code hash +func getCode(hash common.Hash) []byte { + if hash == emptyCode { + return nil + } + for i, h := range codehashes { + if h == hash { + return []byte{byte(i)} + } + } + return nil +} + // makeAccountTrieWithStorage spits out a trie, along with the leafs -func makeAccountTrieWithStorage(db *trie.Database, accounts, slots int) (*trie.Trie, entrySlice, +func makeAccountTrieWithStorage(accounts, slots int, code bool) (*trie.Trie, entrySlice, map[common.Hash]*trie.Trie, map[common.Hash]entrySlice) { var ( + db = trie.NewDatabase(rawdb.NewMemoryDatabase()) accTrie, _ = trie.New(common.Hash{}, db) entries entrySlice storageTries = make(map[common.Hash]*trie.Trie) @@ -809,11 +1064,15 @@ func makeAccountTrieWithStorage(db *trie.Database, accounts, slots int) (*trie.T // Create n accounts in the trie for i := uint64(1); i <= uint64(accounts); i++ { key := key32(i) + codehash := emptyCode[:] + if code { + codehash = getACodeHash(i) + } value, _ := rlp.EncodeToBytes(state.Account{ Nonce: i, Balance: big.NewInt(int64(i)), Root: stRoot, - CodeHash: emptyCode[:], + CodeHash: codehash, }) elem := &kv{key, value, false} accTrie.Update(elem.k, elem.v) @@ -822,6 +1081,7 @@ func makeAccountTrieWithStorage(db *trie.Database, accounts, slots int) (*trie.T storageTries[common.BytesToHash(key)] = stTrie storageEntries[common.BytesToHash(key)] = stEntries } + sort.Sort(entries) stTrie.Commit(nil) accTrie.Commit(nil) return accTrie, entries, storageTries, storageEntries From dc2174490fd214cf993673d48ff2198a31ea3b2c Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 19 Jan 2021 10:23:08 +0100 Subject: [PATCH 07/22] eth/protocols/snap: revert failing storage response --- eth/protocols/snap/sync.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index c07445c3949b..38ecaa7b11a3 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2262,6 +2262,7 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots dbs[i], tries[i], _, _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil) if err != nil { logger.Warn("Storage slots failed proof", "err", err) + s.scheduleRevertStorageRequest(req) return err } } else { @@ -2276,6 +2277,7 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots 
dbs[i], tries[i], notary, cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb) if err != nil { logger.Warn("Storage range failed proof", "err", err) + s.scheduleRevertStorageRequest(req) return err } } From 09d06106076279f07ef416c1313fd6dd1f185807 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 19 Jan 2021 16:11:08 +0100 Subject: [PATCH 08/22] eth/protocols/snap: revert on bad responses (storage, code) --- eth/protocols/snap/sync.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 38ecaa7b11a3..ff3af556e629 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2139,6 +2139,8 @@ func (s *Syncer) onByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { } // We've either ran out of hashes, or got unrequested data logger.Warn("Unexpected bytecodes", "count", len(bytecodes)-i) + // Signal this request as failed, and ready for rescheduling + s.scheduleRevertBytecodeRequest(req) return errors.New("unexpected bytecode") } // Response validated, send it to the scheduler for filling @@ -2213,11 +2215,13 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots // peer sent more data than requested. if len(hashes) != len(slots) { s.lock.Unlock() + s.scheduleRevertStorageRequest(req) // reschedule request logger.Warn("Hash and slot set size mismatch", "hashset", len(hashes), "slotset", len(slots)) return errors.New("hash and slot set size mismatch") } if len(hashes) > len(req.accounts) { s.lock.Unlock() + s.scheduleRevertStorageRequest(req) // reschedule request logger.Warn("Hash set larger than requested", "hashset", len(hashes), "requested", len(req.accounts)) return errors.New("hash set larger than requested") } @@ -2229,9 +2233,7 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots logger.Debug("Peer rejected storage request") s.statelessPeers[peer.ID()] = struct{}{} s.lock.Unlock() - - // Signal this request as failed, and ready for rescheduling - s.scheduleRevertStorageRequest(req) + s.scheduleRevertStorageRequest(req) // reschedule request return nil } s.lock.Unlock() @@ -2261,8 +2263,8 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots // space and hash to the origin root. 
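			// No edge proofs are attached here, so VerifyRangeProof treats the
			// response as the complete slot set: the reconstructed trie must
			// hash exactly to req.roots[i], otherwise the request is failed and
			// handed back to the scheduler via scheduleRevertStorageRequest.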
dbs[i], tries[i], _, _, err = trie.VerifyRangeProof(req.roots[i], nil, nil, keys, slots[i], nil) if err != nil { + s.scheduleRevertStorageRequest(req) // reschedule request logger.Warn("Storage slots failed proof", "err", err) - s.scheduleRevertStorageRequest(req) return err } } else { @@ -2276,8 +2278,8 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots } dbs[i], tries[i], notary, cont, err = trie.VerifyRangeProof(req.roots[i], req.origin[:], end, keys, slots[i], proofdb) if err != nil { + s.scheduleRevertStorageRequest(req) // reschedule request logger.Warn("Storage range failed proof", "err", err) - s.scheduleRevertStorageRequest(req) return err } } From f9630413df02443ecb70b9039bd489e01e98f4df Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 19 Jan 2021 16:26:11 +0100 Subject: [PATCH 09/22] eth/protocols/snap: fix account handling stall --- eth/protocols/snap/sync.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index ff3af556e629..fbedf44a5b27 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2018,6 +2018,8 @@ func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accoun db, tr, notary, cont, err := trie.VerifyRangeProof(root, req.origin[:], end, keys, accounts, proofdb) if err != nil { logger.Warn("Account range failed proof", "err", err) + // Signal this request as failed, and ready for rescheduling + s.scheduleRevertAccountRequest(req) return err } // Partial trie reconstructed, send it to the scheduler for storage filling From cf8b96b49892e4922f36a070912f6148c3c9d924 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Tue, 19 Jan 2021 16:42:13 +0100 Subject: [PATCH 10/22] eth/protocols/snap: fix remaining revertal-issues --- eth/protocols/snap/sync.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index fbedf44a5b27..decf6591956e 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2392,6 +2392,8 @@ func (s *Syncer) OnTrieNodes(peer PeerIF, id uint64, trienodes [][]byte) error { } // We've either ran out of hashes, or got unrequested data logger.Warn("Unexpected healing trienodes", "count", len(trienodes)-i) + // Signal this request as failed, and ready for rescheduling + s.scheduleRevertTrienodeHealRequest(req) return errors.New("unexpected healing trienode") } // Response validated, send it to the scheduler for filling @@ -2484,6 +2486,8 @@ func (s *Syncer) onHealByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) err } // We've either ran out of hashes, or got unrequested data logger.Warn("Unexpected healing bytecodes", "count", len(bytecodes)-i) + // Signal this request as failed, and ready for rescheduling + s.scheduleRevertBytecodeHealRequest(req) return errors.New("unexpected healing bytecode") } // Response validated, send it to the scheduler for filling From 6944299b13f4e7b3eda1c73532146b6e1dc70d81 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 20 Jan 2021 09:41:53 +0100 Subject: [PATCH 11/22] eth/protocols/snap: timeouthandler for bytecode requests --- eth/protocols/snap/sync.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index decf6591956e..dc55d1b2b831 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -2103,7 +2103,11 @@ func (s *Syncer) onByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { // 
Clean up the request timeout timer, we'll see how to proceed further based // on the actual delivered content - req.timeout.Stop() + if !req.timeout.Stop() { + // The timeout is already triggered, and this request will be reverted+rescheduled + s.lock.Unlock() + return nil + } // Response is valid, but check if peer is signalling that it does not have // the requested data. For bytecode range queries that means the peer is not From b86bd4b252b86283ba4dfd11f1b554da5964e325 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 20 Jan 2021 12:50:50 +0100 Subject: [PATCH 12/22] eth/protocols/snap: debugging + fix log message --- eth/protocols/snap/sync.go | 70 ++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index dc55d1b2b831..50baf9ff7cc3 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -566,14 +566,17 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { return nil } // Assign all the data retrieval tasks to any free peers - s.assignAccountTasks(cancel) - s.assignBytecodeTasks(cancel) - s.assignStorageTasks(cancel) + acTasks := s.assignAccountTasks(cancel) + bcTasks := s.assignBytecodeTasks(cancel) + stTasks := s.assignStorageTasks(cancel) + var tnhTasks, bchTasks int if len(s.tasks) == 0 { // Sync phase done, run heal phase - s.assignTrienodeHealTasks(cancel) - s.assignBytecodeHealTasks(cancel) + tnhTasks = s.assignTrienodeHealTasks(cancel) + bchTasks = s.assignBytecodeHealTasks(cancel) } + log.Debug("Assigned tasks", "account", acTasks, "bytecode", bcTasks, + "storage", stTasks, "trie-heal", tnhTasks, "byte-heal", bchTasks) // Wait for something to happen select { case <-s.update: @@ -741,13 +744,13 @@ func (s *Syncer) cleanStorageTasks() { // assignAccountTasks attempts to match idle peers to pending account range // retrievals. -func (s *Syncer) assignAccountTasks(cancel chan struct{}) { +func (s *Syncer) assignAccountTasks(cancel chan struct{}) int { s.lock.Lock() defer s.lock.Unlock() - + assignments := 0 // If there are no idle peers, short circuit assignment if len(s.accountIdlers) == 0 { - return + return assignments } // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { @@ -769,7 +772,7 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) { break } if idle == "" { - return + return assignments } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -801,6 +804,7 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) { delete(s.accountIdlers, idle) s.pend.Add(1) + assignments++ go func(peer PeerIF, root common.Hash) { defer s.pend.Done() @@ -814,16 +818,17 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) { // Inject the request into the task to block further assignments task.req = req } + return assignments } // assignBytecodeTasks attempts to match idle peers to pending code retrievals. 
-func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { +func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) int { s.lock.Lock() defer s.lock.Unlock() - + var assignments = 0 // If there are no idle peers, short circuit assignment if len(s.bytecodeIdlers) == 0 { - return + return assignments } // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { @@ -849,7 +854,7 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { break } if idle == "" { - return + return assignments } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -872,6 +877,7 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { break } } + assignments++ req := &bytecodeRequest{ peer: idle, id: reqid, @@ -898,17 +904,18 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { } }(s.peers[idle]) // We're in the lock, peers[id] surely exists } + return assignments } // assignStorageTasks attempts to match idle peers to pending storage range // retrievals. -func (s *Syncer) assignStorageTasks(cancel chan struct{}) { +func (s *Syncer) assignStorageTasks(cancel chan struct{}) int { s.lock.Lock() defer s.lock.Unlock() - + var assignments = 0 // If there are no idle peers, short circuit assignment if len(s.storageIdlers) == 0 { - return + return assignments } // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { @@ -934,7 +941,7 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) { break } if idle == "" { - return + return assignments } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -965,7 +972,6 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) { // Found an incomplete storage chunk, schedule it accounts = append(accounts, account) roots = append(roots, st.root) - subtask = st break // Large contract chunks are downloaded individually } @@ -991,6 +997,7 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) { if len(accounts) == 0 { continue } + assignments++ req := &storageRequest{ peer: idle, id: reqid, @@ -1032,17 +1039,18 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) { subtask.req = req } } + return assignments } // assignTrienodeHealTasks attempts to match idle peers to trie node requests to // heal any trie errors caused by the snap sync's chunked retrieval model. -func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { +func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) int { s.lock.Lock() defer s.lock.Unlock() - + var assignments = 0 // If there are no idle peers, short circuit assignment if len(s.trienodeHealIdlers) == 0 { - return + return assignments } // Iterate over pending tasks and try to find a peer to retrieve with for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 { @@ -1064,7 +1072,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { } // If all the heal tasks are bytecodes or already downloading, bail if len(s.healer.trieTasks) == 0 { - return + return assignments } // Task pending retrieval, try to find an idle peer. If no such peer // exists, we probably assigned tasks for all (or they are stateless). 
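The assignment helpers in this patch now report how many requests they managed to hand out, and the Sync loop aggregates those counts into the "Assigned tasks" debug line added above. A minimal, self-contained sketch of that calling pattern (hypothetical names, not the actual Syncer code) might look like this:

package main

import "log"

// assign mimics the shape of the reworked assign*Tasks helpers: it hands
// pending work to idle peers and returns how many requests it scheduled.
func assign(idlers map[string]struct{}, pending int) int {
	assignments := 0
	for peer := range idlers {
		if assignments == pending {
			break
		}
		delete(idlers, peer) // peer stays busy until its response arrives
		assignments++
	}
	return assignments
}

func main() {
	idlers := map[string]struct{}{"peer-a": {}, "peer-b": {}, "peer-c": {}}
	acTasks := assign(idlers, 2)
	bcTasks := assign(idlers, 5) // only one idle peer left, so at most one
	log.Printf("Assigned tasks account=%d bytecode=%d idle=%d", acTasks, bcTasks, len(idlers))
}

The real helpers additionally track request ids, timeouts and revert paths; the sketch only illustrates why returning an assignment count is enough for the caller to log scheduling progress.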
@@ -1080,7 +1088,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { break } if idle == "" { - return + return assignments } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -1111,6 +1119,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { break } } + assignments++ req := &trienodeHealRequest{ peer: idle, id: reqid, @@ -1138,17 +1147,18 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { } }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists } + return assignments } // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to // heal any trie errors caused by the snap sync's chunked retrieval model. -func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { +func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) int { s.lock.Lock() defer s.lock.Unlock() - + var assignments = 0 // If there are no idle peers, short circuit assignment if len(s.bytecodeHealIdlers) == 0 { - return + return assignments } // Iterate over pending tasks and try to find a peer to retrieve with for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 { @@ -1170,7 +1180,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { } // If all the heal tasks are trienodes or already downloading, bail if len(s.healer.codeTasks) == 0 { - return + return assignments } // Task pending retrieval, try to find an idle peer. If no such peer // exists, we probably assigned tasks for all (or they are stateless). @@ -1186,7 +1196,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { break } if idle == "" { - return + return assignments } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -1210,6 +1220,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { break } } + assignments++ req := &bytecodeHealRequest{ peer: idle, id: reqid, @@ -1236,6 +1247,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { } }(s.peers[idle]) // We're in the lock, peers[id] surely exists } + return assignments } // revertRequests locates all the currently pending reuqests from a particular @@ -1952,7 +1964,7 @@ func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accoun for _, node := range proof { size += common.StorageSize(len(node)) } - logger := peer.Log().New("reqid", peer.ID()) + logger := peer.Log().New("reqid", id) logger.Trace("Delivering range of accounts", "hashes", len(hashes), "accounts", len(accounts), "proofs", len(proof), "bytes", size) // Whether or not the response is valid, we can mark the peer as idle and From f3584737ae9699b01d8b0935f09933143579c52d Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 20 Jan 2021 13:46:36 +0100 Subject: [PATCH 13/22] eth/protocols/snap: fix misspelliings in docs --- eth/protocols/snap/sync.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 50baf9ff7cc3..a9dd9e503736 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1537,7 +1537,7 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { break } } - // Itereate over all the accounts and assemble which ones need further sub- + // Iterate over all the accounts and assemble which ones need further sub- // filling before the entire account range can be persisted. 
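	// One flag per delivered account: needCode marks accounts whose contract
	// bytecode still has to be fetched, while needState marks accounts whose
	// storage trie still has to be downloaded before this account range can
	// be committed.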
res.task.needCode = make([]bool, len(res.accounts)) res.task.needState = make([]bool, len(res.accounts)) @@ -1581,7 +1581,7 @@ func (s *Syncer) processAccountResponse(res *accountResponse) { } } // Delete any subtasks that have been aborted but not resumed. This may undo - // some progress if a newpeer gives us less accounts than an old one, but for + // some progress if a new peer gives us less accounts than an old one, but for // now we have to live with that. for hash := range res.task.SubTasks { if _, ok := resumed[hash]; !ok { From ac7c967cbe9bc10d57b0767bf960f5082c65dde0 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 20 Jan 2021 12:54:40 +0100 Subject: [PATCH 14/22] eth/protocols/snap: fix race in bytecode handling --- eth/protocols/snap/sync.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index a9dd9e503736..136bc75686fb 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -348,10 +348,11 @@ type Syncer struct { db ethdb.KeyValueStore // Database to store the trie nodes into (and dedup) bloom *trie.SyncBloom // Bloom filter to deduplicate nodes for state fixup - root common.Hash // Current state trie root being synced - tasks []*accountTask // Current account task set being synced - healer *healTask // Current state healing task being executed - update chan struct{} // Notification channel for possible sync progression + root common.Hash // Current state trie root being synced + tasks []*accountTask // Current account task set being synced + snapped bool // Flag to signal that snap phase is done + healer *healTask // Current state healing task being executed + update chan struct{} // Notification channel for possible sync progression peers map[string]PeerIF // Currently active peers to download from peerJoin *event.Feed // Event feed to react to peers joining @@ -628,6 +629,7 @@ func (s *Syncer) loadSyncStatus() { log.Debug("Scheduled account sync task", "from", task.Next, "last", task.Last) } s.tasks = progress.Tasks + s.snapped = len(s.tasks) == 0 s.accountSynced = progress.AccountSynced s.accountBytes = progress.AccountBytes @@ -707,6 +709,11 @@ func (s *Syncer) cleanAccountTasks() { i-- } } + if len(s.tasks) == 0 { + s.lock.Lock() + s.snapped = true + s.lock.Unlock() + } } // cleanStorageTasks iterates over all the account tasks and storage sub-tasks @@ -2073,7 +2080,7 @@ func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accoun // bytes codes are received from a remote peer. 
func (s *Syncer) OnByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { s.lock.RLock() - syncing := len(s.tasks) > 0 + syncing := !s.snapped s.lock.RUnlock() if syncing { From a8e39d1ec39597d9ffe3bc48eab874c48dc060d8 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 20 Jan 2021 15:38:53 +0100 Subject: [PATCH 15/22] eth/protocols/snap: undo deduplication of storage roots --- eth/protocols/snap/sync.go | 146 ++++++++++++++++++------------------- 1 file changed, 71 insertions(+), 75 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 136bc75686fb..008daa35aecd 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1672,94 +1672,90 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { ) // Iterate over all the accounts and reconstruct their storage tries from the // delivered slots - delivered := make(map[common.Hash]bool) - for i := 0; i < len(res.hashes); i++ { - delivered[res.roots[i]] = true - } for i, account := range res.accounts { // If the account was not delivered, reschedule it if i >= len(res.hashes) { - if !delivered[res.roots[i]] { - res.mainTask.stateTasks[account] = res.roots[i] - } + res.mainTask.stateTasks[account] = res.roots[i] continue } // State was delivered, if complete mark as not needed any more, otherwise // mark the account as needing healing - for j, acc := range res.mainTask.res.accounts { - if res.roots[i] == acc.Root { - // If the packet contains multiple contract storage slots, all - // but the last are surely complete. The last contract may be - // chunked, so check it's continuation flag. - if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { - res.mainTask.needState[j] = false - res.mainTask.pend-- - } - // If the last contract was chunked, mark it as needing healing - // to avoid writing it out to disk prematurely. - if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont { - res.mainTask.needHeal[j] = true - } - // If the last contract was chunked, we need to switch to large - // contract handling mode - if res.subTask == nil && i == len(res.hashes)-1 && res.cont { - // If we haven't yet started a large-contract retrieval, create - // the subtasks for it within the main account task - if tasks, ok := res.mainTask.SubTasks[account]; !ok { - var ( - next common.Hash - ) - step := new(big.Int).Sub( - new(big.Int).Div( - new(big.Int).Exp(common.Big2, common.Big256, nil), - big.NewInt(storageConcurrency), - ), common.Big1, - ) - for k := 0; k < storageConcurrency; k++ { - last := common.BigToHash(new(big.Int).Add(next.Big(), step)) - if k == storageConcurrency-1 { - // Make sure we don't overflow if the step is not a proper divisor - last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") - } - tasks = append(tasks, &storageTask{ - Next: next, - Last: last, - root: acc.Root, - }) - log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", next, "last", last) - next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) + for j, hash := range res.mainTask.res.hashes { + if account != hash { + continue + } + acc := res.mainTask.res.accounts[j] + // If the packet contains multiple contract storage slots, all + // but the last are surely complete. The last contract may be + // chunked, so check it's continuation flag. 
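			// Only the trailing account of the response can be incomplete, so
			// res.cont is consulted solely for the last index; every earlier
			// account in the packet delivered its full storage and can be
			// marked done outright.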
+ if res.subTask == nil && res.mainTask.needState[j] && (i < len(res.hashes)-1 || !res.cont) { + res.mainTask.needState[j] = false + res.mainTask.pend-- + } + // If the last contract was chunked, mark it as needing healing + // to avoid writing it out to disk prematurely. + if res.subTask == nil && !res.mainTask.needHeal[j] && i == len(res.hashes)-1 && res.cont { + res.mainTask.needHeal[j] = true + } + // If the last contract was chunked, we need to switch to large + // contract handling mode + if res.subTask == nil && i == len(res.hashes)-1 && res.cont { + // If we haven't yet started a large-contract retrieval, create + // the subtasks for it within the main account task + if tasks, ok := res.mainTask.SubTasks[account]; !ok { + var ( + next common.Hash + ) + step := new(big.Int).Sub( + new(big.Int).Div( + new(big.Int).Exp(common.Big2, common.Big256, nil), + big.NewInt(storageConcurrency), + ), common.Big1, + ) + for k := 0; k < storageConcurrency; k++ { + last := common.BigToHash(new(big.Int).Add(next.Big(), step)) + if k == storageConcurrency-1 { + // Make sure we don't overflow if the step is not a proper divisor + last = common.HexToHash("0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff") } - res.mainTask.SubTasks[account] = tasks - - // Since we've just created the sub-tasks, this response - // is surely for the first one (zero origin) - res.subTask = tasks[0] + tasks = append(tasks, &storageTask{ + Next: next, + Last: last, + root: acc.Root, + }) + log.Debug("Created storage sync task", "account", account, "root", acc.Root, "from", next, "last", last) + next = common.BigToHash(new(big.Int).Add(last.Big(), common.Big1)) } + res.mainTask.SubTasks[account] = tasks + + // Since we've just created the sub-tasks, this response + // is surely for the first one (zero origin) + res.subTask = tasks[0] } - // If we're in large contract delivery mode, forward the subtask - if res.subTask != nil { - // Ensure the response doesn't overflow into the subsequent task - last := res.subTask.Last.Big() - for k, hash := range res.hashes[i] { - if hash.Big().Cmp(last) > 0 { - // Chunk overflown, cut off excess, but also update the boundary - for l := k; l < len(res.hashes[i]); l++ { - if err := res.tries[i].Prove(res.hashes[i][l][:], 0, res.overflow); err != nil { - panic(err) // Account range was already proven, what happened - } + } + // If we're in large contract delivery mode, forward the subtask + if res.subTask != nil { + // Ensure the response doesn't overflow into the subsequent task + last := res.subTask.Last.Big() + for k, hash := range res.hashes[i] { + if hash.Big().Cmp(last) > 0 { + // Chunk overflown, cut off excess, but also update the boundary + for l := k; l < len(res.hashes[i]); l++ { + if err := res.tries[i].Prove(res.hashes[i][l][:], 0, res.overflow); err != nil { + panic(err) // Account range was already proven, what happened } - res.hashes[i] = res.hashes[i][:k] - res.slots[i] = res.slots[i][:k] - res.cont = false // Mark range completed - break } + res.hashes[i] = res.hashes[i][:k] + res.slots[i] = res.slots[i][:k] + res.cont = false // Mark range completed + break } - // Forward the relevant storage chunk (even if created just now) - if res.cont { - res.subTask.Next = common.BigToHash(new(big.Int).Add(res.hashes[i][len(res.hashes[i])-1].Big(), big.NewInt(1))) - } else { - res.subTask.done = true - } + } + // Forward the relevant storage chunk (even if created just now) + if res.cont { + res.subTask.Next = 
common.BigToHash(new(big.Int).Add(res.hashes[i][len(res.hashes[i])-1].Big(), big.NewInt(1))) + } else { + res.subTask.done = true } } } From be0ab3bd711774a91a308e9a4197b193390eccd3 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Wed, 20 Jan 2021 16:11:31 +0100 Subject: [PATCH 16/22] synctests: refactor + minify panic testcase --- eth/protocols/snap/sync_test.go | 73 +++++++++++++++------------------ 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index f69ee9807c3c..bb358e78d5f6 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -475,8 +475,7 @@ func noProofStorageRequestHandler(t *testPeer, requestId uint64, root common.Has // also ship the entire trie inside the proof. If the attack is successful, // the remote side does not do any follow-up requests func TestSyncBloatedProof(t *testing.T) { - trieBackend := trie.NewDatabase(rawdb.NewMemoryDatabase()) - sourceAccountTrie, elems := makeAccountTrieNoStorage(trieBackend, 100) + sourceAccountTrie, elems := makeAccountTrieNoStorage(100) cancel := make(chan struct{}) source := newTestPeer("source", t, cancel) source.accountTrie = sourceAccountTrie @@ -539,7 +538,7 @@ func setupSyncer(peers ...*testPeer) *Syncer { // TestSync tests a basic sync with one peer func TestSync(t *testing.T) { cancel := make(chan struct{}) - sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 100) + sourceAccountTrie, elems := makeAccountTrieNoStorage(100) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, cancel) @@ -559,23 +558,17 @@ func TestSync(t *testing.T) { func TestSyncTinyTriePanic(t *testing.T) { cancel := make(chan struct{}) - sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(1, 1, false) + sourceAccountTrie, elems := makeAccountTrieNoStorage(1) - mkSource := func(name string, slow bool) *testPeer { + mkSource := func(name string) *testPeer { source := newTestPeer(name, t, cancel) source.accountTrie = sourceAccountTrie source.accountValues = elems - source.storageTries = storageTries - source.storageValues = storageElems - - if slow { - source.storageRequestHandler = starvingStorageRequestHandler - } return source } syncer := setupSyncer( - mkSource("nice-a", false), + mkSource("nice-a"), ) done := checkStall(t, cancel) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { @@ -587,7 +580,7 @@ func TestSyncTinyTriePanic(t *testing.T) { // TestMultiSync tests a basic sync with multiple peers func TestMultiSync(t *testing.T) { cancel := make(chan struct{}) - sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 100) + sourceAccountTrie, elems := makeAccountTrieNoStorage(100) mkSource := func(name string) *testPeer { source := newTestPeer(name, t, cancel) @@ -763,7 +756,7 @@ func checkStall(t *testing.T, cancel chan (struct{})) chan struct{} { func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { cancel := make(chan struct{}) - sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) + sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) mkSource := func(name string, slow bool) *testPeer { source := newTestPeer(name, t, cancel) @@ -794,8 +787,7 @@ func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { cancel := make(chan struct{}) - sourceAccountTrie, elems := 
makeAccountTrieNoStorage( - trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) + sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) mkSource := func(name string, codeFn codeHandlerFunc) *testPeer { source := newTestPeer(name, t, cancel) @@ -822,7 +814,7 @@ func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { cancel := make(chan struct{}) - sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) + sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) mkSource := func(name string, accFn accountHandlerFunc) *testPeer { source := newTestPeer(name, t, cancel) @@ -851,7 +843,7 @@ func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { cancel := make(chan struct{}) - sourceAccountTrie, elems := makeAccountTrieNoStorage(trie.NewDatabase(rawdb.NewMemoryDatabase()), 3000) + sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) mkSource := func(name string, codeFn codeHandlerFunc) *testPeer { source := newTestPeer(name, t, cancel) @@ -986,28 +978,6 @@ func (p entrySlice) Len() int { return len(p) } func (p entrySlice) Less(i, j int) bool { return bytes.Compare(p[i].k, p[j].k) < 0 } func (p entrySlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } -// makeAccountTrieNoStorage spits out a trie, along with the leafs -func makeAccountTrieNoStorage(db *trie.Database, n int) (*trie.Trie, entrySlice) { - accTrie, _ := trie.New(common.Hash{}, db) - var entries entrySlice - for i := uint64(0); i < uint64(n); i++ { - value, _ := rlp.EncodeToBytes(state.Account{ - Nonce: i, - Balance: big.NewInt(int64(i)), - Root: emptyRoot, - CodeHash: getACodeHash(i), - }) - key := key32(i) - elem := &kv{key, value, false} - accTrie.Update(elem.k, elem.v) - entries = append(entries, elem) - } - sort.Sort(entries) - // Push to disk layer - accTrie.Commit(nil) - return accTrie, entries -} - func key32(i uint64) []byte { key := make([]byte, 32) binary.LittleEndian.PutUint64(key, i) @@ -1046,6 +1016,29 @@ func getCode(hash common.Hash) []byte { return nil } +// makeAccountTrieNoStorage spits out a trie, along with the leafs +func makeAccountTrieNoStorage(n int) (*trie.Trie, entrySlice) { + db := trie.NewDatabase(rawdb.NewMemoryDatabase()) + accTrie, _ := trie.New(common.Hash{}, db) + var entries entrySlice + for i := uint64(1); i <= uint64(n); i++ { + value, _ := rlp.EncodeToBytes(state.Account{ + Nonce: i, + Balance: big.NewInt(int64(i)), + Root: emptyRoot, + CodeHash: getACodeHash(i), + }) + key := key32(i) + elem := &kv{key, value, false} + accTrie.Update(elem.k, elem.v) + entries = append(entries, elem) + } + sort.Sort(entries) + // Push to disk layer + accTrie.Commit(nil) + return accTrie, entries +} + // makeAccountTrieWithStorage spits out a trie, along with the leafs func makeAccountTrieWithStorage(accounts, slots int, code bool) (*trie.Trie, entrySlice, map[common.Hash]*trie.Trie, map[common.Hash]entrySlice) { From 324b8a6376223028d845bebb9b1d75d1ec591b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Sun, 24 Jan 2021 21:20:21 +0200 Subject: [PATCH 17/22] eth/protocols/snap: minor polishes --- eth/protocols/snap/peer.go | 16 +++----- eth/protocols/snap/sync.go | 61 +++++++++++++++++++++++-------- eth/protocols/snap/sync_test.go | 65 +++++++++++++++------------------ 3 files changed, 80 insertions(+), 62 deletions(-) diff --git a/eth/protocols/snap/peer.go b/eth/protocols/snap/peer.go index 
4c5e2bdfc87e..4f3d550f1f70 100644 --- a/eth/protocols/snap/peer.go +++ b/eth/protocols/snap/peer.go @@ -22,17 +22,6 @@ import ( "github.com/ethereum/go-ethereum/p2p" ) -// PeerIF exists so we can mock peers in testing, and not deal with -// the actual protocol marshalling -type PeerIF interface { - ID() string - RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error - RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error - RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error - RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error - Log() log.Logger -} - // Peer is a collection of relevant information we have about a `snap` peer. type Peer struct { id string // Unique ID for the peer, cached @@ -67,6 +56,11 @@ func (p *Peer) Version() uint { return p.version } +// Log overrides the P2P logget with the higher level one containing only the id. +func (p *Peer) Log() log.Logger { + return p.logger +} + // RequestAccountRange fetches a batch of accounts rooted in a specific account // trie, starting with the origin. func (p *Peer) RequestAccountRange(id uint64, root common.Hash, origin, limit common.Hash, bytes uint64) error { diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 008daa35aecd..ffaac4e8b38d 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -333,6 +333,33 @@ type syncProgress struct { BytecodeHealNops uint64 // Number of bytecodes not requested } +// SyncPeer abstracts out the methods required for a peer to be synced against +// with the goal of allowing the construction of mock peers without the full +// blown networking. +type SyncPeer interface { + // ID retrieves the peer's unique identifier. + ID() string + + // RequestAccountRange fetches a batch of accounts rooted in a specific account + // trie, starting with the origin. + RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error + + // RequestStorageRange fetches a batch of storage slots belonging to one or + // more accounts. If slots from only one accout is requested, an origin marker + // may also be used to retrieve from there. + RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error + + // RequestByteCodes fetches a batch of bytecodes by hash. + RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error + + // RequestTrieNodes fetches a batch of account or storage trie nodes rooted in + // a specificstate trie. + RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error + + // Log retrieves the peer's own contextual logger. + Log() log.Logger +} + // Syncer is an Ethereum account and storage trie syncer based on snapshots and // the snap protocol. 
It's purpose is to download all the accounts and storage // slots from remote peers and reassemble chunks of the state trie, on top of @@ -354,9 +381,9 @@ type Syncer struct { healer *healTask // Current state healing task being executed update chan struct{} // Notification channel for possible sync progression - peers map[string]PeerIF // Currently active peers to download from - peerJoin *event.Feed // Event feed to react to peers joining - peerDrop *event.Feed // Event feed to react to peers dropping + peers map[string]SyncPeer // Currently active peers to download from + peerJoin *event.Feed // Event feed to react to peers joining + peerDrop *event.Feed // Event feed to react to peers dropping // Request tracking during syncing phase statelessPeers map[string]struct{} // Peers that failed to deliver state data @@ -413,12 +440,14 @@ type Syncer struct { lock sync.RWMutex // Protects fields that can change outside of sync (peers, reqs, root) } +// NewSyncer creates a new snapshot syncer to download the Ethereum state over the +// snap protocol. func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { return &Syncer{ db: db, bloom: bloom, - peers: make(map[string]PeerIF), + peers: make(map[string]SyncPeer), peerJoin: new(event.Feed), peerDrop: new(event.Feed), update: make(chan struct{}, 1), @@ -450,7 +479,7 @@ func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { } // Register injects a new data source into the syncer's peerset. -func (s *Syncer) Register(peer PeerIF) error { +func (s *Syncer) Register(peer SyncPeer) error { // Make sure the peer is not registered yet s.lock.Lock() pId := peer.ID() @@ -812,7 +841,7 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) int { s.pend.Add(1) assignments++ - go func(peer PeerIF, root common.Hash) { + go func(peer SyncPeer, root common.Hash) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -901,7 +930,7 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) int { delete(s.bytecodeIdlers, idle) s.pend.Add(1) - go func(peer PeerIF) { + go func(peer SyncPeer) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -1027,7 +1056,7 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) int { delete(s.storageIdlers, idle) s.pend.Add(1) - go func(peer PeerIF, root common.Hash) { + go func(peer SyncPeer, root common.Hash) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -1144,7 +1173,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) int { delete(s.trienodeHealIdlers, idle) s.pend.Add(1) - go func(peer PeerIF, root common.Hash) { + go func(peer SyncPeer, root common.Hash) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -1244,7 +1273,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) int { delete(s.bytecodeHealIdlers, idle) s.pend.Add(1) - go func(peer PeerIF) { + go func(peer SyncPeer) { defer s.pend.Done() // Attempt to send the remote request and revert if it fails @@ -1959,7 +1988,7 @@ func (s *Syncer) forwardAccountTask(task *accountTask) { // OnAccounts is a callback method to invoke when a range of accounts are // received from a remote peer. 
-func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { +func (s *Syncer) OnAccounts(peer SyncPeer, id uint64, hashes []common.Hash, accounts [][]byte, proof [][]byte) error { size := common.StorageSize(len(hashes) * common.HashLength) for _, account := range accounts { size += common.StorageSize(len(account)) @@ -2074,7 +2103,7 @@ func (s *Syncer) OnAccounts(peer PeerIF, id uint64, hashes []common.Hash, accoun // OnByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer. -func (s *Syncer) OnByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { +func (s *Syncer) OnByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { s.lock.RLock() syncing := !s.snapped s.lock.RUnlock() @@ -2087,7 +2116,7 @@ func (s *Syncer) OnByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { // onByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer in the syncing phase. -func (s *Syncer) onByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { +func (s *Syncer) onByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { var size common.StorageSize for _, code := range bytecodes { size += common.StorageSize(len(code)) @@ -2180,7 +2209,7 @@ func (s *Syncer) onByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { // OnStorage is a callback method to invoke when ranges of storage slots // are received from a remote peer. -func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { +func (s *Syncer) OnStorage(peer SyncPeer, id uint64, hashes [][]common.Hash, slots [][][]byte, proof [][]byte) error { // Gather some trace stats to aid in debugging issues var ( hashCount int @@ -2338,7 +2367,7 @@ func (s *Syncer) OnStorage(peer PeerIF, id uint64, hashes [][]common.Hash, slots // OnTrieNodes is a callback method to invoke when a batch of trie nodes // are received from a remote peer. -func (s *Syncer) OnTrieNodes(peer PeerIF, id uint64, trienodes [][]byte) error { +func (s *Syncer) OnTrieNodes(peer SyncPeer, id uint64, trienodes [][]byte) error { var size common.StorageSize for _, node := range trienodes { size += common.StorageSize(len(node)) @@ -2432,7 +2461,7 @@ func (s *Syncer) OnTrieNodes(peer PeerIF, id uint64, trienodes [][]byte) error { // onHealByteCodes is a callback method to invoke when a batch of contract // bytes codes are received from a remote peer in the healing phase. 
-func (s *Syncer) onHealByteCodes(peer PeerIF, id uint64, bytecodes [][]byte) error { +func (s *Syncer) onHealByteCodes(peer SyncPeer, id uint64, bytecodes [][]byte) error { var size common.StorageSize for _, code := range bytecodes { size += common.StorageSize(len(code)) diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index bb358e78d5f6..ce21876d3820 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -119,7 +119,7 @@ type testPeer struct { id string test *testing.T remote *Syncer - log log.Logger + logger log.Logger accountTrie *trie.Trie accountValues entrySlice storageTries map[common.Hash]*trie.Trie @@ -136,7 +136,7 @@ func newTestPeer(id string, t *testing.T, cancelCh chan struct{}) *testPeer { peer := &testPeer{ id: id, test: t, - log: log.New("id", id), + logger: log.New("id", id), accountRequestHandler: defaultAccountRequestHandler, trieRequestHandler: defaultTrieRequestHandler, storageRequestHandler: defaultStorageRequestHandler, @@ -144,43 +144,38 @@ func newTestPeer(id string, t *testing.T, cancelCh chan struct{}) *testPeer { cancelCh: cancelCh, } stdoutHandler := log.StreamHandler(os.Stdout, log.TerminalFormat(true)) - peer.log.SetHandler(stdoutHandler) + peer.logger.SetHandler(stdoutHandler) return peer } -func (t *testPeer) ID() string { - return t.id -} - -func (t *testPeer) Log() log.Logger { - return t.log -} +func (t *testPeer) ID() string { return t.id } +func (t *testPeer) Log() log.Logger { return t.logger } func (t *testPeer) RequestAccountRange(id uint64, root, origin, limit common.Hash, bytes uint64) error { - t.log.Trace("Fetching range of accounts", "reqid", id, "root", root, "origin", origin, "limit", limit, "bytes", common.StorageSize(bytes)) + t.logger.Trace("Fetching range of accounts", "reqid", id, "root", root, "origin", origin, "limit", limit, "bytes", common.StorageSize(bytes)) go t.accountRequestHandler(t, id, root, origin, bytes) return nil } func (t *testPeer) RequestTrieNodes(id uint64, root common.Hash, paths []TrieNodePathSet, bytes uint64) error { - t.log.Trace("Fetching set of trie nodes", "reqid", id, "root", root, "pathsets", len(paths), "bytes", common.StorageSize(bytes)) + t.logger.Trace("Fetching set of trie nodes", "reqid", id, "root", root, "pathsets", len(paths), "bytes", common.StorageSize(bytes)) go t.trieRequestHandler(t, id, root, paths, bytes) return nil } func (t *testPeer) RequestStorageRanges(id uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, bytes uint64) error { if len(accounts) == 1 && origin != nil { - t.log.Trace("Fetching range of large storage slots", "reqid", id, "root", root, "account", accounts[0], "origin", common.BytesToHash(origin), "limit", common.BytesToHash(limit), "bytes", common.StorageSize(bytes)) + t.logger.Trace("Fetching range of large storage slots", "reqid", id, "root", root, "account", accounts[0], "origin", common.BytesToHash(origin), "limit", common.BytesToHash(limit), "bytes", common.StorageSize(bytes)) } else { - t.log.Trace("Fetching ranges of small storage slots", "reqid", id, "root", root, "accounts", len(accounts), "first", accounts[0], "bytes", common.StorageSize(bytes)) + t.logger.Trace("Fetching ranges of small storage slots", "reqid", id, "root", root, "accounts", len(accounts), "first", accounts[0], "bytes", common.StorageSize(bytes)) } go t.storageRequestHandler(t, id, root, accounts, origin, limit, bytes) return nil } func (t *testPeer) RequestByteCodes(id uint64, hashes []common.Hash, bytes uint64) error 
{ - t.log.Trace("Fetching set of byte codes", "reqid", id, "hashes", len(hashes), "bytes", common.StorageSize(bytes)) + t.logger.Trace("Fetching set of byte codes", "reqid", id, "hashes", len(hashes), "bytes", common.StorageSize(bytes)) go t.codeRequestHandler(t, id, hashes, bytes) return nil } @@ -194,7 +189,7 @@ func defaultTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, case 1: blob, _, err := t.accountTrie.TryGetNode(pathset[0]) if err != nil { - t.Log().Info("Error handling req", "error", err) + t.logger.Info("Error handling req", "error", err) break } nodes = append(nodes, blob) @@ -203,7 +198,7 @@ func defaultTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, for _, path := range pathset[1:] { blob, _, err := account.TryGetNode(path) if err != nil { - t.Log().Info("Error handling req", "error", err) + t.logger.Info("Error handling req", "error", err) break } nodes = append(nodes, blob) @@ -218,7 +213,7 @@ func defaultTrieRequestHandler(t *testPeer, requestId uint64, root common.Hash, func defaultAccountRequestHandler(t *testPeer, id uint64, root common.Hash, origin common.Hash, cap uint64) error { keys, vals, proofs := createAccountRequestResponse(t, root, origin, cap) if err := t.remote.OnAccounts(t, id, keys, vals, proofs); err != nil { - t.log.Error("remote error on delivery", "error", err) + t.logger.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) t.remote.Unregister(t.id) t.cancelCh <- struct{}{} @@ -244,13 +239,13 @@ func createAccountRequestResponse(t *testPeer, root common.Hash, origin common.H // quirk in go-ethereum proof := light.NewNodeSet() if err := t.accountTrie.Prove(origin[:], 0, proof); err != nil { - t.log.Error("Could not prove inexistence of origin", "origin", origin, + t.logger.Error("Could not prove inexistence of origin", "origin", origin, "error", err) } if len(keys) > 0 { lastK := (keys[len(keys)-1])[:] if err := t.accountTrie.Prove(lastK, 0, proof); err != nil { - t.log.Error("Could not prove last item", + t.logger.Error("Could not prove last item", "error", err) } } @@ -264,7 +259,7 @@ func createAccountRequestResponse(t *testPeer, root common.Hash, origin common.H func defaultStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, bOrigin, bLimit []byte, max uint64) error { hashes, slots, proofs := createStorageRequestResponse(t, root, accounts, bOrigin, bLimit, max) if err := t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { - t.log.Error("remote error on delivery", "error", err) + t.logger.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) t.cancelCh <- struct{}{} } @@ -277,7 +272,7 @@ func defaultCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max bytecodes = append(bytecodes, getCode(h)) } if err := t.remote.OnByteCodes(t, id, bytecodes); err != nil { - t.log.Error("remote error on delivery", "error", err) + t.logger.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) t.cancelCh <- struct{}{} } @@ -335,13 +330,13 @@ func createStorageRequestResponse(t *testPeer, root common.Hash, accounts []comm // use the 'origin' slice directly, but must use the full 32-byte // hash form. 
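Every request handler in this test file follows the same proving recipe: prove the origin (even when that exact key is absent from the trie), prove the last key actually returned, and ship the union of the two proofs; the storage-side variant of it continues immediately below this comment. A condensed sketch of the recipe, reusing only calls already visible in this file (trie.Prove into a light.NodeSet) and simplified here to take common.Hash keys:

// proveRange returns the proof nodes a responder attaches to a range reply:
// one proof for the origin, to anchor where the range starts, and one for the
// last key delivered, to bound where it ends.
func proveRange(tr *trie.Trie, origin common.Hash, keys []common.Hash) ([][]byte, error) {
	proof := light.NewNodeSet()
	if err := tr.Prove(origin[:], 0, proof); err != nil {
		return nil, err
	}
	if len(keys) > 0 {
		last := keys[len(keys)-1]
		if err := tr.Prove(last[:], 0, proof); err != nil {
			return nil, err
		}
	}
	var proofs [][]byte
	for _, blob := range proof.NodeList() {
		proofs = append(proofs, blob)
	}
	return proofs, nil
}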
if err := stTrie.Prove(origin[:], 0, proof); err != nil { - t.log.Error("Could not prove inexistence of origin", "origin", origin, + t.logger.Error("Could not prove inexistence of origin", "origin", origin, "error", err) } if len(keys) > 0 { lastK := (keys[len(keys)-1])[:] if err := stTrie.Prove(lastK, 0, proof); err != nil { - t.log.Error("Could not prove last item", "error", err) + t.logger.Error("Could not prove last item", "error", err) } } for _, blob := range proof.NodeList() { @@ -401,20 +396,20 @@ func corruptCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max bytecodes = append(bytecodes, h[:]) } if err := t.remote.OnByteCodes(t, id, bytecodes); err != nil { - t.log.Error("remote error on delivery", "error", err) + t.logger.Error("remote error on delivery", "error", err) // Mimic the real-life handler, which drops a peer on errors t.remote.Unregister(t.id) } return nil } -func cappedCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { +func cappedCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { var bytecodes [][]byte for _, h := range hashes[:1] { bytecodes = append(bytecodes, getCode(h)) } if err := t.remote.OnByteCodes(t, id, bytecodes); err != nil { - t.log.Error("remote error on delivery", "error", err) + t.logger.Error("remote error on delivery", "error", err) // Mimic the real-life handler, which drops a peer on errors t.remote.Unregister(t.id) } @@ -440,7 +435,7 @@ func corruptAccountRequestHandler(t *testPeer, requestId uint64, root common.Has proofs = proofs[1:] } if err := t.remote.OnAccounts(t, requestId, hashes, accounts, proofs); err != nil { - t.log.Info("remote error on delivery (as expected)", "error", err) + t.logger.Info("remote error on delivery (as expected)", "error", err) // Mimic the real-life handler, which drops a peer on errors t.remote.Unregister(t.id) } @@ -454,7 +449,7 @@ func corruptStorageRequestHandler(t *testPeer, requestId uint64, root common.Has proofs = proofs[1:] } if err := t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { - t.log.Info("remote error on delivery (as expected)", "error", err) + t.logger.Info("remote error on delivery (as expected)", "error", err) // Mimic the real-life handler, which drops a peer on errors t.remote.Unregister(t.id) } @@ -464,7 +459,7 @@ func corruptStorageRequestHandler(t *testPeer, requestId uint64, root common.Has func noProofStorageRequestHandler(t *testPeer, requestId uint64, root common.Hash, accounts []common.Hash, origin, limit []byte, max uint64) error { hashes, slots, _ := createStorageRequestResponse(t, root, accounts, origin, limit, max) if err := t.remote.OnStorage(t, requestId, hashes, slots, nil); err != nil { - t.log.Info("remote error on delivery (as expected)", "error", err) + t.logger.Info("remote error on delivery (as expected)", "error", err) // Mimic the real-life handler, which drops a peer on errors t.remote.Unregister(t.id) } @@ -496,12 +491,12 @@ func TestSyncBloatedProof(t *testing.T) { // The proofs proof := light.NewNodeSet() if err := t.accountTrie.Prove(origin[:], 0, proof); err != nil { - t.log.Error("Could not prove origin", "origin", origin, "error", err) + t.logger.Error("Could not prove origin", "origin", origin, "error", err) } // The bloat: add proof of every single element for _, entry := range t.accountValues { if err := t.accountTrie.Prove(entry.k, 0, proof); err != nil { - t.log.Error("Could not prove item", "error", err) + t.logger.Error("Could not prove item", 
"error", err) } } // And remove one item from the elements @@ -513,7 +508,7 @@ func TestSyncBloatedProof(t *testing.T) { proofs = append(proofs, blob) } if err := t.remote.OnAccounts(t, requestId, keys, vals, proofs); err != nil { - t.log.Info("remote error on delivery", "error", err) + t.logger.Info("remote error on delivery", "error", err) // This is actually correct, signal to exit the test successfully t.cancelCh <- struct{}{} } @@ -801,7 +796,7 @@ func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { // non-corrupt peer, which delivers everything in one go, and makes the // test moot syncer := setupSyncer( - mkSource("capped", cappedCodeReqeustHandler), + mkSource("capped", cappedCodeRequestHandler), mkSource("corrupt", corruptCodeReqeustHandler), ) done := checkStall(t, cancel) @@ -858,7 +853,7 @@ func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { syncer := setupSyncer( mkSource("capped", func(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { counter++ - return cappedCodeReqeustHandler(t, id, hashes, max) + return cappedCodeRequestHandler(t, id, hashes, max) }), ) done := checkStall(t, cancel) From 88dc6ee1d66bd41c14d202163c341cffe5693ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Sun, 24 Jan 2021 21:44:16 +0200 Subject: [PATCH 18/22] eth: minor polishes to make logs more useful --- eth/downloader/downloader.go | 4 +- eth/handler.go | 16 +++++-- eth/protocols/snap/sync.go | 89 ++++++++++++++++-------------------- 3 files changed, 53 insertions(+), 56 deletions(-) diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 31c1cb47c373..421803876e0f 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -298,7 +298,7 @@ func (d *Downloader) RegisterPeer(id string, version uint, peer Peer) error { // Tests use short IDs, don't choke on them logger = log.New("peer", id) } else { - logger = log.New("peer", id[:16]) + logger = log.New("peer", id[:8]) } logger.Trace("Registering sync peer") if err := d.peers.Register(newPeerConnection(id, version, peer, logger)); err != nil { @@ -325,7 +325,7 @@ func (d *Downloader) UnregisterPeer(id string) error { // Tests use short IDs, don't choke on them logger = log.New("peer", id) } else { - logger = log.New("peer", id[:16]) + logger = log.New("peer", id[:8]) } logger.Trace("Unregistering sync peer") if err := d.peers.Unregister(id); err != nil { diff --git a/eth/handler.go b/eth/handler.go index a9506c4995f9..f6366d9af173 100644 --- a/eth/handler.go +++ b/eth/handler.go @@ -326,24 +326,32 @@ func (h *handler) runSnapPeer(peer *snap.Peer, handler snap.Handler) error { } func (h *handler) removePeer(id string) { + // Create a custom logger to avoid printing the entire id + var logger log.Logger + if len(id) < 16 { + // Tests use short IDs, don't choke on them + logger = log.New("peer", id) + } else { + logger = log.New("peer", id[:8]) + } // Remove the eth peer if it exists eth := h.peers.ethPeer(id) if eth != nil { - log.Debug("Removing Ethereum peer", "peer", id) + logger.Debug("Removing Ethereum peer") h.downloader.UnregisterPeer(id) h.txFetcher.Drop(id) if err := h.peers.unregisterEthPeer(id); err != nil { - log.Error("Peer removal failed", "peer", id, "err", err) + logger.Error("Ethereum peer removal failed", "err", err) } } // Remove the snap peer if it exists snap := h.peers.snapPeer(id) if snap != nil { - log.Debug("Removing Snapshot peer", "peer", id) + logger.Debug("Removing Snapshot peer") h.downloader.SnapSyncer.Unregister(id) if err 
:= h.peers.unregisterSnapPeer(id); err != nil { - log.Error("Peer removal failed", "peer", id, "err", err) + logger.Error("Snapshot peer removal failed", "err", err) } } // Hard disconnect at the networking layer diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index ffaac4e8b38d..1e99b703848f 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -481,26 +481,27 @@ func NewSyncer(db ethdb.KeyValueStore, bloom *trie.SyncBloom) *Syncer { // Register injects a new data source into the syncer's peerset. func (s *Syncer) Register(peer SyncPeer) error { // Make sure the peer is not registered yet + id := peer.ID() + s.lock.Lock() - pId := peer.ID() - if _, ok := s.peers[pId]; ok { - log.Error("Snap peer already registered", "id", pId) + if _, ok := s.peers[id]; ok { + log.Error("Snap peer already registered", "id", id) s.lock.Unlock() return errors.New("already registered") } - s.peers[pId] = peer + s.peers[id] = peer // Mark the peer as idle, even if no sync is running - s.accountIdlers[pId] = struct{}{} - s.storageIdlers[pId] = struct{}{} - s.bytecodeIdlers[pId] = struct{}{} - s.trienodeHealIdlers[pId] = struct{}{} - s.bytecodeHealIdlers[pId] = struct{}{} + s.accountIdlers[id] = struct{}{} + s.storageIdlers[id] = struct{}{} + s.bytecodeIdlers[id] = struct{}{} + s.trienodeHealIdlers[id] = struct{}{} + s.bytecodeHealIdlers[id] = struct{}{} s.lock.Unlock() // Notify any active syncs that a new peer can be assigned data - s.peerJoin.Send(pId) + s.peerJoin.Send(id) return nil } @@ -596,17 +597,15 @@ func (s *Syncer) Sync(root common.Hash, cancel chan struct{}) error { return nil } // Assign all the data retrieval tasks to any free peers - acTasks := s.assignAccountTasks(cancel) - bcTasks := s.assignBytecodeTasks(cancel) - stTasks := s.assignStorageTasks(cancel) - var tnhTasks, bchTasks int + s.assignAccountTasks(cancel) + s.assignBytecodeTasks(cancel) + s.assignStorageTasks(cancel) + if len(s.tasks) == 0 { // Sync phase done, run heal phase - tnhTasks = s.assignTrienodeHealTasks(cancel) - bchTasks = s.assignBytecodeHealTasks(cancel) + s.assignTrienodeHealTasks(cancel) + s.assignBytecodeHealTasks(cancel) } - log.Debug("Assigned tasks", "account", acTasks, "bytecode", bcTasks, - "storage", stTasks, "trie-heal", tnhTasks, "byte-heal", bchTasks) // Wait for something to happen select { case <-s.update: @@ -780,13 +779,13 @@ func (s *Syncer) cleanStorageTasks() { // assignAccountTasks attempts to match idle peers to pending account range // retrievals.
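The downloader and handler hunks in this patch repeat the same guard for deriving a per-peer logger from an ID that may be a short test string. If that pattern keeps spreading, it could be collapsed into a tiny helper along these lines (peerLogger is a hypothetical name, not something the patch introduces):

// peerLogger returns a logger tagged with a shortened peer id. Test peers use
// short human-readable ids, so only truncate when there is enough to trim.
func peerLogger(id string) log.Logger {
	if len(id) < 16 {
		return log.New("peer", id)
	}
	return log.New("peer", id[:8])
}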
-func (s *Syncer) assignAccountTasks(cancel chan struct{}) int { +func (s *Syncer) assignAccountTasks(cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() - assignments := 0 + // If there are no idle peers, short circuit assignment if len(s.accountIdlers) == 0 { - return assignments + return } // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { @@ -808,7 +807,7 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) int { break } if idle == "" { - return assignments + return } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -840,7 +839,6 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) int { delete(s.accountIdlers, idle) s.pend.Add(1) - assignments++ go func(peer SyncPeer, root common.Hash) { defer s.pend.Done() @@ -854,17 +852,16 @@ func (s *Syncer) assignAccountTasks(cancel chan struct{}) int { // Inject the request into the task to block further assignments task.req = req } - return assignments } // assignBytecodeTasks attempts to match idle peers to pending code retrievals. -func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) int { +func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() - var assignments = 0 + // If there are no idle peers, short circuit assignment if len(s.bytecodeIdlers) == 0 { - return assignments + return } // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { @@ -890,7 +887,7 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) int { break } if idle == "" { - return assignments + return } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -913,7 +910,6 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) int { break } } - assignments++ req := &bytecodeRequest{ peer: idle, id: reqid, @@ -940,18 +936,17 @@ func (s *Syncer) assignBytecodeTasks(cancel chan struct{}) int { } }(s.peers[idle]) // We're in the lock, peers[id] surely exists } - return assignments } // assignStorageTasks attempts to match idle peers to pending storage range // retrievals. -func (s *Syncer) assignStorageTasks(cancel chan struct{}) int { +func (s *Syncer) assignStorageTasks(cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() - var assignments = 0 + // If there are no idle peers, short circuit assignment if len(s.storageIdlers) == 0 { - return assignments + return } // Iterate over all the tasks and try to find a pending one for _, task := range s.tasks { @@ -977,7 +972,7 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) int { break } if idle == "" { - return assignments + return } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -1033,7 +1028,6 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) int { if len(accounts) == 0 { continue } - assignments++ req := &storageRequest{ peer: idle, id: reqid, @@ -1075,18 +1069,17 @@ func (s *Syncer) assignStorageTasks(cancel chan struct{}) int { subtask.req = req } } - return assignments } // assignTrienodeHealTasks attempts to match idle peers to trie node requests to // heal any trie errors caused by the snap sync's chunked retrieval model. 
-func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) int { +func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() - var assignments = 0 + // If there are no idle peers, short circuit assignment if len(s.trienodeHealIdlers) == 0 { - return assignments + return } // Iterate over pending tasks and try to find a peer to retrieve with for len(s.healer.trieTasks) > 0 || s.healer.scheduler.Pending() > 0 { @@ -1108,7 +1101,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) int { } // If all the heal tasks are bytecodes or already downloading, bail if len(s.healer.trieTasks) == 0 { - return assignments + return } // Task pending retrieval, try to find an idle peer. If no such peer // exists, we probably assigned tasks for all (or they are stateless). @@ -1124,7 +1117,7 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) int { break } if idle == "" { - return assignments + return } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -1155,7 +1148,6 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) int { break } } - assignments++ req := &trienodeHealRequest{ peer: idle, id: reqid, @@ -1183,18 +1175,17 @@ func (s *Syncer) assignTrienodeHealTasks(cancel chan struct{}) int { } }(s.peers[idle], s.root) // We're in the lock, peers[id] surely exists } - return assignments } // assignBytecodeHealTasks attempts to match idle peers to bytecode requests to // heal any trie errors caused by the snap sync's chunked retrieval model. -func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) int { +func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) { s.lock.Lock() defer s.lock.Unlock() - var assignments = 0 + // If there are no idle peers, short circuit assignment if len(s.bytecodeHealIdlers) == 0 { - return assignments + return } // Iterate over pending tasks and try to find a peer to retrieve with for len(s.healer.codeTasks) > 0 || s.healer.scheduler.Pending() > 0 { @@ -1216,7 +1207,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) int { } // If all the heal tasks are trienodes or already downloading, bail if len(s.healer.codeTasks) == 0 { - return assignments + return } // Task pending retrieval, try to find an idle peer. If no such peer // exists, we probably assigned tasks for all (or they are stateless). 
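All five assign*Tasks variants share the selection step described in the comment just above: walk the relevant idler set and pick any peer that has not been flagged stateless. Stripped of the locking and task bookkeeping around it, that selection reduces to something like the following (a generic sketch, not code from the patch):

// pickIdle returns an arbitrary peer id that is currently idle and has not
// been marked stateless; the empty string means no assignment is possible.
func pickIdle(idlers, stateless map[string]struct{}) string {
	for id := range idlers {
		if _, bad := stateless[id]; bad {
			continue
		}
		return id
	}
	return ""
}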
@@ -1232,7 +1223,7 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) int { break } if idle == "" { - return assignments + return } // Matched a pending task to an idle peer, allocate a unique request id var reqid uint64 @@ -1256,7 +1247,6 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) int { break } } - assignments++ req := &bytecodeHealRequest{ peer: idle, id: reqid, @@ -1283,7 +1273,6 @@ func (s *Syncer) assignBytecodeHealTasks(cancel chan struct{}) int { } }(s.peers[idle]) // We're in the lock, peers[id] surely exists } - return assignments } // revertRequests locates all the currently pending reuqests from a particular From 4aeb629b05e01ccbd21cdd2effc5930030da667d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Sun, 24 Jan 2021 21:52:57 +0200 Subject: [PATCH 19/22] eth/protocols/snap: remove excessive logs from the test runs --- eth/protocols/snap/sync_test.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index ce21876d3820..a95f5d5b74dc 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -22,7 +22,6 @@ import ( "encoding/binary" "fmt" "math/big" - "os" "sort" "testing" "time" @@ -143,8 +142,8 @@ func newTestPeer(id string, t *testing.T, cancelCh chan struct{}) *testPeer { codeRequestHandler: defaultCodeReqeustHandler, cancelCh: cancelCh, } - stdoutHandler := log.StreamHandler(os.Stdout, log.TerminalFormat(true)) - peer.logger.SetHandler(stdoutHandler) + //stderrHandler := log.StreamHandler(os.Stderr, log.TerminalFormat(true)) + //peer.logger.SetHandler(stderrHandler) return peer } @@ -874,7 +873,6 @@ func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { // TestSyncWithStorageAndOneCappedPeer tests sync using accounts + storage, where one peer is // consistently returning very small results func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { - log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace, log.StreamHandler(os.Stderr, log.TerminalFormat(true)))) cancel := make(chan struct{}) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(300, 1000, false) From a6a131facd1461fb525a7a8afb098c26ca98a58f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Sun, 24 Jan 2021 22:14:05 +0200 Subject: [PATCH 20/22] eth/protocols/snap: stress tests with concurrency --- eth/protocols/snap/sync.go | 1 + eth/protocols/snap/sync_test.go | 51 ++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/eth/protocols/snap/sync.go b/eth/protocols/snap/sync.go index 1e99b703848f..e7720026bfcd 100644 --- a/eth/protocols/snap/sync.go +++ b/eth/protocols/snap/sync.go @@ -1703,6 +1703,7 @@ func (s *Syncer) processStorageResponse(res *storageResponse) { continue } acc := res.mainTask.res.accounts[j] + // If the packet contains multiple contract storage slots, all // but the last are surely complete. The last contract may be // chunked, so check it's continuation flag. 
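Patch 19 a little further up silences both the per-peer stdout handler and the global trace handler so CI logs stay readable. When chasing a failure locally, the same verbosity can be switched back on for a single run; a sketch that only reuses the log calls being removed there (plus an os import):

// Re-enable trace output while debugging a failing sync test locally. This
// mirrors the handler removed in patch 19 and is not meant to be committed.
log.Root().SetHandler(log.LvlFilterHandler(log.LvlTrace,
	log.StreamHandler(os.Stderr, log.TerminalFormat(true))))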
diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index a95f5d5b74dc..4de0bf17eeee 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -38,6 +38,8 @@ import ( ) func TestHashing(t *testing.T) { + t.Parallel() + var bytecodes = make([][]byte, 10) for i := 0; i < len(bytecodes); i++ { buf := make([]byte, 100) @@ -469,6 +471,8 @@ func noProofStorageRequestHandler(t *testPeer, requestId uint64, root common.Has // also ship the entire trie inside the proof. If the attack is successful, // the remote side does not do any follow-up requests func TestSyncBloatedProof(t *testing.T) { + t.Parallel() + sourceAccountTrie, elems := makeAccountTrieNoStorage(100) cancel := make(chan struct{}) source := newTestPeer("source", t, cancel) @@ -531,6 +535,8 @@ func setupSyncer(peers ...*testPeer) *Syncer { // TestSync tests a basic sync with one peer func TestSync(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(100) @@ -550,6 +556,8 @@ func TestSync(t *testing.T) { // TestSyncTinyTriePanic tests a basic sync with one peer, and a tiny trie. This caused a // panic within the prover func TestSyncTinyTriePanic(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(1) @@ -573,6 +581,8 @@ func TestSyncTinyTriePanic(t *testing.T) { // TestMultiSync tests a basic sync with multiple peers func TestMultiSync(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(100) @@ -591,6 +601,8 @@ func TestMultiSync(t *testing.T) { // TestSyncWithStorage tests basic sync using accounts + storage + code func TestSyncWithStorage(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(3, 3000, true) @@ -610,6 +622,8 @@ func TestSyncWithStorage(t *testing.T) { // TestMultiSyncManyUseless contains one good peer, and many which doesn't return anything valuable at all func TestMultiSyncManyUseless(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) @@ -649,11 +663,9 @@ func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { // We're setting the timeout to very low, to increase the chance of the timeout // being triggered. This was previously a cause of panic, when a response // arrived simultaneously as a timeout was triggered. 
- old := requestTimeout - requestTimeout = 1 * time.Millisecond - defer func() { - requestTimeout = old - }() + defer func(old time.Duration) { requestTimeout = old }(requestTimeout) + requestTimeout = time.Millisecond + cancel := make(chan struct{}) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) @@ -691,11 +703,8 @@ func TestMultiSyncManyUselessWithLowTimeout(t *testing.T) { // TestMultiSyncManyUnresponsive contains one good peer, and many which doesn't respond at all func TestMultiSyncManyUnresponsive(t *testing.T) { // We're setting the timeout to very low, to make the test run a bit faster - old := requestTimeout - requestTimeout = 1 * time.Millisecond - defer func() { - requestTimeout = old - }() + defer func(old time.Duration) { requestTimeout = old }(requestTimeout) + requestTimeout = time.Millisecond cancel := make(chan struct{}) @@ -731,13 +740,13 @@ func TestMultiSyncManyUnresponsive(t *testing.T) { } } -func checkStall(t *testing.T, cancel chan (struct{})) chan struct{} { - testDone := make(chan (struct{})) +func checkStall(t *testing.T, cancel chan struct{}) chan struct{} { + testDone := make(chan struct{}) go func() { select { case <-time.After(5 * time.Second): t.Log("Sync stalled") - cancel <- struct{}{} + close(cancel) case <-testDone: return } @@ -748,6 +757,8 @@ func checkStall(t *testing.T, cancel chan (struct{})) chan struct{} { // TestSyncNoStorageAndOneCappedPeer tests sync using accounts and no storage, where one peer is // consistently returning very small results func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) @@ -779,6 +790,8 @@ func TestSyncNoStorageAndOneCappedPeer(t *testing.T) { // TestSyncNoStorageAndOneCodeCorruptPeer has one peer which doesn't deliver // code requests properly. 
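The two timeout-sensitive tests above shrink the package-level requestTimeout with a compact defer-and-restore, and notably are left serial rather than marked t.Parallel(), presumably because they mutate that shared variable. On a Go version with t.Cleanup (1.14+), the same idea can be packaged as a helper; this is an assumption about how one might factor it, not code from the patch:

// withRequestTimeout shrinks the package-level requestTimeout for the duration
// of one test and guarantees the old value is restored afterwards.
func withRequestTimeout(t *testing.T, d time.Duration) {
	t.Helper()
	old := requestTimeout
	requestTimeout = d
	t.Cleanup(func() { requestTimeout = old })
}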
func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) @@ -806,6 +819,8 @@ func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { } func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) @@ -835,6 +850,8 @@ func TestSyncNoStorageAndOneAccountCorruptPeer(t *testing.T) { // TestSyncNoStorageAndOneCodeCappedPeer has one peer which delivers code hashes // one by one func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems := makeAccountTrieNoStorage(3000) @@ -873,6 +890,8 @@ func TestSyncNoStorageAndOneCodeCappedPeer(t *testing.T) { // TestSyncWithStorageAndOneCappedPeer tests sync using accounts + storage, where one peer is // consistently returning very small results func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(300, 1000, false) @@ -904,6 +923,8 @@ func TestSyncWithStorageAndOneCappedPeer(t *testing.T) { // TestSyncWithStorageAndCorruptPeer tests sync using accounts + storage, where one peer is // sometimes sending bad proofs func TestSyncWithStorageAndCorruptPeer(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) @@ -932,6 +953,8 @@ func TestSyncWithStorageAndCorruptPeer(t *testing.T) { } func TestSyncWithStorageAndNonProvingPeer(t *testing.T) { + t.Parallel() + cancel := make(chan struct{}) sourceAccountTrie, elems, storageTries, storageElems := makeAccountTrieWithStorage(100, 3000, true) @@ -993,7 +1016,7 @@ var ( // getACodeHash returns a pseudo-random code hash func getACodeHash(i uint64) []byte { h := codehashes[int(i)%len(codehashes)] - return h[:] + return common.CopyBytes(h[:]) } // convenience function to lookup the code from the code hash From 00b8129250cd4c8cc7e8e7ac3495498015ee968c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Sun, 24 Jan 2021 22:33:58 +0200 Subject: [PATCH 21/22] eth/protocols/snap: further fixes to test cancel channel handling --- eth/protocols/snap/sync_test.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index 4de0bf17eeee..03c92b68933b 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -141,7 +141,7 @@ func newTestPeer(id string, t *testing.T, cancelCh chan struct{}) *testPeer { accountRequestHandler: defaultAccountRequestHandler, trieRequestHandler: defaultTrieRequestHandler, storageRequestHandler: defaultStorageRequestHandler, - codeRequestHandler: defaultCodeReqeustHandler, + codeRequestHandler: defaultCodeRequestHandler, cancelCh: cancelCh, } //stderrHandler := log.StreamHandler(os.Stderr, log.TerminalFormat(true)) @@ -217,7 +217,7 @@ func defaultAccountRequestHandler(t *testPeer, id uint64, root common.Hash, orig t.logger.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) t.remote.Unregister(t.id) - t.cancelCh <- struct{}{} + close(t.cancelCh) return err } return nil @@ -262,12 +262,12 @@ func defaultStorageRequestHandler(t *testPeer, requestId uint64, root common.Has if err := 
t.remote.OnStorage(t, requestId, hashes, slots, proofs); err != nil { t.logger.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) - t.cancelCh <- struct{}{} + close(t.cancelCh) } return nil } -func defaultCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { +func defaultCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { var bytecodes [][]byte for _, h := range hashes { bytecodes = append(bytecodes, getCode(h)) @@ -275,7 +275,7 @@ func defaultCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max if err := t.remote.OnByteCodes(t, id, bytecodes); err != nil { t.logger.Error("remote error on delivery", "error", err) t.test.Errorf("Remote side rejected our delivery: %v", err) - t.cancelCh <- struct{}{} + close(t.cancelCh) } return nil } @@ -390,7 +390,7 @@ func nonResponsiveStorageRequestHandler(t *testPeer, requestId uint64, root comm // return nil //} -func corruptCodeReqeustHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { +func corruptCodeRequestHandler(t *testPeer, id uint64, hashes []common.Hash, max uint64) error { var bytecodes [][]byte for _, h := range hashes { // Send back the hashes @@ -513,7 +513,7 @@ func TestSyncBloatedProof(t *testing.T) { if err := t.remote.OnAccounts(t, requestId, keys, vals, proofs); err != nil { t.logger.Info("remote error on delivery", "error", err) // This is actually correct, signal to exit the test successfully - t.cancelCh <- struct{}{} + close(t.cancelCh) } return nil } @@ -744,7 +744,7 @@ func checkStall(t *testing.T, cancel chan struct{}) chan struct{} { testDone := make(chan struct{}) go func() { select { - case <-time.After(5 * time.Second): + case <-time.After(15 * time.Second): t.Log("Sync stalled") close(cancel) case <-testDone: @@ -809,7 +809,7 @@ func TestSyncNoStorageAndOneCodeCorruptPeer(t *testing.T) { // test moot syncer := setupSyncer( mkSource("capped", cappedCodeRequestHandler), - mkSource("corrupt", corruptCodeReqeustHandler), + mkSource("corrupt", corruptCodeRequestHandler), ) done := checkStall(t, cancel) if err := syncer.Sync(sourceAccountTrie.Hash(), cancel); err != nil { From 2aafa6a1b9a187098cc15a750b3dd0a751fb971d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A9ter=20Szil=C3=A1gyi?= Date: Mon, 25 Jan 2021 07:47:26 +0200 Subject: [PATCH 22/22] eth/protocols/snap: extend test timeouts on CI --- eth/protocols/snap/sync_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eth/protocols/snap/sync_test.go b/eth/protocols/snap/sync_test.go index 03c92b68933b..0b048786e8a1 100644 --- a/eth/protocols/snap/sync_test.go +++ b/eth/protocols/snap/sync_test.go @@ -744,7 +744,7 @@ func checkStall(t *testing.T, cancel chan struct{}) chan struct{} { testDone := make(chan struct{}) go func() { select { - case <-time.After(15 * time.Second): + case <-time.After(time.Minute): // TODO(karalabe): Make tests smaller, this is too much t.Log("Sync stalled") close(cancel) case <-testDone:
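The recurring change in patches 21 and 22 is replacing a single send on the cancel channel with close(cancel). The distinction matters: a send wakes exactly one receiver, while a close is observed by every current and future receiver, which is what a test-wide cancellation needs (with the caveat that closing the same channel twice panics, so a handler that can fire repeatedly would need a sync.Once or similar guard). A standalone illustration of the broadcast behaviour:

package main

import (
	"fmt"
	"sync"
)

func main() {
	cancel := make(chan struct{})

	var wg sync.WaitGroup
	for i := 0; i < 3; i++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			<-cancel // receiving from a closed channel unblocks immediately
			fmt.Println("worker", n, "cancelled")
		}(i)
	}
	close(cancel) // one close reaches all three workers; a single send would not
	wg.Wait()
}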