From d7741b175ddf822d39e698ffabceee3e06571b0b Mon Sep 17 00:00:00 2001 From: Nils-Erik Frantzell Date: Tue, 11 Jun 2019 20:29:53 -0700 Subject: [PATCH 1/7] Garbage collect incompatible peers Garbage collect peers in Host::run() which are deemed incompatible - an incompatible peer is one with which we will never be able to connect to successfully peer with (e.g. on a different chain or network, not running any capabilities). --- libp2p/Common.h | 11 +++++++++++ libp2p/Host.cpp | 42 ++++++++++++++++++++++++++++++++-------- libp2p/Host.h | 7 ++++++- libp2p/Peer.cpp | 25 +++++++++++++++++++----- libp2p/Peer.h | 4 +++- libp2p/RLPxHandshake.cpp | 30 +++++++++++++++++++++++----- libp2p/RLPxHandshake.h | 2 ++ 7 files changed, 101 insertions(+), 20 deletions(-) diff --git a/libp2p/Common.h b/libp2p/Common.h index cc2139f6f9f..82f814b50f1 100644 --- a/libp2p/Common.h +++ b/libp2p/Common.h @@ -110,6 +110,17 @@ enum DisconnectReason /// @returns the string form of the given disconnection reason. std::string reasonOf(DisconnectReason _r); +enum HandshakeFailureReason +{ + NoFailure = 0, + UnknownFailure, + Timeout, + TcpError, + FrameDecryptionFailure, + InternalError, + ProtocolError +}; + using CapDesc = std::pair; using CapDescSet = std::set; using CapDescs = std::vector; diff --git a/libp2p/Host.cpp b/libp2p/Host.cpp index 195dad440eb..fc195162219 100644 --- a/libp2p/Host.cpp +++ b/libp2p/Host.cpp @@ -196,6 +196,25 @@ void Host::stopCapabilities() } } +std::shared_ptr Host::peer(NodeID const& _n) const +{ + RecursiveGuard l(x_sessions); + auto it = m_peers.find(_n); + if (it == m_peers.end()) + { + LOG(m_logger) << "Peer " << _n << " not found"; + return nullptr; + } + return it->second; +} + +void Host::handshakeFailed(NodeID const& _n, HandshakeFailureReason _r) +{ + std::shared_ptr p = peer(_n); + assert(p); + p->m_lastHandshakeFailure = _r; +} + void Host::doneWorking() { // Return early if we have no capabilities since there's nothing to do. We've already stopped @@ -284,6 +303,7 @@ void Host::startPeerSession(Public const& _id, RLP const& _hello, m_peers[_id] = peer; } } + peer->m_lastHandshakeFailure = NoFailure; if (peer->isOffline()) peer->m_lastConnected = chrono::system_clock::now(); peer->endpoint.setAddress(_s->remoteEndpoint().address()); @@ -783,15 +803,21 @@ void Host::run(boost::system::error_code const& _ec) unsigned reqConn = 0; { RecursiveGuard l(x_sessions); - for (auto const& p : m_peers) { - bool haveSession = havePeerSession(p.second->id); - bool required = p.second->peerType == PeerType::Required; - if (haveSession && required) - reqConn++; - else if (!haveSession && p.second->shouldReconnect() && - (!m_netConfig.pin || required)) - toConnect.push_back(p.second); + for (auto p = m_peers.cbegin(); p != m_peers.cend(); p++) + { + bool haveSession = havePeerSession(p->second->id); + bool required = p->second->peerType == PeerType::Required; + if (haveSession && required) + reqConn++; + else if (!haveSession) + { + if (p->second->fallbackSeconds() == numeric_limits::max()) + p = m_peers.erase(p); + else if (p->second->shouldReconnect() && (!m_netConfig.pin || required)) + toConnect.push_back(p->second); + } + } } } diff --git a/libp2p/Host.h b/libp2p/Host.h index ebe26b54db7..420131471f6 100644 --- a/libp2p/Host.h +++ b/libp2p/Host.h @@ -343,6 +343,11 @@ class Host: public Worker /// Stop registered capabilities, typically done when the network is being shut down. void stopCapabilities(); + std::shared_ptr peer(NodeID const& _n) const; + + /// Set a handshake failure reason for a peer + void handshakeFailed(NodeID const& _n, HandshakeFailureReason _r); + bytes m_restoreNetwork; ///< Set by constructor and used to set Host key and restore network peers & nodes. std::atomic m_run{false}; ///< Whether network is running. @@ -408,7 +413,7 @@ class Host: public Worker /// logging to once every c_logActivePeersInterval seconds std::chrono::steady_clock::time_point m_lastPeerLogMessage; - Logger m_logger{createLogger(VerbosityDebug, "net")}; + mutable Logger m_logger{createLogger(VerbosityDebug, "net")}; Logger m_detailsLogger{createLogger(VerbosityTrace, "net")}; Logger m_infoLogger{createLogger(VerbosityInfo, "net")}; }; diff --git a/libp2p/Peer.cpp b/libp2p/Peer.cpp index 317ec36e643..a232c7f80bd 100644 --- a/libp2p/Peer.cpp +++ b/libp2p/Peer.cpp @@ -30,12 +30,12 @@ namespace dev namespace p2p { - -Peer::Peer(Peer const& _original): - Node(_original), +Peer::Peer(Peer const& _original) + : Node(_original), m_lastConnected(_original.m_lastConnected), m_lastAttempted(_original.m_lastAttempted), m_lastDisconnect(_original.m_lastDisconnect), + m_lastHandshakeFailure(_original.m_lastHandshakeFailure), m_session(_original.m_session) { m_score = _original.m_score.load(); @@ -45,18 +45,33 @@ Peer::Peer(Peer const& _original): bool Peer::shouldReconnect() const { - return id && endpoint && chrono::system_clock::now() > m_lastAttempted + chrono::seconds(fallbackSeconds()); + return id && endpoint && + fallbackSeconds() != numeric_limits::max() && + chrono::system_clock::now() > m_lastAttempted + chrono::seconds(fallbackSeconds()); } unsigned Peer::fallbackSeconds() const { if (peerType == PeerType::Required) return 5; + + switch (m_lastHandshakeFailure) + { + case FrameDecryptionFailure: + case ProtocolError: + return numeric_limits::max(); + default: + break; + } + switch (m_lastDisconnect) { case BadProtocol: - return 30 * (m_failedAttempts + 1); case UselessPeer: + case IncompatibleProtocol: + case UnexpectedIdentity: + case UserReason: + return numeric_limits::max(); case TooManyPeers: return 25 * (m_failedAttempts + 1); case ClientQuit: diff --git a/libp2p/Peer.h b/libp2p/Peer.h index 6748dd89cfe..9befb5cc04c 100644 --- a/libp2p/Peer.h +++ b/libp2p/Peer.h @@ -80,7 +80,8 @@ class Peer: public Node void noteSessionGood() { m_failedAttempts = 0; } private: - /// Returns number of seconds to wait until attempting connection, based on attempted connection history. + /// Returns number of seconds to wait until attempting connection, based on attempted connection history, or + /// numeric_limits::max() if a connection should never be attempted. unsigned fallbackSeconds() const; std::atomic m_score{0}; ///< All time cumulative. @@ -92,6 +93,7 @@ class Peer: public Node std::chrono::system_clock::time_point m_lastAttempted; std::atomic m_failedAttempts{0}; DisconnectReason m_lastDisconnect = NoDisconnect; ///< Reason for disconnect that happened last. + HandshakeFailureReason m_lastHandshakeFailure = NoFailure; ///< Reason for most recent handshake failure /// Used by isOffline() and (todo) for peer to emit session information. std::weak_ptr m_session; diff --git a/libp2p/RLPxHandshake.cpp b/libp2p/RLPxHandshake.cpp index da9a26b9682..224e26e336d 100644 --- a/libp2p/RLPxHandshake.cpp +++ b/libp2p/RLPxHandshake.cpp @@ -246,6 +246,9 @@ void RLPXHandshake::cancel() void RLPXHandshake::error(boost::system::error_code _ech) { + if (m_originated) + m_host->handshakeFailed(m_remote, m_failureReason); + stringstream errorStream; errorStream << "Handshake failed"; if (_ech) @@ -278,7 +281,9 @@ void RLPXHandshake::transition(boost::system::error_code _ech) if (!_ec) { LOG(m_logger) << "Disconnecting (Handshake Timeout) from"; - cancel(); + m_failureReason = Timeout; + m_nextState = Error; + transition(); } }); @@ -328,10 +333,12 @@ void RLPXHandshake::transition(boost::system::error_code _ech) bytes packet; s.swapOut(packet); m_io->writeSingleFramePacket(&packet, m_handshakeOutBuffer); - ba::async_write(m_socket->ref(), ba::buffer(m_handshakeOutBuffer), [this, self](boost::system::error_code ec, std::size_t) - { - transition(ec); - }); + ba::async_write(m_socket->ref(), ba::buffer(m_handshakeOutBuffer), + [this, self](boost::system::error_code ec, std::size_t) { + if (ec) + m_failureReason = TcpError; + transition(ec); + }); } else if (m_nextState == ReadHello) { @@ -346,7 +353,10 @@ void RLPXHandshake::transition(boost::system::error_code _ech) boost::asio::buffer(m_handshakeInBuffer, handshakeSizeBytes), [this, self](boost::system::error_code ec, std::size_t) { if (ec) + { + m_failureReason = TcpError; transition(ec); + } else { if (!m_io) @@ -354,6 +364,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) LOG(m_errorLogger) << "Internal error in handshake: RLPXFrameCoder disappeared (" << m_remote << ")"; + m_failureReason = InternalError; m_nextState = Error; transition(); return; @@ -365,6 +376,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) if (!m_io->authAndDecryptHeader( bytesRef(m_handshakeInBuffer.data(), m_handshakeInBuffer.size()))) { + m_failureReason = FrameDecryptionFailure; m_nextState = Error; transition(); return; @@ -383,6 +395,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) LOG(m_logger) << "Frame is too large! Expected size: " << expectedFrameSizeBytes << " bytes, actual size: " << frameSize << " bytes"; + m_failureReason = ProtocolError; m_nextState = Error; transition(); return; @@ -407,13 +420,17 @@ void RLPXHandshake::transition(boost::system::error_code _ech) m_idleTimer.cancel(); if (ec) + { + m_failureReason = TcpError; transition(ec); + } else { if (!m_io) { LOG(m_errorLogger) << "Internal error in handshake: " "RLPXFrameCoder disappeared"; + m_failureReason = InternalError; m_nextState = Error; transition(); return; @@ -423,6 +440,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) if (!m_io->authAndDecryptFrame(frame)) { LOG(m_logger) << "Frame body decrypt failed"; + m_failureReason = FrameDecryptionFailure; m_nextState = Error; transition(); return; @@ -436,6 +454,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) << "Invalid packet type. Expected: " << p2pPacketTypeToString(HelloPacket) << ", received: " << p2pPacketTypeToString(packetType); + m_failureReason = ProtocolError; m_nextState = Error; transition(); return; @@ -453,6 +472,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) { LOG(m_errorLogger) << "Handshake causing an exception: " << _e.what(); + m_failureReason = UnknownFailure; m_nextState = Error; transition(); } diff --git a/libp2p/RLPxHandshake.h b/libp2p/RLPxHandshake.h index bae3e2a6b76..2036264ea22 100644 --- a/libp2p/RLPxHandshake.h +++ b/libp2p/RLPxHandshake.h @@ -137,6 +137,8 @@ class RLPXHandshake: public std::enable_shared_from_this /// Timer which enforces c_timeout. Reset for each stage of the handshake. ba::steady_timer m_idleTimer; + HandshakeFailureReason m_failureReason; + Logger m_logger{createLogger(VerbosityTrace, "rlpx")}; Logger m_errorLogger{createLogger(VerbosityError, "rlpx")}; }; From 06b00ce17147d62e5efc2b220639905f5e609cf0 Mon Sep 17 00:00:00 2001 From: Nils-Erik Frantzell Date: Tue, 11 Jun 2019 20:53:25 -0700 Subject: [PATCH 2/7] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 585f6491783..a28ab1404ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Added: [#5591](https://github.com/ethereum/aleth/pull/5591) Network logging bugfixes and improvements and add p2pcap log channel. - Added: [#5588](https://github.com/ethereum/aleth/pull/5588) Testeth prints similar test suite name suggestions, when the name passed in `-t` argument is not found. - Added: [#5593](https://github.com/ethereum/aleth/pull/5593) Dynamically updating host ENR. +- Added: [#5624](https://github.com/ethereum/aleth/pull/5624) Remove useless peers from peer list. - Changed: [#5532](https://github.com/ethereum/aleth/pull/5532) The leveldb is upgraded to 1.22. This is breaking change on Windows and the old databases are not compatible. - Changed: [#5559](https://github.com/ethereum/aleth/pull/5559) Update peer validation error messages. - Changed: [#5568](https://github.com/ethereum/aleth/pull/5568) Improve rlpx handshake log messages and create new rlpx log channel. From a5875751e63bea8bc685d839c998d49f2abd9e66 Mon Sep 17 00:00:00 2001 From: Nils-Erik Frantzell Date: Wed, 12 Jun 2019 09:55:39 -0700 Subject: [PATCH 3/7] Set handshake failure reason in EIP8 code paths --- libp2p/RLPxHandshake.cpp | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/libp2p/RLPxHandshake.cpp b/libp2p/RLPxHandshake.cpp index 224e26e336d..c9d9ee7a870 100644 --- a/libp2p/RLPxHandshake.cpp +++ b/libp2p/RLPxHandshake.cpp @@ -112,10 +112,12 @@ void RLPXHandshake::writeAckEIP8() m_ackCipher.insert(m_ackCipher.begin(), prefix.begin(), prefix.end()); auto self(shared_from_this()); - ba::async_write(m_socket->ref(), ba::buffer(m_ackCipher), [this, self](boost::system::error_code ec, std::size_t) - { - transition(ec); - }); + ba::async_write(m_socket->ref(), ba::buffer(m_ackCipher), + [this, self](boost::system::error_code ec, std::size_t) { + if (ec) + m_failureReason = TcpError; + transition(ec); + }); } void RLPXHandshake::setAuthValues(Signature const& _sig, Public const& _remotePubk, h256 const& _remoteNonce, uint64_t _remoteVersion) @@ -135,7 +137,10 @@ void RLPXHandshake::readAuth() ba::async_read(m_socket->ref(), ba::buffer(m_authCipher, c_authCipherSizeBytes), [this, self](boost::system::error_code ec, std::size_t) { if (ec) + { + m_failureReason = TcpError; transition(ec); + } else if (decryptECIES(m_host->m_alias.secret(), bytesConstRef(&m_authCipher), m_auth)) { LOG(m_logger) << "auth from"; @@ -163,7 +168,10 @@ void RLPXHandshake::readAuthEIP8() { bytesConstRef ct(&m_authCipher); if (ec) + { + m_failureReason = TcpError; transition(ec); + } else if (decryptECIES(m_host->m_alias.secret(), ct.cropped(0, 2), ct.cropped(2), m_auth)) { RLP rlp(m_auth, RLP::ThrowOnFail | RLP::FailIfTooSmall); @@ -180,6 +188,7 @@ void RLPXHandshake::readAuthEIP8() { LOG(m_logger) << "EIP-8 auth decrypt failed"; m_nextState = Error; + m_failureReason = FrameDecryptionFailure; transition(); } }); @@ -192,7 +201,10 @@ void RLPXHandshake::readAck() ba::async_read(m_socket->ref(), ba::buffer(m_ackCipher, c_ackCipherSizeBytes), [this, self](boost::system::error_code ec, std::size_t) { if (ec) + { + m_failureReason = TcpError; transition(ec); + } else if (decryptECIES(m_host->m_alias.secret(), bytesConstRef(&m_ackCipher), m_ack)) { LOG(m_logger) << "ack from"; @@ -218,7 +230,10 @@ void RLPXHandshake::readAckEIP8() { bytesConstRef ct(&m_ackCipher); if (ec) + { + m_failureReason = TcpError; transition(ec); + } else if (decryptECIES(m_host->m_alias.secret(), ct.cropped(0, 2), ct.cropped(2), m_ack)) { RLP rlp(m_ack, RLP::ThrowOnFail | RLP::FailIfTooSmall); @@ -230,6 +245,7 @@ void RLPXHandshake::readAckEIP8() else { LOG(m_logger) << "EIP-8 ack decrypt failed"; + m_failureReason = FrameDecryptionFailure; m_nextState = Error; transition(); } From 64f120c29860105824369ba537c9d0a0065bd5fa Mon Sep 17 00:00:00 2001 From: Nils-Erik Frantzell Date: Thu, 13 Jun 2019 20:54:51 -0700 Subject: [PATCH 4/7] Add Peer::uselessPeer / log error when no peer found Rather than use a magic number to indicate a useless peer (numeric_limits::max()), add a function to Peer (Peer::uselessPeer) which can be called to detect this. Also update Peer::fallbackSeconds() to return a number which is large enough to prevent us from realistically ever connecting to a useless peer but which is small enough to not overflow when added to Peer::m_lastAttempted. Also, log a message with error verbosity in Host::handshakeFailed() if a peer is not found rather than asserting, since while we never expect users to hit this case, the EIP8 handshake tests can. --- libp2p/Host.cpp | 8 ++++++-- libp2p/Peer.cpp | 34 +++++++++++++++++++++++++++++++--- libp2p/Peer.h | 4 ++++ 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/libp2p/Host.cpp b/libp2p/Host.cpp index fc195162219..b69af5b55eb 100644 --- a/libp2p/Host.cpp +++ b/libp2p/Host.cpp @@ -211,7 +211,11 @@ std::shared_ptr Host::peer(NodeID const& _n) const void Host::handshakeFailed(NodeID const& _n, HandshakeFailureReason _r) { std::shared_ptr p = peer(_n); - assert(p); + if (!p) + { + cerror << "Peer " << _n << " not found"; + return; + } p->m_lastHandshakeFailure = _r; } @@ -812,7 +816,7 @@ void Host::run(boost::system::error_code const& _ec) reqConn++; else if (!haveSession) { - if (p->second->fallbackSeconds() == numeric_limits::max()) + if (p->second->uselessPeer()) p = m_peers.erase(p); else if (p->second->shouldReconnect() && (!m_netConfig.pin || required)) toConnect.push_back(p->second); diff --git a/libp2p/Peer.cpp b/libp2p/Peer.cpp index a232c7f80bd..a4633a9539b 100644 --- a/libp2p/Peer.cpp +++ b/libp2p/Peer.cpp @@ -49,9 +49,37 @@ bool Peer::shouldReconnect() const fallbackSeconds() != numeric_limits::max() && chrono::system_clock::now() > m_lastAttempted + chrono::seconds(fallbackSeconds()); } - + +bool Peer::uselessPeer() const +{ + switch (m_lastHandshakeFailure) + { + case FrameDecryptionFailure: + case ProtocolError: + return true; + default: + break; + } + + switch (m_lastDisconnect) + { + case BadProtocol: + case UselessPeer: + case IncompatibleProtocol: + case UnexpectedIdentity: + case UserReason: + return true; + default: + break; + } + return false; +} + + unsigned Peer::fallbackSeconds() const { + constexpr unsigned oneYearInSeconds{60 * 60 * 24 * 360}; + if (peerType == PeerType::Required) return 5; @@ -59,7 +87,7 @@ unsigned Peer::fallbackSeconds() const { case FrameDecryptionFailure: case ProtocolError: - return numeric_limits::max(); + return oneYearInSeconds; default: break; } @@ -71,7 +99,7 @@ unsigned Peer::fallbackSeconds() const case IncompatibleProtocol: case UnexpectedIdentity: case UserReason: - return numeric_limits::max(); + return oneYearInSeconds; case TooManyPeers: return 25 * (m_failedAttempts + 1); case ClientQuit: diff --git a/libp2p/Peer.h b/libp2p/Peer.h index 9befb5cc04c..8f3cb7388b8 100644 --- a/libp2p/Peer.h +++ b/libp2p/Peer.h @@ -70,6 +70,10 @@ class Peer: public Node /// Return true if connection attempt should be made to this peer or false if bool shouldReconnect() const; + /// A peer which should never be reconnected to - e.g. it's running on a different network, we + /// don't have any common capabilities + bool uselessPeer() const; + /// Number of times connection has been attempted to peer. int failedAttempts() const { return m_failedAttempts; } From e65197ad4ac0a5c8643b342adcf37161020d8011 Mon Sep 17 00:00:00 2001 From: Nils-Erik Frantzell Date: Thu, 13 Jun 2019 21:16:33 -0700 Subject: [PATCH 5/7] Required peers are never useless Update Peer::uselessPeer() to check peer type and return false if it's a required peer, since in this case the user obviously wants to try to stay connected to this peer. --- libp2p/Peer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libp2p/Peer.cpp b/libp2p/Peer.cpp index a4633a9539b..7c815a444c5 100644 --- a/libp2p/Peer.cpp +++ b/libp2p/Peer.cpp @@ -52,6 +52,9 @@ bool Peer::shouldReconnect() const bool Peer::uselessPeer() const { + if (peerType == PeerType::Required) + return false; + switch (m_lastHandshakeFailure) { case FrameDecryptionFailure: From 71d2ea6b52b7e98cd546f41c420c43d20f9a7ef3 Mon Sep 17 00:00:00 2001 From: Nils-Erik Frantzell Date: Sun, 16 Jun 2019 15:08:47 -0700 Subject: [PATCH 6/7] Send UselessPeer disconnect reason on status failure Status validation failing means we have no chance of syncing with the peer (e.g. it's running on a different network) so a UselessPeer disconnect reason makes more sense than UserReason. This also has the benefit of status failure validation being treated as a critical disconnect and these nodes being gc'd in Host::run. --- libethereum/BlockChainSync.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libethereum/BlockChainSync.cpp b/libethereum/BlockChainSync.cpp index e55eb77167b..3c928a78a3e 100644 --- a/libethereum/BlockChainSync.cpp +++ b/libethereum/BlockChainSync.cpp @@ -197,7 +197,7 @@ void BlockChainSync::onPeerStatus(EthereumPeer const& _peer) if (!disconnectReason.empty()) { LOG(m_logger) << "Peer " << _peer.id() << " not suitable for sync: " << disconnectReason; - m_host.capabilityHost().disconnect(_peer.id(), p2p::UserReason); + m_host.capabilityHost().disconnect(_peer.id(), p2p::UselessPeer); return; } From b0d5485951ff0bf183ece12635eb03597b44bfd9 Mon Sep 17 00:00:00 2001 From: Nils-Erik Frantzell Date: Sun, 16 Jun 2019 15:24:42 -0700 Subject: [PATCH 7/7] Address PR feedback Various minor changes in RLPxHandshake, Host, and Peer classes: * Initialize RLPxHandshake::m_failureReason in ctor * Reduce redundant code in RLPxHandshake which sets the last failure reason when TCP errors occur * Rename Host::handshakeFailed * Fix m_peers iteration bug in Host * Make Host::onHandshakeFailed public (we want to avoid using friend classes where possible) * Rename Peers::uselessPeer * Update deprecated comment in Peer * Take # of failed connection attempts into account in Peer function which determines if instance is useless or not Move fallback seconds computation for default case to anonymous namespace function --- libp2p/Host.cpp | 54 +++++++++++++++++----------------- libp2p/Host.h | 6 ++-- libp2p/Peer.cpp | 63 ++++++++++++++++++++++------------------ libp2p/Peer.h | 6 ++-- libp2p/RLPxHandshake.cpp | 30 ++++--------------- 5 files changed, 72 insertions(+), 87 deletions(-) diff --git a/libp2p/Host.cpp b/libp2p/Host.cpp index b69af5b55eb..58fe25009fd 100644 --- a/libp2p/Host.cpp +++ b/libp2p/Host.cpp @@ -200,23 +200,7 @@ std::shared_ptr Host::peer(NodeID const& _n) const { RecursiveGuard l(x_sessions); auto it = m_peers.find(_n); - if (it == m_peers.end()) - { - LOG(m_logger) << "Peer " << _n << " not found"; - return nullptr; - } - return it->second; -} - -void Host::handshakeFailed(NodeID const& _n, HandshakeFailureReason _r) -{ - std::shared_ptr p = peer(_n); - if (!p) - { - cerror << "Peer " << _n << " not found"; - return; - } - p->m_lastHandshakeFailure = _r; + return it != m_peers.end() ? it->second : nullptr; } void Host::doneWorking() @@ -294,7 +278,10 @@ void Host::startPeerSession(Public const& _id, RLP const& _hello, { auto itPeer = m_peers.find(_id); if (itPeer != m_peers.end()) + { peer = itPeer->second; + peer->m_lastHandshakeFailure = NoFailure; + } else { // peer doesn't exist, try to get port info from node table @@ -307,7 +294,6 @@ void Host::startPeerSession(Public const& _id, RLP const& _hello, m_peers[_id] = peer; } } - peer->m_lastHandshakeFailure = NoFailure; if (peer->isOffline()) peer->m_lastConnected = chrono::system_clock::now(); peer->endpoint.setAddress(_s->remoteEndpoint().address()); @@ -410,6 +396,13 @@ void Host::startPeerSession(Public const& _id, RLP const& _hello, << _s->remoteEndpoint(); } +void Host::onHandshakeFailed(NodeID const& _n, HandshakeFailureReason _r) +{ + std::shared_ptr p = peer(_n); + if (p) + p->m_lastHandshakeFailure = _r; +} + void Host::onNodeTableEvent(NodeID const& _n, NodeTableEventType const& _e) { if (_e == NodeEntryAdded) @@ -807,21 +800,26 @@ void Host::run(boost::system::error_code const& _ec) unsigned reqConn = 0; { RecursiveGuard l(x_sessions); + auto p = m_peers.cbegin(); + while (p != m_peers.cend()) { - for (auto p = m_peers.cbegin(); p != m_peers.cend(); p++) + bool peerRemoved = false; + bool haveSession = havePeerSession(p->second->id); + bool required = p->second->peerType == PeerType::Required; + if (haveSession && required) + reqConn++; + else if (!haveSession) { - bool haveSession = havePeerSession(p->second->id); - bool required = p->second->peerType == PeerType::Required; - if (haveSession && required) - reqConn++; - else if (!haveSession) + if (p->second->isUseless()) { - if (p->second->uselessPeer()) - p = m_peers.erase(p); - else if (p->second->shouldReconnect() && (!m_netConfig.pin || required)) - toConnect.push_back(p->second); + peerRemoved = true; + p = m_peers.erase(p); } + else if (p->second->shouldReconnect() && (!m_netConfig.pin || required)) + toConnect.push_back(p->second); } + if (!peerRemoved) + p++; } } diff --git a/libp2p/Host.h b/libp2p/Host.h index 420131471f6..cee8a9d699e 100644 --- a/libp2p/Host.h +++ b/libp2p/Host.h @@ -214,6 +214,9 @@ class Host: public Worker return m_sessions.count(_id) ? m_sessions[_id].lock() : std::shared_ptr(); } + /// Set a handshake failure reason for a peer + void onHandshakeFailed(NodeID const& _n, HandshakeFailureReason _r); + /// Get our current node ID. NodeID id() const { return m_alias.pub(); } @@ -345,9 +348,6 @@ class Host: public Worker std::shared_ptr peer(NodeID const& _n) const; - /// Set a handshake failure reason for a peer - void handshakeFailed(NodeID const& _n, HandshakeFailureReason _r); - bytes m_restoreNetwork; ///< Set by constructor and used to set Host key and restore network peers & nodes. std::atomic m_run{false}; ///< Whether network is running. diff --git a/libp2p/Peer.cpp b/libp2p/Peer.cpp index 7c815a444c5..293f3547514 100644 --- a/libp2p/Peer.cpp +++ b/libp2p/Peer.cpp @@ -30,6 +30,19 @@ namespace dev namespace p2p { +namespace +{ +unsigned defaultFallbackSeconds(unsigned _failedAttempts) +{ + if (_failedAttempts < 5) + return _failedAttempts ? _failedAttempts * 5 : 5; + else if (_failedAttempts < 15) + return 25 + (_failedAttempts - 5) * 10; + else + return 25 + 100 + (_failedAttempts - 15) * 20; +} +} // namespace + Peer::Peer(Peer const& _original) : Node(_original), m_lastConnected(_original.m_lastConnected), @@ -45,12 +58,11 @@ Peer::Peer(Peer const& _original) bool Peer::shouldReconnect() const { - return id && endpoint && - fallbackSeconds() != numeric_limits::max() && + return id && endpoint && !isUseless() && chrono::system_clock::now() > m_lastAttempted + chrono::seconds(fallbackSeconds()); } -bool Peer::uselessPeer() const +bool Peer::isUseless() const { if (peerType == PeerType::Required) return false; @@ -66,19 +78,29 @@ bool Peer::uselessPeer() const switch (m_lastDisconnect) { + // Critical cases case BadProtocol: case UselessPeer: case IncompatibleProtocol: case UnexpectedIdentity: - case UserReason: + case DuplicatePeer: + case NullIdentity: return true; + // Potentially transient cases which can resolve quickly + case PingTimeout: + case TCPError: + case TooManyPeers: + return m_failedAttempts >= 10; + // Potentially transient cases which can take longer to resolve + case ClientQuit: + case UserReason: + return m_failedAttempts >= 25; default: break; } return false; } - unsigned Peer::fallbackSeconds() const { constexpr unsigned oneYearInSeconds{60 * 60 * 24 * 360}; @@ -86,35 +108,20 @@ unsigned Peer::fallbackSeconds() const if (peerType == PeerType::Required) return 5; - switch (m_lastHandshakeFailure) - { - case FrameDecryptionFailure: - case ProtocolError: - return oneYearInSeconds; - default: - break; - } + if (isUseless()) + return oneYearInSeconds; switch (m_lastDisconnect) { - case BadProtocol: - case UselessPeer: - case IncompatibleProtocol: - case UnexpectedIdentity: - case UserReason: - return oneYearInSeconds; + case TCPError: + case PingTimeout: case TooManyPeers: - return 25 * (m_failedAttempts + 1); - case ClientQuit: return 15 * (m_failedAttempts + 1); - case NoDisconnect: + case ClientQuit: + case UserReason: + return 25 * (m_failedAttempts + 1); default: - if (m_failedAttempts < 5) - return m_failedAttempts ? m_failedAttempts * 5 : 5; - else if (m_failedAttempts < 15) - return 25 + (m_failedAttempts - 5) * 10; - else - return 25 + 100 + (m_failedAttempts - 15) * 20; + return defaultFallbackSeconds(m_failedAttempts); } } diff --git a/libp2p/Peer.h b/libp2p/Peer.h index 8f3cb7388b8..68e690def26 100644 --- a/libp2p/Peer.h +++ b/libp2p/Peer.h @@ -72,7 +72,7 @@ class Peer: public Node /// A peer which should never be reconnected to - e.g. it's running on a different network, we /// don't have any common capabilities - bool uselessPeer() const; + bool isUseless() const; /// Number of times connection has been attempted to peer. int failedAttempts() const { return m_failedAttempts; } @@ -84,8 +84,8 @@ class Peer: public Node void noteSessionGood() { m_failedAttempts = 0; } private: - /// Returns number of seconds to wait until attempting connection, based on attempted connection history, or - /// numeric_limits::max() if a connection should never be attempted. + /// Returns number of seconds to wait until attempting connection, based on attempted connection + /// history unsigned fallbackSeconds() const; std::atomic m_score{0}; ///< All time cumulative. diff --git a/libp2p/RLPxHandshake.cpp b/libp2p/RLPxHandshake.cpp index c9d9ee7a870..2fe7be00bb8 100644 --- a/libp2p/RLPxHandshake.cpp +++ b/libp2p/RLPxHandshake.cpp @@ -30,7 +30,8 @@ RLPXHandshake::RLPXHandshake( m_remote(_remote), m_originated(_remote), m_socket(_socket), - m_idleTimer(m_socket->ref().get_executor()) + m_idleTimer(m_socket->ref().get_executor()), + m_failureReason{NoFailure} { auto const prefixAttr = boost::log::attributes::constant{connectionDirectionString()}; @@ -114,8 +115,6 @@ void RLPXHandshake::writeAckEIP8() auto self(shared_from_this()); ba::async_write(m_socket->ref(), ba::buffer(m_ackCipher), [this, self](boost::system::error_code ec, std::size_t) { - if (ec) - m_failureReason = TcpError; transition(ec); }); } @@ -137,10 +136,7 @@ void RLPXHandshake::readAuth() ba::async_read(m_socket->ref(), ba::buffer(m_authCipher, c_authCipherSizeBytes), [this, self](boost::system::error_code ec, std::size_t) { if (ec) - { - m_failureReason = TcpError; transition(ec); - } else if (decryptECIES(m_host->m_alias.secret(), bytesConstRef(&m_authCipher), m_auth)) { LOG(m_logger) << "auth from"; @@ -168,10 +164,7 @@ void RLPXHandshake::readAuthEIP8() { bytesConstRef ct(&m_authCipher); if (ec) - { - m_failureReason = TcpError; transition(ec); - } else if (decryptECIES(m_host->m_alias.secret(), ct.cropped(0, 2), ct.cropped(2), m_auth)) { RLP rlp(m_auth, RLP::ThrowOnFail | RLP::FailIfTooSmall); @@ -201,10 +194,7 @@ void RLPXHandshake::readAck() ba::async_read(m_socket->ref(), ba::buffer(m_ackCipher, c_ackCipherSizeBytes), [this, self](boost::system::error_code ec, std::size_t) { if (ec) - { - m_failureReason = TcpError; transition(ec); - } else if (decryptECIES(m_host->m_alias.secret(), bytesConstRef(&m_ackCipher), m_ack)) { LOG(m_logger) << "ack from"; @@ -230,10 +220,7 @@ void RLPXHandshake::readAckEIP8() { bytesConstRef ct(&m_ackCipher); if (ec) - { - m_failureReason = TcpError; transition(ec); - } else if (decryptECIES(m_host->m_alias.secret(), ct.cropped(0, 2), ct.cropped(2), m_ack)) { RLP rlp(m_ack, RLP::ThrowOnFail | RLP::FailIfTooSmall); @@ -262,8 +249,7 @@ void RLPXHandshake::cancel() void RLPXHandshake::error(boost::system::error_code _ech) { - if (m_originated) - m_host->handshakeFailed(m_remote, m_failureReason); + m_host->onHandshakeFailed(m_remote, m_failureReason); stringstream errorStream; errorStream << "Handshake failed"; @@ -286,6 +272,8 @@ void RLPXHandshake::transition(boost::system::error_code _ech) if (_ech || m_nextState == Error || m_cancel) { + if (_ech) + m_failureReason = TcpError; return error(_ech); } @@ -351,8 +339,6 @@ void RLPXHandshake::transition(boost::system::error_code _ech) m_io->writeSingleFramePacket(&packet, m_handshakeOutBuffer); ba::async_write(m_socket->ref(), ba::buffer(m_handshakeOutBuffer), [this, self](boost::system::error_code ec, std::size_t) { - if (ec) - m_failureReason = TcpError; transition(ec); }); } @@ -369,10 +355,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) boost::asio::buffer(m_handshakeInBuffer, handshakeSizeBytes), [this, self](boost::system::error_code ec, std::size_t) { if (ec) - { - m_failureReason = TcpError; transition(ec); - } else { if (!m_io) @@ -436,10 +419,7 @@ void RLPXHandshake::transition(boost::system::error_code _ech) m_idleTimer.cancel(); if (ec) - { - m_failureReason = TcpError; transition(ec); - } else { if (!m_io)