From c3d6f5461d00502ce233e1f63fb317d3c667fa4f Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Thu, 24 Aug 2017 20:17:52 +0200 Subject: [PATCH 1/3] Fix duplicate headers download in initial sync Now that initial block download is delayed until the headers sync is done, it was noticed that the initial headers sync may happen multiple times in parallel in the case new blocks are announced. This happens because for every block in INV that is received, a getheaders message is immediately sent out resulting in a full download of the headers chain starting from the point of where the initial headers sync is currently at. This happens once for each peer that announces the new block. This slows down the initial headers sync and increases the chance of another block being announced before it is finished, probably leading to the same behavior as already described, slowing down the sync even more...and so on. This commit delays sending of GETHEADERS to later in case the chain is too far behind while a new block gets announced. Header chains will still be downloaded multiple times, but the downloading will start much closer to the tip of the chain, so the damage is not that bad anymore. This ensures that we get all headers from all peers, even if any of them is on another chain. This should avoid what happened in https://github.com/bitcoin/bitcoin/pull/8054 which needed to be reverted later. This fixes the Bitcoin issue https://github.com/bitcoin/bitcoin/issues/6755 --- src/net.h | 9 +++++++ src/net_processing.cpp | 56 +++++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 17 deletions(-) diff --git a/src/net.h b/src/net.h index ee8f7a0980a3b..815d00d7eb449 100644 --- a/src/net.h +++ b/src/net.h @@ -742,6 +742,9 @@ class CNode // Used for headers announcements - unfiltered blocks to relay // Also protected by cs_inventory std::vector vBlockHashesToAnnounce; + // Blocks received by INV while headers chain was too far behind. These are used to delay GETHEADERS messages + // Also protected by cs_inventory + std::vector vBlockHashesFromINV; // Block and TXN accept times std::atomic nLastBlockTime; @@ -875,6 +878,12 @@ class CNode vBlockHashesToAnnounce.push_back(hash); } + void PushBlockHashFromINV(const uint256 &hash) + { + LOCK(cs_inventory); + vBlockHashesFromINV.push_back(hash); + } + void AskFor(const CInv& inv); void CloseSocketDisconnect(); diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 4304367e0760e..be97a458c0d20 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -1380,24 +1380,35 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, if (inv.type == MSG_BLOCK) { UpdateBlockAvailability(pfrom->GetId(), inv.hash); if (!fAlreadyHave && !fImporting && !fReindex && !mapBlocksInFlight.count(inv.hash)) { - // First request the headers preceding the announced block. In the normal fully-synced - // case where a new block is announced that succeeds the current tip (no reorganization), - // there are no such headers. - // Secondly, and only when we are close to being synced, we request the announced block directly, - // to avoid an extra round-trip. Note that we must *first* ask for the headers, so by the - // time the block arrives, the header chain leading up to it is already validated. Not - // doing this will result in the received block being rejected as an orphan in case it is - // not a direct successor. - connman.PushMessage(pfrom, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), inv.hash); - CNodeState *nodestate = State(pfrom->GetId()); - if (CanDirectFetch(chainparams.GetConsensus()) && - nodestate->nBlocksInFlight < MAX_BLOCKS_IN_TRANSIT_PER_PEER) { - vToFetch.push_back(inv); - // Mark block as in flight already, even though the actual "getdata" message only goes out - // later (within the same cs_main lock, though). - MarkBlockAsInFlight(pfrom->GetId(), inv.hash, chainparams.GetConsensus()); + if (pindexBestHeader->GetBlockTime() < GetAdjustedTime() - 7 * 24 * 60 * 60) { + // We are pretty far from being completely synced at the moment. If we would initiate a new + // chain of GETHEADERS/HEADERS now, we may end up downnloading the full chain from multiple + // peers at the same time, slowing down the initial sync. At the same time, we don't know + // if the peer we got this INV from may have a chain we don't know about yet, so we HAVE TO + // send a GETHEADERS message at some point in time. This is delayed to later in SendMessages + // when the headers chain has catched up enough. + LogPrint("net", "delaying getheaders (%d) %s to peer=%d\n", pindexBestHeader->nHeight, inv.hash.ToString(), pfrom->id); + pfrom->PushBlockHashFromINV(inv.hash); + } else { + // First request the headers preceding the announced block. In the normal fully-synced + // case where a new block is announced that succeeds the current tip (no reorganization), + // there are no such headers. + // Secondly, and only when we are close to being synced, we request the announced block directly, + // to avoid an extra round-trip. Note that we must *first* ask for the headers, so by the + // time the block arrives, the header chain leading up to it is already validated. Not + // doing this will result in the received block being rejected as an orphan in case it is + // not a direct successor. + connman.PushMessage(pfrom, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), inv.hash); + CNodeState *nodestate = State(pfrom->GetId()); + if (CanDirectFetch(chainparams.GetConsensus()) && + nodestate->nBlocksInFlight < MAX_BLOCKS_IN_TRANSIT_PER_PEER) { + vToFetch.push_back(inv); + // Mark block as in flight already, even though the actual "getdata" message only goes out + // later (within the same cs_main lock, though). + MarkBlockAsInFlight(pfrom->GetId(), inv.hash, chainparams.GetConsensus()); + } + LogPrint("net", "getheaders (%d) %s to peer=%d\n", pindexBestHeader->nHeight, inv.hash.ToString(), pfrom->id); } - LogPrint("net", "getheaders (%d) %s to peer=%d\n", pindexBestHeader->nHeight, inv.hash.ToString(), pfrom->id); } } else @@ -2391,6 +2402,17 @@ bool SendMessages(CNode* pto, CConnman& connman, std::atomic& interruptMsg } } + if (pindexBestHeader->GetBlockTime() > GetAdjustedTime() - 7 * 24 * 60 * 60) { + // Headers chain has catched up enough so we can send out GETHEADER messages which were initially meant to + // be sent directly after INV was received + LOCK(pto->cs_inventory); + BOOST_FOREACH(const uint256 &hash, pto->vBlockHashesFromINV) { + LogPrint("net", "process delayed getheaders (%d) to peer=%d\n", pindexBestHeader->nHeight, pto->id); + connman.PushMessage(pto, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), hash); + } + pto->vBlockHashesFromINV.clear(); + } + // Resend wallet transactions that haven't gotten in a block yet // Except during reindex, importing and IBD, when old wallet // transactions become unconfirmed and spams other nodes. From fac0a6b1262879041c51b12c7df2a2962d6326ca Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Fri, 1 Sep 2017 19:24:56 +0200 Subject: [PATCH 2/3] Introduce DelayGetHeadersTime chain param and fix tests The delaying of GETHEADERS in combination with very old block times in test cases resulted in the delaying being triggered when the first newly mined block arrives. This results in a completely stalled sync. This is fixed by avoiding delaying in when running tests. --- src/chainparams.cpp | 3 +++ src/chainparams.h | 2 ++ src/net_processing.cpp | 7 ++++--- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/chainparams.cpp b/src/chainparams.cpp index 9c5f043d33809..bd8a815357632 100644 --- a/src/chainparams.cpp +++ b/src/chainparams.cpp @@ -125,6 +125,7 @@ class CMainParams : public CChainParams { vAlertPubKey = ParseHex("048240a8748a80a286b270ba126705ced4f2ce5a7847b3610ea3c06513150dade2a8512ed5ea86320824683fc0818f0ac019214973e677acd1244f6d0571fc5103"); nDefaultPort = 9999; nMaxTipAge = 6 * 60 * 60; // ~144 blocks behind -> 2 x fork detection time, was 24 * 60 * 60 in bitcoin + nDelayGetHeadersTime = 24 * 60 * 60; nPruneAfterHeight = 100000; genesis = CreateGenesisBlock(1390095618, 28917698, 0x1e0ffff0, 1, 50 * COIN); @@ -250,6 +251,7 @@ class CTestNetParams : public CChainParams { vAlertPubKey = ParseHex("04517d8a699cb43d3938d7b24faaff7cda448ca4ea267723ba614784de661949bf632d6304316b244646dea079735b9a6fc4af804efb4752075b9fe2245e14e412"); nDefaultPort = 19999; nMaxTipAge = 0x7fffffff; // allow mining on top of old blocks for testnet + nDelayGetHeadersTime = 24 * 60 * 60; nPruneAfterHeight = 1000; genesis = CreateGenesisBlock(1390666206UL, 3861367235UL, 0x1e0ffff0, 1, 50 * COIN); @@ -359,6 +361,7 @@ class CRegTestParams : public CChainParams { pchMessageStart[2] = 0xb7; pchMessageStart[3] = 0xdc; nMaxTipAge = 6 * 60 * 60; // ~144 blocks behind -> 2 x fork detection time, was 24 * 60 * 60 in bitcoin + nDelayGetHeadersTime = 0; // never delay GETHEADERS in regtests nDefaultPort = 19994; nPruneAfterHeight = 1000; diff --git a/src/chainparams.h b/src/chainparams.h index cf4e9df42a263..4160e6c53a531 100644 --- a/src/chainparams.h +++ b/src/chainparams.h @@ -65,6 +65,7 @@ class CChainParams /** Policy: Filter transactions that do not match well-defined patterns */ bool RequireStandard() const { return fRequireStandard; } int64_t MaxTipAge() const { return nMaxTipAge; } + int64_t DelayGetHeadersTime() const { return nDelayGetHeadersTime; } uint64_t PruneAfterHeight() const { return nPruneAfterHeight; } /** Make miner stop after a block is found. In RPC, don't return until nGenProcLimit blocks are generated */ bool MineBlocksOnDemand() const { return fMineBlocksOnDemand; } @@ -89,6 +90,7 @@ class CChainParams std::vector vAlertPubKey; int nDefaultPort; long nMaxTipAge; + int64_t nDelayGetHeadersTime; uint64_t nPruneAfterHeight; std::vector vSeeds; std::vector base58Prefixes[MAX_BASE58_TYPES]; diff --git a/src/net_processing.cpp b/src/net_processing.cpp index be97a458c0d20..203ccca270829 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -1380,7 +1380,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, if (inv.type == MSG_BLOCK) { UpdateBlockAvailability(pfrom->GetId(), inv.hash); if (!fAlreadyHave && !fImporting && !fReindex && !mapBlocksInFlight.count(inv.hash)) { - if (pindexBestHeader->GetBlockTime() < GetAdjustedTime() - 7 * 24 * 60 * 60) { + if (chainparams.DelayGetHeadersTime() != 0 && pindexBestHeader->GetBlockTime() < GetAdjustedTime() - chainparams.DelayGetHeadersTime()) { // We are pretty far from being completely synced at the moment. If we would initiate a new // chain of GETHEADERS/HEADERS now, we may end up downnloading the full chain from multiple // peers at the same time, slowing down the initial sync. At the same time, we don't know @@ -2285,7 +2285,8 @@ bool ProcessMessages(CNode* pfrom, CConnman& connman, std::atomic& interru bool SendMessages(CNode* pto, CConnman& connman, std::atomic& interruptMsgProc) { - const Consensus::Params& consensusParams = Params().GetConsensus(); + const CChainParams chainParams = Params(); + const Consensus::Params& consensusParams = chainParams.GetConsensus(); { // Don't send anything until the version handshake is complete if (!pto->fSuccessfullyConnected || pto->fDisconnect) @@ -2402,7 +2403,7 @@ bool SendMessages(CNode* pto, CConnman& connman, std::atomic& interruptMsg } } - if (pindexBestHeader->GetBlockTime() > GetAdjustedTime() - 7 * 24 * 60 * 60) { + if (chainParams.DelayGetHeadersTime() != 0 && pindexBestHeader->GetBlockTime() >= GetAdjustedTime() - chainParams.DelayGetHeadersTime()) { // Headers chain has catched up enough so we can send out GETHEADER messages which were initially meant to // be sent directly after INV was received LOCK(pto->cs_inventory); From 6182c2899874a2c619a9ef2a090b348852fea2ad Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Sun, 3 Sep 2017 21:01:00 +0200 Subject: [PATCH 3/3] Disconnect peers which are not catched up Peers which stop sending us headers too early are very likely peers which did not catch up before and stalled for some reason. We should disconnect these peers and chose another one to continue. --- src/net_processing.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/net_processing.cpp b/src/net_processing.cpp index 203ccca270829..f84a4980d043a 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -1775,11 +1775,6 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, ReadCompactSize(vRecv); // ignore tx count; assume it is 0. } - if (nCount == 0) { - // Nothing interesting. Stop asking this peers for more headers. - return true; - } - CBlockIndex *pindexLast = NULL; { LOCK(cs_main); @@ -1816,6 +1811,20 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv, // from there instead. LogPrint("net", "more getheaders (%d) to end to peer=%d (startheight:%d)\n", pindexLast->nHeight, pfrom->id, pfrom->nStartingHeight); connman.PushMessage(pfrom, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexLast), uint256()); + } else { + if (chainparams.DelayGetHeadersTime() != 0 && pindexBestHeader->GetBlockTime() < GetAdjustedTime() - chainparams.DelayGetHeadersTime()) { + // peer has sent us a HEADERS message below maximum size and we are still quite far from being fully + // synced, this means we probably got a bad peer for initial sync and need to continue with another one. + // By disconnecting we force to start a new iteration of initial headers sync in SendMessages + // TODO should we handle whitelisted peers here as we do in headers sync timeout handling? + pfrom->fDisconnect = true; + return error("detected bad peer for initial headers sync, disconnecting %d", pfrom->id); + } + + if (nCount == 0) { + // Nothing interesting. Stop asking this peers for more headers. + return true; + } } bool fCanDirectFetch = CanDirectFetch(chainparams.GetConsensus());