From c3d6f5461d00502ce233e1f63fb317d3c667fa4f Mon Sep 17 00:00:00 2001
From: Alexander Block <ablock84@gmail.com>
Date: Thu, 24 Aug 2017 20:17:52 +0200
Subject: [PATCH 1/3] Fix duplicate headers download in initial sync

Now that initial block download is delayed until the headers sync is done,
it was noticed that the initial headers sync may happen multiple times in
parallel in the case new blocks are announced. This happens because for
every block in INV that is received, a getheaders message is immediately
sent out resulting in a full download of the headers chain starting from
the point of where the initial headers sync is currently at. This happens
once for each peer that announces the new block. This slows down the
initial headers sync and increases the chance of another block being
announced before it is finished, probably leading to the same behavior
as already described, slowing down the sync even more...and so on.

This commit delays sending of GETHEADERS to later in case the chain is too
far behind while a new block gets announced. Header chains will still be
downloaded multiple times, but the downloading will start much closer
to the tip of the chain, so the damage is not that bad anymore.

This ensures that we get all headers from all peers, even if any of them
is on another chain. This should avoid what happened in
https://github.com/bitcoin/bitcoin/pull/8054
which needed to be reverted later.

This fixes the Bitcoin issue https://github.com/bitcoin/bitcoin/issues/6755
---
 src/net.h              |  9 +++++++
 src/net_processing.cpp | 56 +++++++++++++++++++++++++++++-------------
 2 files changed, 48 insertions(+), 17 deletions(-)
diff --git a/src/net.h b/src/net.h
index ee8f7a0980a3b..815d00d7eb449 100644
--- a/src/net.h
+++ b/src/net.h
@@ -742,6 +742,9 @@ class CNode
     // Used for headers announcements - unfiltered blocks to relay
     // Also protected by cs_inventory
     std::vector<uint256> vBlockHashesToAnnounce;
+    // Blocks received by INV while headers chain was too far behind. These are used to delay GETHEADERS messages
+    // Also protected by cs_inventory
+    std::vector<uint256> vBlockHashesFromINV;
 
     // Block and TXN accept times
     std::atomic<int64_t> nLastBlockTime;
@@ -875,6 +878,12 @@ class CNode
         vBlockHashesToAnnounce.push_back(hash);
     }
 
+    void PushBlockHashFromINV(const uint256 &hash)
+    {
+        LOCK(cs_inventory);
+        vBlockHashesFromINV.push_back(hash);
+    }
+
     void AskFor(const CInv& inv);
 
     void CloseSocketDisconnect();
diff --git a/src/net_processing.cpp b/src/net_processing.cpp
index 4304367e0760e..be97a458c0d20 100644
--- a/src/net_processing.cpp
+++ b/src/net_processing.cpp
@@ -1380,24 +1380,35 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
             if (inv.type == MSG_BLOCK) {
                 UpdateBlockAvailability(pfrom->GetId(), inv.hash);
                 if (!fAlreadyHave && !fImporting && !fReindex && !mapBlocksInFlight.count(inv.hash)) {
-                    // First request the headers preceding the announced block. In the normal fully-synced
-                    // case where a new block is announced that succeeds the current tip (no reorganization),
-                    // there are no such headers.
-                    // Secondly, and only when we are close to being synced, we request the announced block directly,
-                    // to avoid an extra round-trip. Note that we must *first* ask for the headers, so by the
-                    // time the block arrives, the header chain leading up to it is already validated. Not
-                    // doing this will result in the received block being rejected as an orphan in case it is
-                    // not a direct successor.
-                    connman.PushMessage(pfrom, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), inv.hash);
-                    CNodeState *nodestate = State(pfrom->GetId());
-                    if (CanDirectFetch(chainparams.GetConsensus()) &&
-                        nodestate->nBlocksInFlight < MAX_BLOCKS_IN_TRANSIT_PER_PEER) {
-                        vToFetch.push_back(inv);
-                        // Mark block as in flight already, even though the actual "getdata" message only goes out
-                        // later (within the same cs_main lock, though).
-                        MarkBlockAsInFlight(pfrom->GetId(), inv.hash, chainparams.GetConsensus());
+                    if (pindexBestHeader->GetBlockTime() < GetAdjustedTime() - 7 * 24 * 60 * 60) {
+                        // We are pretty far from being completely synced at the moment. If we would initiate a new
+                        // chain of GETHEADERS/HEADERS now, we may end up downnloading the full chain from multiple
+                        // peers at the same time, slowing down the initial sync. At the same time, we don't know
+                        // if the peer we got this INV from may have a chain we don't know about yet, so we HAVE TO
+                        // send a GETHEADERS message at some point in time. This is delayed to later in SendMessages
+                        // when the headers chain has catched up enough.
+                        LogPrint("net", "delaying getheaders (%d) %s to peer=%d\n", pindexBestHeader->nHeight, inv.hash.ToString(), pfrom->id);
+                        pfrom->PushBlockHashFromINV(inv.hash);
+                    } else {
+                        // First request the headers preceding the announced block. In the normal fully-synced
+                        // case where a new block is announced that succeeds the current tip (no reorganization),
+                        // there are no such headers.
+                        // Secondly, and only when we are close to being synced, we request the announced block directly,
+                        // to avoid an extra round-trip. Note that we must *first* ask for the headers, so by the
+                        // time the block arrives, the header chain leading up to it is already validated. Not
+                        // doing this will result in the received block being rejected as an orphan in case it is
+                        // not a direct successor.
+                        connman.PushMessage(pfrom, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), inv.hash);
+                        CNodeState *nodestate = State(pfrom->GetId());
+                        if (CanDirectFetch(chainparams.GetConsensus()) &&
+                            nodestate->nBlocksInFlight < MAX_BLOCKS_IN_TRANSIT_PER_PEER) {
+                            vToFetch.push_back(inv);
+                            // Mark block as in flight already, even though the actual "getdata" message only goes out
+                            // later (within the same cs_main lock, though).
+                            MarkBlockAsInFlight(pfrom->GetId(), inv.hash, chainparams.GetConsensus());
+                        }
+                        LogPrint("net", "getheaders (%d) %s to peer=%d\n", pindexBestHeader->nHeight, inv.hash.ToString(), pfrom->id);
                     }
-                    LogPrint("net", "getheaders (%d) %s to peer=%d\n", pindexBestHeader->nHeight, inv.hash.ToString(), pfrom->id);
                 }
             }
             else
@@ -2391,6 +2402,17 @@ bool SendMessages(CNode* pto, CConnman& connman, std::atomic<bool>& interruptMsg
             }
         }
 
+        if (pindexBestHeader->GetBlockTime() > GetAdjustedTime() - 7 * 24 * 60 * 60) {
+            // Headers chain has catched up enough so we can send out GETHEADER messages which were initially meant to
+            // be sent directly after INV was received
+            LOCK(pto->cs_inventory);
+            BOOST_FOREACH(const uint256 &hash, pto->vBlockHashesFromINV) {
+                LogPrint("net", "process delayed getheaders (%d) to peer=%d\n", pindexBestHeader->nHeight, pto->id);
+                connman.PushMessage(pto, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexBestHeader), hash);
+            }
+            pto->vBlockHashesFromINV.clear();
+        }
+
         // Resend wallet transactions that haven't gotten in a block yet
         // Except during reindex, importing and IBD, when old wallet
         // transactions become unconfirmed and spams other nodes.

From fac0a6b1262879041c51b12c7df2a2962d6326ca Mon Sep 17 00:00:00 2001
From: Alexander Block <ablock84@gmail.com>
Date: Fri, 1 Sep 2017 19:24:56 +0200
Subject: [PATCH 2/3] Introduce DelayGetHeadersTime chain param and fix tests

The delaying of GETHEADERS in combination with very old block times in
test cases resulted in the delaying being triggered when the first newly
mined block arrives. This results in a completely stalled sync.

This is fixed by avoiding delaying in when running tests.
---
 src/chainparams.cpp    | 3 +++
 src/chainparams.h      | 2 ++
 src/net_processing.cpp | 7 ++++---
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/chainparams.cpp b/src/chainparams.cpp
index 9c5f043d33809..bd8a815357632 100644
--- a/src/chainparams.cpp
+++ b/src/chainparams.cpp
@@ -125,6 +125,7 @@ class CMainParams : public CChainParams {
         vAlertPubKey = ParseHex("048240a8748a80a286b270ba126705ced4f2ce5a7847b3610ea3c06513150dade2a8512ed5ea86320824683fc0818f0ac019214973e677acd1244f6d0571fc5103");
         nDefaultPort = 9999;
         nMaxTipAge = 6 * 60 * 60; // ~144 blocks behind -> 2 x fork detection time, was 24 * 60 * 60 in bitcoin
+        nDelayGetHeadersTime = 24 * 60 * 60;
         nPruneAfterHeight = 100000;
 
         genesis = CreateGenesisBlock(1390095618, 28917698, 0x1e0ffff0, 1, 50 * COIN);
@@ -250,6 +251,7 @@ class CTestNetParams : public CChainParams {
         vAlertPubKey = ParseHex("04517d8a699cb43d3938d7b24faaff7cda448ca4ea267723ba614784de661949bf632d6304316b244646dea079735b9a6fc4af804efb4752075b9fe2245e14e412");
         nDefaultPort = 19999;
         nMaxTipAge = 0x7fffffff; // allow mining on top of old blocks for testnet
+        nDelayGetHeadersTime = 24 * 60 * 60;
         nPruneAfterHeight = 1000;
 
         genesis = CreateGenesisBlock(1390666206UL, 3861367235UL, 0x1e0ffff0, 1, 50 * COIN);
@@ -359,6 +361,7 @@ class CRegTestParams : public CChainParams {
         pchMessageStart[2] = 0xb7;
         pchMessageStart[3] = 0xdc;
         nMaxTipAge = 6 * 60 * 60; // ~144 blocks behind -> 2 x fork detection time, was 24 * 60 * 60 in bitcoin
+        nDelayGetHeadersTime = 0; // never delay GETHEADERS in regtests
         nDefaultPort = 19994;
         nPruneAfterHeight = 1000;
 
diff --git a/src/chainparams.h b/src/chainparams.h
index cf4e9df42a263..4160e6c53a531 100644
--- a/src/chainparams.h
+++ b/src/chainparams.h
@@ -65,6 +65,7 @@ class CChainParams
     /** Policy: Filter transactions that do not match well-defined patterns */
     bool RequireStandard() const { return fRequireStandard; }
     int64_t MaxTipAge() const { return nMaxTipAge; }
+    int64_t DelayGetHeadersTime() const { return nDelayGetHeadersTime; }
     uint64_t PruneAfterHeight() const { return nPruneAfterHeight; }
     /** Make miner stop after a block is found. In RPC, don't return until nGenProcLimit blocks are generated */
     bool MineBlocksOnDemand() const { return fMineBlocksOnDemand; }
@@ -89,6 +90,7 @@ class CChainParams
     std::vector<unsigned char> vAlertPubKey;
     int nDefaultPort;
     long nMaxTipAge;
+    int64_t nDelayGetHeadersTime;
     uint64_t nPruneAfterHeight;
     std::vector<CDNSSeedData> vSeeds;
     std::vector<unsigned char> base58Prefixes[MAX_BASE58_TYPES];
diff --git a/src/net_processing.cpp b/src/net_processing.cpp
index be97a458c0d20..203ccca270829 100644
--- a/src/net_processing.cpp
+++ b/src/net_processing.cpp
@@ -1380,7 +1380,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
             if (inv.type == MSG_BLOCK) {
                 UpdateBlockAvailability(pfrom->GetId(), inv.hash);
                 if (!fAlreadyHave && !fImporting && !fReindex && !mapBlocksInFlight.count(inv.hash)) {
-                    if (pindexBestHeader->GetBlockTime() < GetAdjustedTime() - 7 * 24 * 60 * 60) {
+                    if (chainparams.DelayGetHeadersTime() != 0 && pindexBestHeader->GetBlockTime() < GetAdjustedTime() - chainparams.DelayGetHeadersTime()) {
                         // We are pretty far from being completely synced at the moment. If we would initiate a new
                         // chain of GETHEADERS/HEADERS now, we may end up downnloading the full chain from multiple
                         // peers at the same time, slowing down the initial sync. At the same time, we don't know
@@ -2285,7 +2285,8 @@ bool ProcessMessages(CNode* pfrom, CConnman& connman, std::atomic<bool>& interru
 
 bool SendMessages(CNode* pto, CConnman& connman, std::atomic<bool>& interruptMsgProc)
 {
-    const Consensus::Params& consensusParams = Params().GetConsensus();
+    const CChainParams chainParams = Params();
+    const Consensus::Params& consensusParams = chainParams.GetConsensus();
     {
         // Don't send anything until the version handshake is complete
         if (!pto->fSuccessfullyConnected || pto->fDisconnect)
@@ -2402,7 +2403,7 @@ bool SendMessages(CNode* pto, CConnman& connman, std::atomic<bool>& interruptMsg
             }
         }
 
-        if (pindexBestHeader->GetBlockTime() > GetAdjustedTime() - 7 * 24 * 60 * 60) {
+        if (chainParams.DelayGetHeadersTime() != 0 && pindexBestHeader->GetBlockTime() >= GetAdjustedTime() - chainParams.DelayGetHeadersTime()) {
             // Headers chain has catched up enough so we can send out GETHEADER messages which were initially meant to
             // be sent directly after INV was received
             LOCK(pto->cs_inventory);

From 6182c2899874a2c619a9ef2a090b348852fea2ad Mon Sep 17 00:00:00 2001
From: Alexander Block <ablock84@gmail.com>
Date: Sun, 3 Sep 2017 21:01:00 +0200
Subject: [PATCH 3/3] Disconnect peers which are not catched up

Peers which stop sending us headers too early are very likely peers which
did not catch up before and stalled for some reason. We should disconnect
these peers and chose another one to continue.
---
 src/net_processing.cpp | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/net_processing.cpp b/src/net_processing.cpp
index 203ccca270829..f84a4980d043a 100644
--- a/src/net_processing.cpp
+++ b/src/net_processing.cpp
@@ -1775,11 +1775,6 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
             ReadCompactSize(vRecv); // ignore tx count; assume it is 0.
         }
 
-        if (nCount == 0) {
-            // Nothing interesting. Stop asking this peers for more headers.
-            return true;
-        }
-
         CBlockIndex *pindexLast = NULL;
         {
         LOCK(cs_main);
@@ -1816,6 +1811,20 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv,
             // from there instead.
             LogPrint("net", "more getheaders (%d) to end to peer=%d (startheight:%d)\n", pindexLast->nHeight, pfrom->id, pfrom->nStartingHeight);
             connman.PushMessage(pfrom, NetMsgType::GETHEADERS, chainActive.GetLocator(pindexLast), uint256());
+        } else {
+            if (chainparams.DelayGetHeadersTime() != 0 && pindexBestHeader->GetBlockTime() < GetAdjustedTime() - chainparams.DelayGetHeadersTime()) {
+                // peer has sent us a HEADERS message below maximum size and we are still quite far from being fully
+                // synced, this means we probably got a bad peer for initial sync and need to continue with another one.
+                // By disconnecting we force to start a new iteration of initial headers sync in SendMessages
+                // TODO should we handle whitelisted peers here as we do in headers sync timeout handling?
+                pfrom->fDisconnect = true;
+                return error("detected bad peer for initial headers sync, disconnecting %d", pfrom->id);
+            }
+
+            if (nCount == 0) {
+                // Nothing interesting. Stop asking this peers for more headers.
+                return true;
+            }
         }
 
         bool fCanDirectFetch = CanDirectFetch(chainparams.GetConsensus());