From e1a0572a1576bdd0f9e2153df291ba7ac9c9052a Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Wed, 30 Oct 2024 08:28:33 -0700 Subject: [PATCH 1/7] fix(miner): ignore lastWork when selecting the best mining candidate Previously, we only took the new head if it's heavier than the last head. Unfortunately, this meant that F3 finalization wasn't properly propagated to the miner. In terms of impact: 1. It seems likely that this check was simply defensive as, prior to F3, the new head should never have a lower weight (unless you're talking to multiple lotus nodes, I guess...). 2. The `lastWork` field is mostly used to track null blocks. Worst-case scenario, if we switch heads, we'll attempt to re-mine previous heights. However, that should be relatively fast and, due to the slash filter, we won't attempt to re-broadcast any of those blocks. --- CHANGELOG.md | 4 ++++ miner/miner.go | 28 ++++------------------------ 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9dcd7bf9fc..fc8361cbf77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,10 @@ - Change the F3 HeadLookback parameter to 4 ([filecoin-project/lotus#12648](https://github.com/filecoin-project/lotus/pull/12648)). - Upgrade go-f3 to 0.7.1 to resolve Tipset not found errors when trying to establish instance start time ([filecoin-project/lotus#12651](https://github.com/filecoin-project/lotus/pull/12651)). +## Changes + +- The Lotus Miner will now always mine on the latest chain head returned by lotus, even if that head has less "weight" than the previously seen head. This is necessary because F3 may end up finalizing a tipset with a lower weight, although this situation should be rare on the Filecoin mainnet. ([filecoin-project/lotus#12659](https://github.com/filecoin-project/lotus/pull/12659)) + ## Deps # UNRELEASED Node v1.30.0 diff --git a/miner/miner.go b/miner/miner.go index b7e39a953c4..785cc3a2c9a 100644 --- a/miner/miner.go +++ b/miner/miner.go @@ -400,11 +400,8 @@ type MiningBase struct { } // GetBestMiningCandidate implements the fork choice rule from a miner's -// perspective. -// -// It obtains the current chain head (HEAD), and compares it to the last tipset -// we selected as our mining base (LAST). If HEAD's weight is larger than -// LAST's weight, it selects HEAD to build on. Else, it selects LAST. +// perspective, returning the best head to mine on. This includes the number of null rounds we think +// we should insert and the time at which we received said head. func (m *Miner) GetBestMiningCandidate(ctx context.Context) (*MiningBase, error) { m.lk.Lock() defer m.lk.Unlock() @@ -414,27 +411,10 @@ func (m *Miner) GetBestMiningCandidate(ctx context.Context) (*MiningBase, error) return nil, err } - if m.lastWork != nil { - if m.lastWork.TipSet.Equals(bts) { - return m.lastWork, nil - } - - btsw, err := m.api.ChainTipSetWeight(ctx, bts.Key()) - if err != nil { - return nil, err - } - ltsw, err := m.api.ChainTipSetWeight(ctx, m.lastWork.TipSet.Key()) - if err != nil { - m.lastWork = nil - return nil, err - } - - if types.BigCmp(btsw, ltsw) <= 0 { - return m.lastWork, nil - } + if m.lastWork == nil || !m.lastWork.TipSet.Equals(bts) { + m.lastWork = &MiningBase{TipSet: bts, ComputeTime: time.Now()} } - m.lastWork = &MiningBase{TipSet: bts, ComputeTime: time.Now()} return m.lastWork, nil } From c80f32de4f010ddd26eec53f39a695655e2108b8 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 1 Nov 2024 20:52:10 +0900 Subject: [PATCH 2/7] fix(miner): continue mining if we fail to submit a block --- miner/miner.go | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/miner/miner.go b/miner/miner.go index 785cc3a2c9a..0f587b4b632 100644 --- a/miner/miner.go +++ b/miner/miner.go @@ -311,7 +311,8 @@ minerLoop: onDone(b != nil, h, nil) // Process the mined block. - if b != nil { + switch { + case b != nil: // Record the event of mining a block. m.journal.RecordEvent(m.evtTypes[evtTypeBlockMined], func() interface{} { return map[string]interface{}{ @@ -348,19 +349,19 @@ minerLoop: if err != nil { log.Errorf(" SLASH FILTER ERRORED: %s", err) // Continue here, because it's _probably_ wiser to not submit this block - continue + break } if fault { log.Errorf(" SLASH FILTER DETECTED FAULT due to blocks %s and %s", b.Header.Cid(), witness) - continue + break } } // Check for blocks created at the same height. if _, ok := m.minedBlockHeights.Get(b.Header.Height); ok { log.Warnw("Created a block at the same height as another block we've created", "height", b.Header.Height, "miner", b.Header.Miner, "parents", b.Header.Parents) - continue + break } // Add the block height to the mined block heights. @@ -369,24 +370,26 @@ minerLoop: // Submit the newly mined block. if err := m.api.SyncSubmitBlock(ctx, b); err != nil { log.Errorf("failed to submit newly mined block: %+v", err) + break } - } else { - // If no block was mined, increase the null rounds and wait for the next epoch. - base.NullRounds++ - - // Calculate the time for the next round. - nextRound := time.Unix(int64(base.TipSet.MinTimestamp()+buildconstants.BlockDelaySecs*uint64(base.NullRounds))+int64(buildconstants.PropagationDelaySecs), 0) - - // Wait for the next round or stop signal. - select { - case <-build.Clock.After(build.Clock.Until(nextRound)): - case <-m.stop: - stopping := m.stopping - m.stop = nil - m.stopping = nil - close(stopping) - return - } + continue // TODO: we should probably remove this continue and wait in this case as well... but that's a bigger change. + } + + // If no block was mined or if we fail to submit the block, increase the null rounds and wait for the next epoch. + base.NullRounds++ + + // Calculate the time for the next round. + nextRound := time.Unix(int64(base.TipSet.MinTimestamp()+buildconstants.BlockDelaySecs*uint64(base.NullRounds))+int64(buildconstants.PropagationDelaySecs), 0) + + // Wait for the next round or stop signal. + select { + case <-build.Clock.After(build.Clock.Until(nextRound)): + case <-m.stop: + stopping := m.stopping + m.stop = nil + m.stopping = nil + close(stopping) + return } } } From 4404614ac94d2b4a203c936e3fb8c01a20f8a87d Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Fri, 1 Nov 2024 20:53:00 +0900 Subject: [PATCH 3/7] [WIP] test: checkpoint to a fork This test still doesn't pass and I'm not sure why: 1. Sync is a bit broken because it can't figure out that we already have all the data locally. 2. But with 2 nodes, it should work. Except that, if I add some logging, I see that sync works until the libp2p nodes just flat-out disconnect from eachother. --- itests/checkpoint_test.go | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 itests/checkpoint_test.go diff --git a/itests/checkpoint_test.go b/itests/checkpoint_test.go new file mode 100644 index 00000000000..e3b783b2342 --- /dev/null +++ b/itests/checkpoint_test.go @@ -0,0 +1,64 @@ +package itests + +import ( + "context" + "testing" + "time" + + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/itests/kit" + "github.com/stretchr/testify/require" +) + +func TestCheckpointFork(t *testing.T) { + ctx := context.Background() + + blocktime := 100 * time.Millisecond + + nopts := []kit.NodeOpt{kit.WithAllSubsystems(), kit.ThroughRPC()} + var n1, n2 kit.TestFullNode + var m1, m2 kit.TestMiner + ens := kit.NewEnsemble(t, kit.MockProofs()). + FullNode(&n1, nopts...). + FullNode(&n2, nopts...). + Miner(&m1, &n1, nopts...). + Miner(&m2, &n1, nopts...).Start() + + // Start 2 of them. + ens.InterconnectAll().BeginMining(blocktime) + + { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + n1.WaitTillChain(ctx, kit.HeightAtLeast(abi.ChainEpoch(5))) + cancel() + } + + // Wait till both participate in a single tipset. + var target *types.TipSet + { + // find the first tipset where two miners mine a block. + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + target = n1.WaitTillChain(ctx, func(ts *types.TipSet) bool { + return len(ts.Blocks()) == 2 + }) + cancel() + } + + // Wait till we've moved on from that tipset. + targetHeight := target.Height() + 10 + n1.WaitTillChain(ctx, kit.HeightAtLeast(targetHeight)) + n2.WaitTillChain(ctx, kit.HeightAtLeast(targetHeight)) + + // Forcibly sync to this fork tipset. + forkTs, err := types.NewTipSet(target.Blocks()[:1]) + require.NoError(t, err) + require.NoError(t, n2.SyncCheckpoint(ctx, forkTs.Key())) + require.NoError(t, n1.SyncCheckpoint(ctx, forkTs.Key())) + + // See if we can start making progress again! + newHead := n1.WaitTillChain(ctx, kit.HeightAtLeast(targetHeight)) + forkTs2, err := n1.ChainGetTipSetByHeight(ctx, forkTs.Height(), newHead.Key()) + require.NoError(t, err) + require.True(t, forkTs.Equals(forkTs2)) +} From 065df6a472865f4beb0139a4200081e19f9068e9 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 5 Nov 2024 11:01:49 +0700 Subject: [PATCH 4/7] fix(miner): check the slash filter with the correct parent height We also perform this check inside `SyncSubmitBlock` so we did have an effective filter, but this was still wrong. --- miner/miner.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/miner/miner.go b/miner/miner.go index 0f587b4b632..8c1dae1483b 100644 --- a/miner/miner.go +++ b/miner/miner.go @@ -345,7 +345,7 @@ minerLoop: // Check for slash filter conditions. if os.Getenv("LOTUS_MINER_NO_SLASHFILTER") != "_yes_i_know_i_can_and_probably_will_lose_all_my_fil_and_power_" && !buildconstants.IsNearUpgrade(base.TipSet.Height(), buildconstants.UpgradeWatermelonFixHeight) { - witness, fault, err := m.sf.MinedBlock(ctx, b.Header, base.TipSet.Height()+base.NullRounds) + witness, fault, err := m.sf.MinedBlock(ctx, b.Header, base.TipSet.Height()) if err != nil { log.Errorf(" SLASH FILTER ERRORED: %s", err) // Continue here, because it's _probably_ wiser to not submit this block From d5a45bae3fd7a7051e1b1621e3f8f2b20e976058 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 5 Nov 2024 11:02:46 +0700 Subject: [PATCH 5/7] fix(exchange): avoid adding ourselves as an exchange peer We have some cases where we submit a tipset to ourselves, from ourselves and end up calling `AddPeer` with our own ID. Ignore this case. --- chain/exchange/client.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/chain/exchange/client.go b/chain/exchange/client.go index 289fe7ca0b2..bd645f59bc4 100644 --- a/chain/exchange/client.go +++ b/chain/exchange/client.go @@ -496,6 +496,9 @@ func (c *client) sendRequestToPeer(ctx context.Context, peer peer.ID, req *Reque // AddPeer implements Client.AddPeer(). Refer to the godocs there. func (c *client) AddPeer(p peer.ID) { + if p == c.host.ID() { + return + } c.peerTracker.addPeer(p) } From 475eda54aaea2104a6db8bdffec2cd0961a9bd20 Mon Sep 17 00:00:00 2001 From: Steven Allen Date: Tue, 5 Nov 2024 11:03:57 +0700 Subject: [PATCH 6/7] fix(hello): submit new blocks from peers even if we're at genesis A side-effect of InformNewHead is to record the peer for future chain-sync sessions. If we don't pass blocks to InformNewHead here, we can have some difficulty bootstrapping networks. --- node/hello/hello.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/node/hello/hello.go b/node/hello/hello.go index b1977bdaea9..02b9acb74ea 100644 --- a/node/hello/hello.go +++ b/node/hello/hello.go @@ -132,13 +132,11 @@ func (hs *Service) HandleStream(s inet.Stream) { return } - if ts.TipSet().Height() > 0 { - hs.h.ConnManager().TagPeer(s.Conn().RemotePeer(), "fcpeer", 10) + hs.h.ConnManager().TagPeer(s.Conn().RemotePeer(), "fcpeer", 10) - // don't bother informing about genesis - log.Debugf("Got new tipset through Hello: %s from %s", ts.Cids(), s.Conn().RemotePeer()) - hs.syncer.InformNewHead(s.Conn().RemotePeer(), ts) - } + // don't bother informing about genesis + log.Debugf("Got new tipset through Hello: %s from %s", ts.Cids(), s.Conn().RemotePeer()) + hs.syncer.InformNewHead(s.Conn().RemotePeer(), ts) } func (hs *Service) SayHello(ctx context.Context, pid peer.ID) error { From 8d4a55548dcf39fe93d17818f56759df69206302 Mon Sep 17 00:00:00 2001 From: "Masih H. Derkani" Date: Tue, 12 Nov 2024 14:26:00 +0700 Subject: [PATCH 7/7] `make gen` --- itests/checkpoint_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/itests/checkpoint_test.go b/itests/checkpoint_test.go index e3b783b2342..35c05ef8b14 100644 --- a/itests/checkpoint_test.go +++ b/itests/checkpoint_test.go @@ -5,10 +5,12 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/itests/kit" - "github.com/stretchr/testify/require" ) func TestCheckpointFork(t *testing.T) {