From 13ccb8cbfe6106fdad985daaef0e74e5d42cdea6 Mon Sep 17 00:00:00 2001 From: zenground0 Date: Tue, 1 Feb 2022 11:39:42 +0530 Subject: [PATCH 1/4] Stop recovery attempts after fault --- extern/storage-sealing/states_failed.go | 11 +++++++++ extern/storage-sealing/upgrade_queue.go | 31 ++++++++++++++++--------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/extern/storage-sealing/states_failed.go b/extern/storage-sealing/states_failed.go index c32ac4c3a9f..03d87a3b491 100644 --- a/extern/storage-sealing/states_failed.go +++ b/extern/storage-sealing/states_failed.go @@ -237,6 +237,17 @@ func (m *Sealing) handleSubmitReplicaUpdateFailed(ctx statemachine.Context, sect } } + // Abort upgrade for sectors that went faulty since being marked for upgrade + active, err := sectorActive(ctx.Context(), m.Api, m.maddr, tok, sector.SectorNumber) + if err != nil { + log.Errorf("sector active check: api error, not proceeding: %+v", err) + return nil + } + if !active { + log.Errorf("sector marked for upgrade %d no longer active, aborting upgrade", sector.SectorNumber) + return ctx.Send(SectorAbortUpgrade{}) + } + if err := failedCooldown(ctx, sector); err != nil { return err } diff --git a/extern/storage-sealing/upgrade_queue.go b/extern/storage-sealing/upgrade_queue.go index 1aacc9c0808..343d6ca9554 100644 --- a/extern/storage-sealing/upgrade_queue.go +++ b/extern/storage-sealing/upgrade_queue.go @@ -3,6 +3,7 @@ package sealing import ( "context" + "github.com/filecoin-project/go-address" "github.com/filecoin-project/lotus/chain/actors/builtin/miner" market7 "github.com/filecoin-project/specs-actors/v7/actors/builtin/market" @@ -86,19 +87,11 @@ func (m *Sealing) MarkForSnapUpgrade(ctx context.Context, id abi.SectorNumber) e return xerrors.Errorf("failed to read sector on chain info: %w", err) } - active, err := m.Api.StateMinerActiveSectors(ctx, m.maddr, tok) + active, err := sectorActive(ctx, m.Api, m.maddr, tok, id) if err != nil { - return xerrors.Errorf("failed to check active sectors: %w", err) + return xerrors.Errorf("failed to check if sector is active") } - // Ensure the upgraded sector is active - var found bool - for _, si := range active { - if si.SectorNumber == id { - found = true - break - } - } - if !found { + if !active { return xerrors.Errorf("cannot mark inactive sector for upgrade") } @@ -110,6 +103,22 @@ func (m *Sealing) MarkForSnapUpgrade(ctx context.Context, id abi.SectorNumber) e return m.sectors.Send(uint64(id), SectorStartCCUpdate{}) } +func sectorActive(ctx context.Context, api SealingAPI, maddr address.Address, tok TipSetToken, sector abi.SectorNumber) (bool, error) { + active, err := api.StateMinerActiveSectors(ctx, maddr, tok) + if err != nil { + return false, xerrors.Errorf("failed to check active sectors: %w", err) + } + // Ensure the upgraded sector is active + var found bool + for _, si := range active { + if si.SectorNumber == sector { + found = true + break + } + } + return found, nil +} + func (m *Sealing) tryUpgradeSector(ctx context.Context, params *miner.SectorPreCommitInfo) big.Int { if len(params.DealIDs) == 0 { return big.Zero() From 47ffceef0dd908ab8e39a0bc6faa747b548a3647 Mon Sep 17 00:00:00 2001 From: zenground0 Date: Mon, 7 Feb 2022 07:41:48 -0500 Subject: [PATCH 2/4] Check sector is active before PRU --- extern/storage-sealing/states_replica_update.go | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/extern/storage-sealing/states_replica_update.go b/extern/storage-sealing/states_replica_update.go index 43d5467ed55..341c553e1ad 100644 --- a/extern/storage-sealing/states_replica_update.go +++ b/extern/storage-sealing/states_replica_update.go @@ -31,6 +31,21 @@ func (m *Sealing) handleProveReplicaUpdate(ctx statemachine.Context, sector Sect if sector.CommR == nil { return xerrors.Errorf("invalid sector %d with nil CommR", sector.SectorNumber) } + // Abort upgrade for sectors that went faulty since being marked for upgrade + tok, _, err := m.Api.ChainHead(ctx.Context()) + if err != nil { + log.Errorf("handleProveReplicaUpdate: api error, not proceeding: %+v", err) + return nil + } + active, err := sectorActive(ctx.Context(), m.Api, m.maddr, tok, sector.SectorNumber) + if err != nil { + log.Errorf("sector active check: api error, not proceeding: %+v", err) + return nil + } + if !active { + log.Errorf("sector marked for upgrade %d no longer active, aborting upgrade", sector.SectorNumber) + return ctx.Send(SectorAbortUpgrade{}) + } vanillaProofs, err := m.sealer.ProveReplicaUpdate1(sector.sealingCtx(ctx.Context()), m.minerSector(sector.SectorType, sector.SectorNumber), *sector.CommR, *sector.UpdateSealed, *sector.UpdateUnsealed) if err != nil { From 1ab2744c84792f47b9cd1c94966662484d7056d2 Mon Sep 17 00:00:00 2001 From: zenground0 Date: Mon, 7 Feb 2022 09:15:23 -0500 Subject: [PATCH 3/4] Fix log --- extern/storage-sealing/states_failed.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extern/storage-sealing/states_failed.go b/extern/storage-sealing/states_failed.go index 03d87a3b491..55bda80368d 100644 --- a/extern/storage-sealing/states_failed.go +++ b/extern/storage-sealing/states_failed.go @@ -211,7 +211,7 @@ func (m *Sealing) handleSubmitReplicaUpdateFailed(ctx statemachine.Context, sect tok, _, err := m.Api.ChainHead(ctx.Context()) if err != nil { - log.Errorf("handleCommitting: api error, not proceeding: %+v", err) + log.Errorf("handleSubmitReplicaUpdateFailed: api error, not proceeding: %+v", err) return nil } From b6c141c05960773c8f449181838877262150d33a Mon Sep 17 00:00:00 2001 From: Aayush Rajasekaran Date: Tue, 8 Feb 2022 12:06:33 -0500 Subject: [PATCH 4/4] nit: edit a comment --- extern/storage-sealing/upgrade_queue.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extern/storage-sealing/upgrade_queue.go b/extern/storage-sealing/upgrade_queue.go index 343d6ca9554..86083930d6a 100644 --- a/extern/storage-sealing/upgrade_queue.go +++ b/extern/storage-sealing/upgrade_queue.go @@ -108,7 +108,7 @@ func sectorActive(ctx context.Context, api SealingAPI, maddr address.Address, to if err != nil { return false, xerrors.Errorf("failed to check active sectors: %w", err) } - // Ensure the upgraded sector is active + // Check if sector is among active sectors var found bool for _, si := range active { if si.SectorNumber == sector {