From b576008e87236812ff257140fd9b5e511ce84c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 23 May 2022 22:56:11 +0200 Subject: [PATCH 1/5] sched: Strong preference in WorkerSelector --- extern/sector-storage/sched.go | 4 +++- extern/sector-storage/sched_assigner_common.go | 17 +++++++++++++++-- extern/sector-storage/selector_alloc.go | 16 ++++++++-------- extern/sector-storage/selector_existing.go | 16 ++++++++-------- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index 53b6415ff39..c47ae5fd910 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -44,7 +44,9 @@ const mib = 1 << 20 type WorkerAction func(ctx context.Context, w Worker) error type WorkerSelector interface { - Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *WorkerHandle) (bool, error) // true if worker is acceptable for performing a task + // Ok is true if worker is acceptable for performing a task. + // If any worker is preferred for a task, other workers won't be considered for that task. 
+ Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *WorkerHandle) (ok, preferred bool, err error) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) // true if a is preferred over b } diff --git a/extern/sector-storage/sched_assigner_common.go b/extern/sector-storage/sched_assigner_common.go index a4a2dfc231d..5be551693ee 100644 --- a/extern/sector-storage/sched_assigner_common.go +++ b/extern/sector-storage/sched_assigner_common.go @@ -61,8 +61,10 @@ func (a *AssignerCommon) TrySched(sh *Scheduler) { }() task := (*sh.SchedQueue)[sqi] - task.IndexHeap = sqi + + var havePreferred bool + for wnd, windowRequest := range sh.OpenWindows { worker, ok := sh.Workers[windowRequest.Worker] if !ok { @@ -84,7 +86,7 @@ func (a *AssignerCommon) TrySched(sh *Scheduler) { } rpcCtx, cancel := context.WithTimeout(task.Ctx, SelectorTimeout) - ok, err := task.Sel.Ok(rpcCtx, task.TaskType, task.Sector.ProofType, worker) + ok, preferred, err := task.Sel.Ok(rpcCtx, task.TaskType, task.Sector.ProofType, worker) cancel() if err != nil { log.Errorf("trySched(1) req.Sel.Ok error: %+v", err) @@ -95,6 +97,17 @@ func (a *AssignerCommon) TrySched(sh *Scheduler) { continue } + if havePreferred && !preferred { + // we have a way better worker for this task + continue + } + + if preferred && !havePreferred { + // all workers we considered previously are much worse choice + acceptableWindows[sqi] = acceptableWindows[sqi][:0] + havePreferred = true + } + acceptableWindows[sqi] = append(acceptableWindows[sqi], wnd) } diff --git a/extern/sector-storage/selector_alloc.go b/extern/sector-storage/selector_alloc.go index d2a49f3ce6c..ffdf35b5ee7 100644 --- a/extern/sector-storage/selector_alloc.go +++ b/extern/sector-storage/selector_alloc.go @@ -26,18 +26,18 @@ func newAllocSelector(index stores.SectorIndex, alloc storiface.SectorFileType, } } -func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt 
abi.RegisteredSealProof, whnd *WorkerHandle) (bool, error) { +func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, bool, error) { tasks, err := whnd.TaskTypes(ctx) if err != nil { - return false, xerrors.Errorf("getting supported worker task types: %w", err) + return false, false, xerrors.Errorf("getting supported worker task types: %w", err) } if _, supported := tasks[task]; !supported { - return false, nil + return false, false, nil } paths, err := whnd.workerRpc.Paths(ctx) if err != nil { - return false, xerrors.Errorf("getting worker paths: %w", err) + return false, false, xerrors.Errorf("getting worker paths: %w", err) } have := map[storiface.ID]struct{}{} @@ -47,21 +47,21 @@ func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi ssize, err := spt.SectorSize() if err != nil { - return false, xerrors.Errorf("getting sector size: %w", err) + return false, false, xerrors.Errorf("getting sector size: %w", err) } best, err := s.index.StorageBestAlloc(ctx, s.alloc, ssize, s.ptype) if err != nil { - return false, xerrors.Errorf("finding best alloc storage: %w", err) + return false, false, xerrors.Errorf("finding best alloc storage: %w", err) } for _, info := range best { if _, ok := have[info.ID]; ok { - return true, nil + return true, false, nil } } - return false, nil + return false, false, nil } func (s *allocSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) { diff --git a/extern/sector-storage/selector_existing.go b/extern/sector-storage/selector_existing.go index 0703f22edc1..78a3b1988d0 100644 --- a/extern/sector-storage/selector_existing.go +++ b/extern/sector-storage/selector_existing.go @@ -28,18 +28,18 @@ func newExistingSelector(index stores.SectorIndex, sector abi.SectorID, alloc st } } -func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) 
(bool, error) { +func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, bool, error) { tasks, err := whnd.TaskTypes(ctx) if err != nil { - return false, xerrors.Errorf("getting supported worker task types: %w", err) + return false, false, xerrors.Errorf("getting supported worker task types: %w", err) } if _, supported := tasks[task]; !supported { - return false, nil + return false, false, nil } paths, err := whnd.workerRpc.Paths(ctx) if err != nil { - return false, xerrors.Errorf("getting worker paths: %w", err) + return false, false, xerrors.Errorf("getting worker paths: %w", err) } have := map[storiface.ID]struct{}{} @@ -49,21 +49,21 @@ func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt ssize, err := spt.SectorSize() if err != nil { - return false, xerrors.Errorf("getting sector size: %w", err) + return false, false, xerrors.Errorf("getting sector size: %w", err) } best, err := s.index.StorageFindSector(ctx, s.sector, s.alloc, ssize, s.allowFetch) if err != nil { - return false, xerrors.Errorf("finding best storage: %w", err) + return false, false, xerrors.Errorf("finding best storage: %w", err) } for _, info := range best { if _, ok := have[info.ID]; ok { - return true, nil + return true, false, nil } } - return false, nil + return false, false, nil } func (s *existingSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) { From 8c6cba7a0336078f9cd8a0f1584839980fcebca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 23 May 2022 23:27:28 +0200 Subject: [PATCH 2/5] feat: sched: Finalize* move selectors --- extern/sector-storage/manager.go | 21 +++++- extern/sector-storage/sched_test.go | 4 +- extern/sector-storage/selector_move.go | 96 ++++++++++++++++++++++++++ extern/sector-storage/selector_task.go | 6 +- 4 files changed, 119 insertions(+), 8 deletions(-) create mode 100644 
extern/sector-storage/selector_move.go diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 01600cc216c..fc8589bbf6a 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -589,6 +589,7 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector storage.SectorRef, return xerrors.Errorf("acquiring sector lock: %w", err) } + // first check if the unsealed file exists anywhere; If it doesn't ignore it unsealed := storiface.FTUnsealed { unsealedStores, err := m.index.StorageFindSector(ctx, sector.ID, storiface.FTUnsealed, 0, false) @@ -601,6 +602,8 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector storage.SectorRef, } } + // Make sure that the sealed file is still in sealing storage; In case it already + // isn't, we want to do finalize in long-term storage pathType := storiface.PathStorage { sealedStores, err := m.index.StorageFindSector(ctx, sector.ID, storiface.FTSealed, 0, false) @@ -616,6 +619,8 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector storage.SectorRef, } } + // do the cache trimming wherever the likely still very large cache lives. + // we really don't want to move it. 
selector := newExistingSelector(m.index, sector.ID, storiface.FTCache, false) err := m.sched.Schedule(ctx, sector, sealtasks.TTFinalize, selector, @@ -628,7 +633,10 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector storage.SectorRef, return err } - fetchSel := newAllocSelector(m.index, storiface.FTCache|storiface.FTSealed, storiface.PathStorage) + // get a selector for moving stuff into long-term storage + fetchSel := newMoveSelector(m.index, sector.ID, storiface.FTCache|storiface.FTSealed, storiface.PathStorage) + + // only move the unsealed file if it still exists and needs moving moveUnsealed := unsealed { if len(keepUnsealed) == 0 { @@ -636,6 +644,7 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector storage.SectorRef, } } + // move stuff to long-term storage err = m.sched.Schedule(ctx, sector, sealtasks.TTFetch, fetchSel, m.schedFetch(sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), func(ctx context.Context, w Worker) error { @@ -657,6 +666,7 @@ func (m *Manager) FinalizeReplicaUpdate(ctx context.Context, sector storage.Sect return xerrors.Errorf("acquiring sector lock: %w", err) } + // first check if the unsealed file exists anywhere; If it doesn't ignore it moveUnsealed := storiface.FTUnsealed { unsealedStores, err := m.index.StorageFindSector(ctx, sector.ID, storiface.FTUnsealed, 0, false) @@ -669,6 +679,8 @@ func (m *Manager) FinalizeReplicaUpdate(ctx context.Context, sector storage.Sect } } + // Make sure that the update file is still in sealing storage; In case it already + // isn't, we want to do finalize in long-term storage pathType := storiface.PathStorage { sealedStores, err := m.index.StorageFindSector(ctx, sector.ID, storiface.FTUpdate, 0, false) @@ -684,7 +696,9 @@ func (m *Manager) FinalizeReplicaUpdate(ctx context.Context, sector storage.Sect } } - selector := newExistingSelector(m.index, sector.ID, storiface.FTCache|storiface.FTUpdateCache, false) + // do the 
cache trimming wherever the likely still large cache lives. + // we really don't want to move it. + selector := newExistingSelector(m.index, sector.ID, storiface.FTUpdateCache, false) err := m.sched.Schedule(ctx, sector, sealtasks.TTFinalizeReplicaUpdate, selector, m.schedFetch(sector, storiface.FTCache|storiface.FTUpdateCache|moveUnsealed, pathType, storiface.AcquireMove), @@ -697,7 +711,8 @@ func (m *Manager) FinalizeReplicaUpdate(ctx context.Context, sector storage.Sect } move := func(types storiface.SectorFileType) error { - fetchSel := newAllocSelector(m.index, types, storiface.PathStorage) + // get a selector for moving stuff into long-term storage + fetchSel := newMoveSelector(m.index, sector.ID, types, storiface.PathStorage) { if len(keepUnsealed) == 0 { moveUnsealed = storiface.FTNone diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index 312503a9c1b..c5a8b98fd79 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -584,9 +584,9 @@ func TestSched(t *testing.T) { type slowishSelector bool -func (s slowishSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *WorkerHandle) (bool, error) { +func (s slowishSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, a *WorkerHandle) (bool, bool, error) { time.Sleep(200 * time.Microsecond) - return bool(s), nil + return bool(s), false, nil } func (s slowishSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) { diff --git a/extern/sector-storage/selector_move.go b/extern/sector-storage/selector_move.go new file mode 100644 index 00000000000..1fb4c9457f7 --- /dev/null +++ b/extern/sector-storage/selector_move.go @@ -0,0 +1,96 @@ +package sectorstorage + +import ( + "context" + + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-state-types/abi" + + "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" + 
"github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" +) + +type moveSelector struct { + index stores.SectorIndex + sector abi.SectorID + alloc storiface.SectorFileType + destPtype storiface.PathType +} + +func newMoveSelector(index stores.SectorIndex, sector abi.SectorID, alloc storiface.SectorFileType, destPtype storiface.PathType) *moveSelector { + return &moveSelector{ + index: index, + sector: sector, + alloc: alloc, + destPtype: destPtype, + } +} + +func (s *moveSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, bool, error) { + tasks, err := whnd.TaskTypes(ctx) + if err != nil { + return false, false, xerrors.Errorf("getting supported worker task types: %w", err) + } + if _, supported := tasks[task]; !supported { + return false, false, nil + } + + paths, err := whnd.workerRpc.Paths(ctx) + if err != nil { + return false, false, xerrors.Errorf("getting worker paths: %w", err) + } + + workerPaths := map[storiface.ID]int{} + for _, path := range paths { + workerPaths[path.ID] = 0 + } + + ssize, err := spt.SectorSize() + if err != nil { + return false, false, xerrors.Errorf("getting sector size: %w", err) + } + + // note: allowFetch is always false here, because we want to find workers with + // the sector available locally + preferred, err := s.index.StorageFindSector(ctx, s.sector, s.alloc, ssize, false) + if err != nil { + return false, false, xerrors.Errorf("finding preferred storage: %w", err) + } + + for _, info := range preferred { + if _, ok := workerPaths[info.ID]; ok { + workerPaths[info.ID]++ + } + } + + best, err := s.index.StorageBestAlloc(ctx, s.alloc, ssize, s.destPtype) + if err != nil { + return false, false, xerrors.Errorf("finding best dest storage: %w", err) + } + + var ok bool + + for _, info := range best { + if n, has := workerPaths[info.ID]; has { + ok = true + + // if the worker has a local path 
with the sector already in it + // prefer that worker; This usually means that the move operation is + // either a no-op because the sector is already in the correct path, + // or the move is a local move. + if n > 0 { + return true, true, nil + } + } + } + + return ok, false, nil +} + +func (s *moveSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) { + return a.Utilization() < b.Utilization(), nil +} + +var _ WorkerSelector = &moveSelector{} diff --git a/extern/sector-storage/selector_task.go b/extern/sector-storage/selector_task.go index 1be2c367769..d5ea9618bd2 100644 --- a/extern/sector-storage/selector_task.go +++ b/extern/sector-storage/selector_task.go @@ -19,14 +19,14 @@ func newTaskSelector() *taskSelector { return &taskSelector{} } -func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, error) { +func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *WorkerHandle) (bool, bool, error) { tasks, err := whnd.TaskTypes(ctx) if err != nil { - return false, xerrors.Errorf("getting supported worker task types: %w", err) + return false, false, xerrors.Errorf("getting supported worker task types: %w", err) } _, supported := tasks[task] - return supported, nil + return supported, false, nil } func (s *taskSelector) Cmp(ctx context.Context, _ sealtasks.TaskType, a, b *WorkerHandle) (bool, error) { From 7612860d15433742b1ad80880a56c047f0347116 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 24 May 2022 01:13:30 +0200 Subject: [PATCH 4/5] itests: test TestWorkerPledgeLocalFin --- itests/kit/ensemble.go | 2 ++ itests/kit/node_opts.go | 24 ++++++++++++++++-------- itests/worker_test.go | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/itests/kit/ensemble.go b/itests/kit/ensemble.go index 990baf92001..0de06bd676c 100644 --- a/itests/kit/ensemble.go +++ b/itests/kit/ensemble.go @@ -571,6 +571,7 @@ func (n *Ensemble) Start() *Ensemble { noLocal := m.options.minerNoLocalSealing assigner := m.options.minerAssigner + disallowRemoteFinalize := m.options.disallowRemoteFinalize var mineBlock = make(chan lotusminer.MineReq) opts := []node.Option{ @@ -597,6 +598,7 @@ func (n *Ensemble) Start() *Ensemble { } scfg.Storage.Assigner = assigner + scfg.Storage.DisallowRemoteFinalize = disallowRemoteFinalize scfg.Storage.ResourceFiltering = sectorstorage.ResourceFilteringDisabled return scfg.StorageManager() }), diff --git a/itests/kit/node_opts.go b/itests/kit/node_opts.go index 
deletions(-) diff --git a/documentation/en/default-lotus-miner-config.toml b/documentation/en/default-lotus-miner-config.toml index ab9924c02c4..1184724b528 100644 --- a/documentation/en/default-lotus-miner-config.toml +++ b/documentation/en/default-lotus-miner-config.toml @@ -542,6 +542,13 @@ # env var: LOTUS_STORAGE_ASSIGNER #Assigner = "utilization" + # If you see stuck Finalize tasks after enabling this setting, check + # 'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]' + # + # type: bool + # env var: LOTUS_STORAGE_DISALLOWREMOTEFINALIZE + #DisallowRemoteFinalize = false + # ResourceFiltering instructs the system which resource filtering strategy # to use when evaluating tasks against this worker. An empty value defaults # to "hardware". diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index fc8589bbf6a..1abe04fa17e 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -71,7 +71,8 @@ type Manager struct { workLk sync.Mutex work *statestore.StateStore - parallelCheckLimit int + parallelCheckLimit int + disallowRemoteFinalize bool callToWork map[storiface.CallID]WorkID // used when we get an early return and there's no callToWork mapping @@ -123,6 +124,8 @@ type Config struct { // PoSt config ParallelCheckLimit int + DisallowRemoteFinalize bool + Assigner string } @@ -155,7 +158,8 @@ func New(ctx context.Context, lstor *stores.Local, stor stores.Store, ls stores. 
localProver: prover, - parallelCheckLimit: sc.ParallelCheckLimit, + parallelCheckLimit: sc.ParallelCheckLimit, + disallowRemoteFinalize: sc.DisallowRemoteFinalize, work: mss, callToWork: map[storiface.CallID]WorkID{}, @@ -634,7 +638,7 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector storage.SectorRef, } // get a selector for moving stuff into long-term storage - fetchSel := newMoveSelector(m.index, sector.ID, storiface.FTCache|storiface.FTSealed, storiface.PathStorage) + fetchSel := newMoveSelector(m.index, sector.ID, storiface.FTCache|storiface.FTSealed, storiface.PathStorage, !m.disallowRemoteFinalize) // only move the unsealed file if it still exists and needs moving moveUnsealed := unsealed @@ -712,7 +716,7 @@ func (m *Manager) FinalizeReplicaUpdate(ctx context.Context, sector storage.Sect move := func(types storiface.SectorFileType) error { // get a selector for moving stuff into long-term storage - fetchSel := newMoveSelector(m.index, sector.ID, types, storiface.PathStorage) + fetchSel := newMoveSelector(m.index, sector.ID, types, storiface.PathStorage, !m.disallowRemoteFinalize) { if len(keepUnsealed) == 0 { moveUnsealed = storiface.FTNone diff --git a/extern/sector-storage/selector_move.go b/extern/sector-storage/selector_move.go index 1fb4c9457f7..7c63691b143 100644 --- a/extern/sector-storage/selector_move.go +++ b/extern/sector-storage/selector_move.go @@ -13,18 +13,20 @@ import ( ) type moveSelector struct { - index stores.SectorIndex - sector abi.SectorID - alloc storiface.SectorFileType - destPtype storiface.PathType + index stores.SectorIndex + sector abi.SectorID + alloc storiface.SectorFileType + destPtype storiface.PathType + allowRemote bool } -func newMoveSelector(index stores.SectorIndex, sector abi.SectorID, alloc storiface.SectorFileType, destPtype storiface.PathType) *moveSelector { +func newMoveSelector(index stores.SectorIndex, sector abi.SectorID, alloc storiface.SectorFileType, destPtype storiface.PathType, allowRemote 
bool) *moveSelector { return &moveSelector{ - index: index, - sector: sector, - alloc: alloc, - destPtype: destPtype, + index: index, + sector: sector, + alloc: alloc, + destPtype: destPtype, + allowRemote: allowRemote, } } @@ -86,7 +88,7 @@ func (s *moveSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi. } } - return ok, false, nil + return ok && s.allowRemote, false, nil } func (s *moveSelector) Cmp(ctx context.Context, task sealtasks.TaskType, a, b *WorkerHandle) (bool, error) { diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index a9c7ed8f222..ceacaca428f 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -763,6 +763,13 @@ This parameter is ONLY applicable if the retrieval pricing policy strategy has b Comment: `Assigner specifies the worker assigner to use when scheduling tasks. "utilization" (default) - assign tasks to workers with lowest utilization. "spread" - assign tasks to as many distinct workers as possible.`, + }, + { + Name: "DisallowRemoteFinalize", + Type: "bool", + + Comment: `If you see stuck Finalize tasks after enabling this setting, check +'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]'`, }, { Name: "ResourceFiltering", diff --git a/node/config/storage.go b/node/config/storage.go index 6ab35984a91..de65dc60a41 100644 --- a/node/config/storage.go +++ b/node/config/storage.go @@ -63,6 +63,7 @@ func (c *StorageMiner) StorageManager() sectorstorage.Config { AllowProveReplicaUpdate2: c.Storage.AllowProveReplicaUpdate2, AllowRegenSectorKey: c.Storage.AllowRegenSectorKey, ResourceFiltering: c.Storage.ResourceFiltering, + DisallowRemoteFinalize: c.Storage.DisallowRemoteFinalize, Assigner: c.Storage.Assigner, diff --git a/node/config/types.go b/node/config/types.go index 0e5bcbd5fa1..c79f70876a6 100644 --- a/node/config/types.go +++ b/node/config/types.go @@ -335,6 +335,19 @@ type SealerConfig struct { // "spread" - assign tasks to as many distinct workers as possible. 
Assigner string + // DisallowRemoteFinalize when set to true will force all Finalize tasks to + // run on workers with local access to both long-term storage and the sealing + // path containing the sector. + // + // WARNING: Only set this if all workers have access to long-term storage + // paths. If this flag is enabled, and there are workers without long-term + // storage access, sectors will not be moved from them, and Finalize tasks + // will appear to be stuck. + // + // If you see stuck Finalize tasks after enabling this setting, check + // 'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]' + DisallowRemoteFinalize bool + // ResourceFiltering instructs the system which resource filtering strategy // to use when evaluating tasks against this worker. An empty value defaults // to "hardware". From 8c081e271a187a156001a4a36a927b0598cd5328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 24 May 2022 01:13:30 +0200 Subject: [PATCH 4/5] itests: test TestWorkerPledgeLocalFin --- itests/kit/ensemble.go | 2 ++ itests/kit/node_opts.go | 24 ++++++++++++++++-------- itests/worker_test.go | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/itests/kit/ensemble.go b/itests/kit/ensemble.go index 990baf92001..0de06bd676c 100644 --- a/itests/kit/ensemble.go +++ b/itests/kit/ensemble.go @@ -571,6 +571,7 @@ func (n *Ensemble) Start() *Ensemble { noLocal := m.options.minerNoLocalSealing assigner := m.options.minerAssigner + disallowRemoteFinalize := m.options.disallowRemoteFinalize var mineBlock = make(chan lotusminer.MineReq) opts := []node.Option{ @@ -597,6 +598,7 @@ func (n *Ensemble) Start() *Ensemble { } scfg.Storage.Assigner = assigner + scfg.Storage.DisallowRemoteFinalize = disallowRemoteFinalize scfg.Storage.ResourceFiltering = sectorstorage.ResourceFilteringDisabled return scfg.StorageManager() }), diff --git a/itests/kit/node_opts.go b/itests/kit/node_opts.go index 
936308608ac..0de640b2722 100644 --- a/itests/kit/node_opts.go +++ b/itests/kit/node_opts.go @@ -34,14 +34,15 @@ type nodeOpts struct { ownerKey *wallet.Key extraNodeOpts []node.Option - subsystems MinerSubsystem - mainMiner *TestMiner - disableLibp2p bool - optBuilders []OptBuilder - sectorSize abi.SectorSize - maxStagingDealsBytes int64 - minerNoLocalSealing bool // use worker - minerAssigner string + subsystems MinerSubsystem + mainMiner *TestMiner + disableLibp2p bool + optBuilders []OptBuilder + sectorSize abi.SectorSize + maxStagingDealsBytes int64 + minerNoLocalSealing bool // use worker + minerAssigner string + disallowRemoteFinalize bool workerTasks []sealtasks.TaskType workerStorageOpt func(stores.Store) stores.Store @@ -105,6 +106,13 @@ func WithAssigner(a string) NodeOpt { } } +func WithDisallowRemoteFinalize(d bool) NodeOpt { + return func(opts *nodeOpts) error { + opts.disallowRemoteFinalize = d + return nil + } +} + func DisableLibp2p() NodeOpt { return func(opts *nodeOpts) error { opts.disableLibp2p = true diff --git a/itests/worker_test.go b/itests/worker_test.go index fd8798448b7..ec487a6e782 100644 --- a/itests/worker_test.go +++ b/itests/worker_test.go @@ -57,6 +57,22 @@ func TestWorkerPledgeSpread(t *testing.T) { miner.PledgeSectors(ctx, 1, 0, nil) } +func TestWorkerPledgeLocalFin(t *testing.T) { + ctx := context.Background() + _, miner, worker, ens := kit.EnsembleWorker(t, kit.WithAllSubsystems(), kit.ThroughRPC(), + kit.WithTaskTypes([]sealtasks.TaskType{sealtasks.TTFetch, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTPreCommit2, sealtasks.TTCommit2, sealtasks.TTUnseal}), + kit.WithDisallowRemoteFinalize(true), + ) // no mock proofs + + ens.InterconnectAll().BeginMining(50 * time.Millisecond) + + e, err := worker.Enabled(ctx) + require.NoError(t, err) + require.True(t, e) + + miner.PledgeSectors(ctx, 1, 0, nil) +} + func TestWorkerDataCid(t *testing.T) { ctx := context.Background() _, 
miner, worker, _ := kit.EnsembleWorker(t, kit.WithAllSubsystems(), kit.ThroughRPC(), kit.WithNoLocalSealing(true), From 70f3b985747a3bdd0d9c3d1931ca7ff042a031d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 24 May 2022 01:33:56 +0200 Subject: [PATCH 5/5] Fix config doc --- documentation/en/default-lotus-miner-config.toml | 9 +++++++++ node/config/doc_gen.go | 11 ++++++++++- node/config/types.go | 4 ++-- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/documentation/en/default-lotus-miner-config.toml b/documentation/en/default-lotus-miner-config.toml index 1184724b528..282d3f3c6a1 100644 --- a/documentation/en/default-lotus-miner-config.toml +++ b/documentation/en/default-lotus-miner-config.toml @@ -542,6 +542,15 @@ # env var: LOTUS_STORAGE_ASSIGNER #Assigner = "utilization" + # DisallowRemoteFinalize when set to true will force all Finalize tasks to + # run on workers with local access to both long-term storage and the sealing + # path containing the sector. + # -- + # WARNING: Only set this if all workers have access to long-term storage + # paths. If this flag is enabled, and there are workers without long-term + # storage access, sectors will not be moved from them, and Finalize tasks + # will appear to be stuck. 
+ # -- # If you see stuck Finalize tasks after enabling this setting, check # 'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]' # diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index ceacaca428f..c3fed5197b0 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -768,7 +768,16 @@ This parameter is ONLY applicable if the retrieval pricing policy strategy has b Name: "DisallowRemoteFinalize", Type: "bool", - Comment: `If you see stuck Finalize tasks after enabling this setting, check + Comment: `DisallowRemoteFinalize when set to true will force all Finalize tasks to +run on workers with local access to both long-term storage and the sealing +path containing the sector. +-- +WARNING: Only set this if all workers have access to long-term storage +paths. If this flag is enabled, and there are workers without long-term +storage access, sectors will not be moved from them, and Finalize tasks +will appear to be stuck. +-- +If you see stuck Finalize tasks after enabling this setting, check 'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]'`, }, { diff --git a/node/config/types.go b/node/config/types.go index c79f70876a6..940d4893a11 100644 --- a/node/config/types.go +++ b/node/config/types.go @@ -338,12 +338,12 @@ type SealerConfig struct { // DisallowRemoteFinalize when set to true will force all Finalize tasks to // run on workers with local access to both long-term storage and the sealing // path containing the sector. - // + // -- // WARNING: Only set this if all workers have access to long-term storage // paths. If this flag is enabled, and there are workers without long-term // storage access, sectors will not be moved from them, and Finalize tasks // will appear to be stuck. - // + // -- // If you see stuck Finalize tasks after enabling this setting, check // 'lotus-miner sealing sched-diag' and 'lotus-miner storage find [sector num]' DisallowRemoteFinalize bool