From 159ce13f5e20beb238ac8bf8ac99f53ec4d89a70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 6 Sep 2020 18:47:16 +0200 Subject: [PATCH 01/61] Async worker API --- api/api_storage.go | 1 + api/api_worker.go | 17 +- api/apistruct/struct.go | 122 ++++++++---- build/version.go | 2 +- extern/sector-storage/localworker.go | 217 +++++++++++++++------ extern/sector-storage/manager.go | 8 +- extern/sector-storage/storiface/storage.go | 1 + extern/sector-storage/storiface/worker.go | 45 ++++- 8 files changed, 293 insertions(+), 120 deletions(-) create mode 100644 extern/sector-storage/storiface/storage.go diff --git a/api/api_storage.go b/api/api_storage.go index 48f6e9e4551..77c05259516 100644 --- a/api/api_storage.go +++ b/api/api_storage.go @@ -64,6 +64,7 @@ type StorageMiner interface { WorkerConnect(context.Context, string) error WorkerStats(context.Context) (map[uint64]storiface.WorkerStats, error) WorkerJobs(context.Context) (map[uint64][]storiface.WorkerJob, error) + storiface.WorkerReturn // SealingSchedDiag dumps internal sealing scheduler state SealingSchedDiag(context.Context) (interface{}, error) diff --git a/api/api_worker.go b/api/api_worker.go index 00c4df8bcec..b64c7ec24c2 100644 --- a/api/api_worker.go +++ b/api/api_worker.go @@ -2,15 +2,12 @@ package api import ( "context" - "io" - "github.com/ipfs/go-cid" + "github.com/filecoin-project/specs-actors/actors/abi" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" - "github.com/filecoin-project/specs-actors/actors/abi" - "github.com/filecoin-project/specs-storage/storage" "github.com/filecoin-project/lotus/build" ) @@ -23,18 +20,12 @@ type WorkerAPI interface { Paths(context.Context) ([]stores.StoragePath, error) Info(context.Context) (storiface.WorkerInfo, error) - AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (abi.PieceInfo, error) - - storage.Sealer + storiface.WorkerCalls - MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) error - - UnsealPiece(context.Context, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) error - ReadPiece(context.Context, io.Writer, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize) (bool, error) + // Storage / Other + Remove(ctx context.Context, sector abi.SectorID) error StorageAddLocal(ctx context.Context, path string) error - Fetch(context.Context, abi.SectorID, stores.SectorFileType, stores.PathType, stores.AcquireMode) error - Closing(context.Context) (<-chan struct{}, error) } diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index e2444f16bb2..5fb80433d58 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -270,6 +270,18 @@ type StorageMinerStruct struct { WorkerStats func(context.Context) (map[uint64]storiface.WorkerStats, error) `perm:"admin"` WorkerJobs func(context.Context) (map[uint64][]storiface.WorkerJob, error) `perm:"admin"` + ReturnAddPiece func(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error `perm:"admin"` + ReturnSealPreCommit1 func(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error `perm:"admin"` + ReturnSealPreCommit2 func(ctx context.Context, callID storiface.CallID, sealed storage.SectorCids, err string) error 
`perm:"admin"` + ReturnSealCommit1 func(ctx context.Context, callID storiface.CallID, out storage.Commit1Out, err string) error `perm:"admin"` + ReturnSealCommit2 func(ctx context.Context, callID storiface.CallID, proof storage.Proof, err string) error `perm:"admin"` + ReturnFinalizeSector func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` + ReturnReleaseUnsealed func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` + ReturnMoveStorage func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` + ReturnUnsealPiece func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` + ReturnReadPiece func(ctx context.Context, callID storiface.CallID, ok bool, err string) error `perm:"admin"` + ReturnFetch func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` + SealingSchedDiag func(context.Context) (interface{}, error) `perm:"admin"` StorageList func(context.Context) (map[stores.ID][]stores.Decl, error) `perm:"admin"` @@ -317,21 +329,21 @@ type WorkerStruct struct { Paths func(context.Context) ([]stores.StoragePath, error) `perm:"admin"` Info func(context.Context) (storiface.WorkerInfo, error) `perm:"admin"` - AddPiece func(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (abi.PieceInfo, error) `perm:"admin"` - SealPreCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storage.PreCommit1Out, error) `perm:"admin"` - SealPreCommit2 func(context.Context, abi.SectorID, storage.PreCommit1Out) (cids storage.SectorCids, err error) `perm:"admin"` - SealCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storage.Commit1Out, error) `perm:"admin"` - SealCommit2 func(context.Context, abi.SectorID, storage.Commit1Out) (storage.Proof, error) `perm:"admin"` - FinalizeSector func(context.Context, abi.SectorID, []storage.Range) error `perm:"admin"` - ReleaseUnsealed func(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) error `perm:"admin"` - Remove func(ctx context.Context, sector abi.SectorID) error `perm:"admin"` - MoveStorage func(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) error `perm:"admin"` - StorageAddLocal func(ctx context.Context, path string) error `perm:"admin"` + AddPiece func(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) `perm:"admin"` + SealPreCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) `perm:"admin"` + SealPreCommit2 func(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) `perm:"admin"` + SealCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) `perm:"admin"` + SealCommit2 func(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) `perm:"admin"` + FinalizeSector func(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) `perm:"admin"` + ReleaseUnsealed func(ctx context.Context, sector 
abi.SectorID, safeToFree []storage.Range) (storiface.CallID, error) `perm:"admin"` + MoveStorage func(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (storiface.CallID, error) `perm:"admin"` + UnsealPiece func(context.Context, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) (storiface.CallID, error) `perm:"admin"` + ReadPiece func(context.Context, io.Writer, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize) (storiface.CallID, error) `perm:"admin"` + Fetch func(context.Context, abi.SectorID, stores.SectorFileType, stores.PathType, stores.AcquireMode) (storiface.CallID, error) `perm:"admin"` - UnsealPiece func(context.Context, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) error `perm:"admin"` - ReadPiece func(context.Context, io.Writer, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize) (bool, error) `perm:"admin"` + Remove func(ctx context.Context, sector abi.SectorID) error `perm:"admin"` + StorageAddLocal func(ctx context.Context, path string) error `perm:"admin"` - Fetch func(context.Context, abi.SectorID, stores.SectorFileType, stores.PathType, stores.AcquireMode) error `perm:"admin"` Closing func(context.Context) (<-chan struct{}, error) `perm:"admin"` } @@ -1038,6 +1050,50 @@ func (c *StorageMinerStruct) WorkerJobs(ctx context.Context) (map[uint64][]stori return c.Internal.WorkerJobs(ctx) } +func (c *StorageMinerStruct) ReturnAddPiece(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error { + return c.Internal.ReturnAddPiece(ctx, callID, pi, err) +} + +func (c *StorageMinerStruct) ReturnSealPreCommit1(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error { + return c.Internal.ReturnSealPreCommit1(ctx, callID, p1o, err) +} + +func (c *StorageMinerStruct) ReturnSealPreCommit2(ctx context.Context, callID storiface.CallID, sealed storage.SectorCids, err string) error { + return c.Internal.ReturnSealPreCommit2(ctx, callID, sealed, err) +} + +func (c *StorageMinerStruct) ReturnSealCommit1(ctx context.Context, callID storiface.CallID, out storage.Commit1Out, err string) error { + return c.Internal.ReturnSealCommit1(ctx, callID, out, err) +} + +func (c *StorageMinerStruct) ReturnSealCommit2(ctx context.Context, callID storiface.CallID, proof storage.Proof, err string) error { + return c.Internal.ReturnSealCommit2(ctx, callID, proof, err) +} + +func (c *StorageMinerStruct) ReturnFinalizeSector(ctx context.Context, callID storiface.CallID, err string) error { + return c.Internal.ReturnFinalizeSector(ctx, callID, err) +} + +func (c *StorageMinerStruct) ReturnReleaseUnsealed(ctx context.Context, callID storiface.CallID, err string) error { + return c.Internal.ReturnReleaseUnsealed(ctx, callID, err) +} + +func (c *StorageMinerStruct) ReturnMoveStorage(ctx context.Context, callID storiface.CallID, err string) error { + return c.Internal.ReturnMoveStorage(ctx, callID, err) +} + +func (c *StorageMinerStruct) ReturnUnsealPiece(ctx context.Context, callID storiface.CallID, err string) error { + return c.Internal.ReturnUnsealPiece(ctx, callID, err) +} + +func (c *StorageMinerStruct) ReturnReadPiece(ctx context.Context, callID storiface.CallID, ok bool, err string) error { + return c.Internal.ReturnReadPiece(ctx, callID, ok, err) +} + +func (c *StorageMinerStruct) ReturnFetch(ctx context.Context, callID storiface.CallID, err string) error { + return c.Internal.ReturnFetch(ctx, callID, err) +} + 
func (c *StorageMinerStruct) SealingSchedDiag(ctx context.Context) (interface{}, error) { return c.Internal.SealingSchedDiag(ctx) } @@ -1220,56 +1276,56 @@ func (w *WorkerStruct) Info(ctx context.Context) (storiface.WorkerInfo, error) { return w.Internal.Info(ctx) } -func (w *WorkerStruct) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (abi.PieceInfo, error) { +func (w *WorkerStruct) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) { return w.Internal.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData) } -func (w *WorkerStruct) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storage.PreCommit1Out, error) { +func (w *WorkerStruct) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { return w.Internal.SealPreCommit1(ctx, sector, ticket, pieces) } -func (w *WorkerStruct) SealPreCommit2(ctx context.Context, sector abi.SectorID, p1o storage.PreCommit1Out) (storage.SectorCids, error) { - return w.Internal.SealPreCommit2(ctx, sector, p1o) +func (w *WorkerStruct) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) { + return w.Internal.SealPreCommit2(ctx, sector, pc1o) } -func (w *WorkerStruct) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storage.Commit1Out, error) { +func (w *WorkerStruct) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) { return w.Internal.SealCommit1(ctx, sector, ticket, seed, pieces, cids) } -func (w *WorkerStruct) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storage.Proof, error) { +func (w *WorkerStruct) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) { return w.Internal.SealCommit2(ctx, sector, c1o) } -func (w *WorkerStruct) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) error { +func (w *WorkerStruct) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) { return w.Internal.FinalizeSector(ctx, sector, keepUnsealed) } -func (w *WorkerStruct) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) error { +func (w *WorkerStruct) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) (storiface.CallID, error) { return w.Internal.ReleaseUnsealed(ctx, sector, safeToFree) } -func (w *WorkerStruct) Remove(ctx context.Context, sector abi.SectorID) error { - return w.Internal.Remove(ctx, sector) +func (w *WorkerStruct) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (storiface.CallID, error) { + return w.Internal.MoveStorage(ctx, sector, types) } -func (w *WorkerStruct) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) error { - return w.Internal.MoveStorage(ctx, sector, types) +func (w *WorkerStruct) UnsealPiece(ctx context.Context, sector abi.SectorID, offset 
storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, ticket abi.SealRandomness, c cid.Cid) (storiface.CallID, error) { + return w.Internal.UnsealPiece(ctx, sector, offset, size, ticket, c) } -func (w *WorkerStruct) StorageAddLocal(ctx context.Context, path string) error { - return w.Internal.StorageAddLocal(ctx, path) +func (w *WorkerStruct) ReadPiece(ctx context.Context, sink io.Writer, sector abi.SectorID, offset storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { + return w.Internal.ReadPiece(ctx, sink, sector, offset, size) } -func (w *WorkerStruct) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, c cid.Cid) error { - return w.Internal.UnsealPiece(ctx, id, index, size, randomness, c) +func (w *WorkerStruct) Fetch(ctx context.Context, id abi.SectorID, fileType stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) (storiface.CallID, error) { + return w.Internal.Fetch(ctx, id, fileType, ptype, am) } -func (w *WorkerStruct) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (bool, error) { - return w.Internal.ReadPiece(ctx, writer, id, index, size) +func (w *WorkerStruct) Remove(ctx context.Context, sector abi.SectorID) error { + return w.Internal.Remove(ctx, sector) } -func (w *WorkerStruct) Fetch(ctx context.Context, id abi.SectorID, fileType stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) error { - return w.Internal.Fetch(ctx, id, fileType, ptype, am) +func (w *WorkerStruct) StorageAddLocal(ctx context.Context, path string) error { + return w.Internal.StorageAddLocal(ctx, path) } func (w *WorkerStruct) Closing(ctx context.Context) (<-chan struct{}, error) { diff --git a/build/version.go b/build/version.go index a3c5d155230..b57a72b496e 100644 --- a/build/version.go +++ b/build/version.go @@ -53,7 +53,7 @@ func (ve Version) EqMajorMinor(v2 Version) bool { } // APIVersion is a semver version of the rpc api exposed -var APIVersion Version = newVer(0, 14, 0) +var APIVersion Version = newVer(0, 15, 0) //nolint:varcheck,deadcode const ( diff --git a/extern/sector-storage/localworker.go b/extern/sector-storage/localworker.go index 773ef2d3b68..9d451309df9 100644 --- a/extern/sector-storage/localworker.go +++ b/extern/sector-storage/localworker.go @@ -7,6 +7,7 @@ import ( "runtime" "github.com/elastic/go-sysinfo" + "github.com/google/uuid" "github.com/hashicorp/go-multierror" "github.com/ipfs/go-cid" "golang.org/x/xerrors" @@ -33,6 +34,7 @@ type LocalWorker struct { storage stores.Store localStore *stores.Local sindex stores.SectorIndex + ret storiface.WorkerReturn acceptTasks map[sealtasks.TaskType]struct{} } @@ -95,6 +97,25 @@ func (l *LocalWorker) sb() (ffiwrapper.Storage, error) { return ffiwrapper.New(&localWorkerPathProvider{w: l}, l.scfg) } +func (l *LocalWorker) asyncCall(sector abi.SectorID, work func(ci storiface.CallID)) (storiface.CallID, error) { + ci := storiface.CallID{ + Sector: sector, + ID: uuid.New(), + } + + go work(ci) + + return ci, nil +} + +func errstr(err error) string { + if err != nil { + return err.Error() + } + + return "" +} + func (l *LocalWorker) NewSector(ctx context.Context, sector abi.SectorID) error { sb, err := l.sb() if err != nil { @@ -104,92 +125,140 @@ func (l *LocalWorker) NewSector(ctx context.Context, sector abi.SectorID) error return sb.NewSector(ctx, sector) } -func (l *LocalWorker) AddPiece(ctx 
context.Context, sector abi.SectorID, epcs []abi.UnpaddedPieceSize, sz abi.UnpaddedPieceSize, r io.Reader) (abi.PieceInfo, error) { +func (l *LocalWorker) AddPiece(ctx context.Context, sector abi.SectorID, epcs []abi.UnpaddedPieceSize, sz abi.UnpaddedPieceSize, r io.Reader) (storiface.CallID, error) { sb, err := l.sb() if err != nil { - return abi.PieceInfo{}, err + return storiface.UndefCall, err } - return sb.AddPiece(ctx, sector, epcs, sz, r) -} + return l.asyncCall(sector, func(ci storiface.CallID) { + pi, err := sb.AddPiece(ctx, sector, epcs, sz, r) -func (l *LocalWorker) Fetch(ctx context.Context, sector abi.SectorID, fileType stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) error { - _, done, err := (&localWorkerPathProvider{w: l, op: am}).AcquireSector(ctx, sector, fileType, stores.FTNone, ptype) - if err != nil { - return err - } - done() - return nil + if err := l.ret.ReturnAddPiece(ctx, ci, pi, errstr(err)); err != nil { + log.Errorf("ReturnAddPiece: %+v", err) + } + }) } -func (l *LocalWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (out storage2.PreCommit1Out, err error) { - { - // cleanup previous failed attempts if they exist - if err := l.storage.Remove(ctx, sector, stores.FTSealed, true); err != nil { - return nil, xerrors.Errorf("cleaning up sealed data: %w", err) +func (l *LocalWorker) Fetch(ctx context.Context, sector abi.SectorID, fileType stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) (storiface.CallID, error) { + return l.asyncCall(sector, func(ci storiface.CallID) { + _, done, err := (&localWorkerPathProvider{w: l, op: am}).AcquireSector(ctx, sector, fileType, stores.FTNone, ptype) + if err == nil { + done() } - if err := l.storage.Remove(ctx, sector, stores.FTCache, true); err != nil { - return nil, xerrors.Errorf("cleaning up cache data: %w", err) + if err := l.ret.ReturnFetch(ctx, ci, errstr(err)); err != nil { + log.Errorf("ReturnFetch: %+v", err) } - } + }) +} - sb, err := l.sb() - if err != nil { - return nil, err - } +func (l *LocalWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { + return l.asyncCall(sector, func(ci storiface.CallID) { + var err error + var p1o storage2.PreCommit1Out + defer func() { + if err := l.ret.ReturnSealPreCommit1(ctx, ci, p1o, errstr(err)); err != nil { + log.Errorf("ReturnSealPreCommit1: %+v", err) + } + }() + + { + // cleanup previous failed attempts if they exist + if err = l.storage.Remove(ctx, sector, stores.FTSealed, true); err != nil { + err = xerrors.Errorf("cleaning up sealed data: %w", err) + return + } + + if err = l.storage.Remove(ctx, sector, stores.FTCache, true); err != nil { + err = xerrors.Errorf("cleaning up cache data: %w", err) + return + } + } + + var sb ffiwrapper.Storage + sb, err = l.sb() + if err != nil { + return + } - return sb.SealPreCommit1(ctx, sector, ticket, pieces) + p1o, err = sb.SealPreCommit1(ctx, sector, ticket, pieces) + }) } -func (l *LocalWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage2.PreCommit1Out) (cids storage2.SectorCids, err error) { +func (l *LocalWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage2.PreCommit1Out) (storiface.CallID, error) { sb, err := l.sb() if err != nil { - return storage2.SectorCids{}, err + return storiface.UndefCall, err } - return sb.SealPreCommit2(ctx, sector, phase1Out) + return 
l.asyncCall(sector, func(ci storiface.CallID) {
+		cs, err := sb.SealPreCommit2(ctx, sector, phase1Out)
+
+		if err := l.ret.ReturnSealPreCommit2(ctx, ci, cs, errstr(err)); err != nil {
+			log.Errorf("ReturnSealPreCommit2: %+v", err)
+		}
+	})
 }
 
-func (l *LocalWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage2.SectorCids) (output storage2.Commit1Out, err error) {
+func (l *LocalWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage2.SectorCids) (storiface.CallID, error) {
 	sb, err := l.sb()
 	if err != nil {
-		return nil, err
+		return storiface.UndefCall, err
 	}
 
-	return sb.SealCommit1(ctx, sector, ticket, seed, pieces, cids)
+	return l.asyncCall(sector, func(ci storiface.CallID) {
+		c1o, err := sb.SealCommit1(ctx, sector, ticket, seed, pieces, cids)
+
+		if err := l.ret.ReturnSealCommit1(ctx, ci, c1o, errstr(err)); err != nil {
+			log.Errorf("ReturnSealCommit1: %+v", err)
+		}
+	})
 }
 
-func (l *LocalWorker) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage2.Commit1Out) (proof storage2.Proof, err error) {
+func (l *LocalWorker) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage2.Commit1Out) (storiface.CallID, error) {
 	sb, err := l.sb()
 	if err != nil {
-		return nil, err
+		return storiface.UndefCall, err
 	}
 
-	return sb.SealCommit2(ctx, sector, phase1Out)
+	return l.asyncCall(sector, func(ci storiface.CallID) {
+		proof, err := sb.SealCommit2(ctx, sector, phase1Out)
+
+		if err := l.ret.ReturnSealCommit2(ctx, ci, proof, errstr(err)); err != nil {
+			log.Errorf("ReturnSealCommit2: %+v", err)
+		}
+	})
 }
 
-func (l *LocalWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage2.Range) error {
+func (l *LocalWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage2.Range) (storiface.CallID, error) {
 	sb, err := l.sb()
 	if err != nil {
-		return err
+		return storiface.UndefCall, err
 	}
 
-	if err := sb.FinalizeSector(ctx, sector, keepUnsealed); err != nil {
-		return xerrors.Errorf("finalizing sector: %w", err)
-	}
-
-	if len(keepUnsealed) == 0 {
-		if err := l.storage.Remove(ctx, sector, stores.FTUnsealed, true); err != nil {
-			return xerrors.Errorf("removing unsealed data: %w", err)
-		}
-	}
-
-	return nil
+	return l.asyncCall(sector, func(ci storiface.CallID) {
+		var err error
+		defer func() {
+			// report the (possibly nil) result exactly once, when the async call finishes
+			if err := l.ret.ReturnFinalizeSector(ctx, ci, errstr(err)); err != nil {
+				log.Errorf("ReturnFinalizeSector: %+v", err)
+			}
+		}()
+
+		if err = sb.FinalizeSector(ctx, sector, keepUnsealed); err != nil {
+			err = xerrors.Errorf("finalizing sector: %w", err)
+			return
+		}
+
+		if len(keepUnsealed) == 0 {
+			if err = l.storage.Remove(ctx, sector, stores.FTUnsealed, true); err != nil {
+				err = xerrors.Errorf("removing unsealed data: %w", err)
+				return
+			}
+		}
+	})
 }
 
-func (l *LocalWorker) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage2.Range) error {
-	return xerrors.Errorf("implement me")
+func (l *LocalWorker) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage2.Range) (storiface.CallID, error) {
+	return storiface.UndefCall, xerrors.Errorf("implement me")
 }
 
 func (l *LocalWorker) Remove(ctx context.Context, sector abi.SectorID) error {
@@ -208,42 +277,60 @@ func (l
*LocalWorker) Remove(ctx context.Context, sector abi.SectorID) error { return err } -func (l *LocalWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) error { - if err := l.storage.MoveStorage(ctx, sector, l.scfg.SealProofType, types); err != nil { - return xerrors.Errorf("moving sealed data to storage: %w", err) - } +func (l *LocalWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (storiface.CallID, error) { + return l.asyncCall(sector, func(ci storiface.CallID) { + err := l.storage.MoveStorage(ctx, sector, l.scfg.SealProofType, types) - return nil + if err := l.ret.ReturnMoveStorage(ctx, ci, errstr(err)); err != nil { + log.Errorf("ReturnMoveStorage: %+v", err) + } + }) } -func (l *LocalWorker) UnsealPiece(ctx context.Context, sector abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) error { +func (l *LocalWorker) UnsealPiece(ctx context.Context, sector abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) (storiface.CallID, error) { sb, err := l.sb() if err != nil { - return err + return storiface.UndefCall, err } - if err := sb.UnsealPiece(ctx, sector, index, size, randomness, cid); err != nil { - return xerrors.Errorf("unsealing sector: %w", err) - } + return l.asyncCall(sector, func(ci storiface.CallID) { + var err error + defer func() { + if err := l.ret.ReturnUnsealPiece(ctx, ci, errstr(err)); err != nil { + log.Errorf("ReturnUnsealPiece: %+v", err) + } + }() - if err := l.storage.RemoveCopies(ctx, sector, stores.FTSealed); err != nil { - return xerrors.Errorf("removing source data: %w", err) - } + if err = sb.UnsealPiece(ctx, sector, index, size, randomness, cid); err != nil { + err = xerrors.Errorf("unsealing sector: %w", err) + return + } - if err := l.storage.RemoveCopies(ctx, sector, stores.FTCache); err != nil { - return xerrors.Errorf("removing source data: %w", err) - } + if err = l.storage.RemoveCopies(ctx, sector, stores.FTSealed); err != nil { + err = xerrors.Errorf("removing source data: %w", err) + return + } - return nil + if err = l.storage.RemoveCopies(ctx, sector, stores.FTCache); err != nil { + err = xerrors.Errorf("removing source data: %w", err) + return + } + }) } -func (l *LocalWorker) ReadPiece(ctx context.Context, writer io.Writer, sector abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (bool, error) { +func (l *LocalWorker) ReadPiece(ctx context.Context, writer io.Writer, sector abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { sb, err := l.sb() if err != nil { - return false, err + return storiface.UndefCall, err } - return sb.ReadPiece(ctx, writer, sector, index, size) + return l.asyncCall(sector, func(ci storiface.CallID) { + ok, err := sb.ReadPiece(ctx, writer, sector, index, size) + + if err := l.ret.ReturnReadPiece(ctx, ci, ok, errstr(err)); err != nil { + log.Errorf("ReturnReadPiece: %+v", err) + } + }) } func (l *LocalWorker) TaskTypes(context.Context) (map[sealtasks.TaskType]struct{}, error) { diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 300958e397a..683f3450567 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -29,13 +29,7 @@ var ErrNoWorkers = errors.New("no suitable workers found") type URLs []string type Worker interface { - ffiwrapper.StorageSealer - - MoveStorage(ctx 
context.Context, sector abi.SectorID, types stores.SectorFileType) error - - Fetch(ctx context.Context, s abi.SectorID, ft stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) error - UnsealPiece(context.Context, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) error - ReadPiece(context.Context, io.Writer, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize) (bool, error) + storiface.WorkerCalls TaskTypes(context.Context) (map[sealtasks.TaskType]struct{}, error) diff --git a/extern/sector-storage/storiface/storage.go b/extern/sector-storage/storiface/storage.go new file mode 100644 index 00000000000..f1de3656e72 --- /dev/null +++ b/extern/sector-storage/storiface/storage.go @@ -0,0 +1 @@ +package storiface diff --git a/extern/sector-storage/storiface/worker.go b/extern/sector-storage/storiface/worker.go index 37e4aad1d02..9f79e37be2a 100644 --- a/extern/sector-storage/storiface/worker.go +++ b/extern/sector-storage/storiface/worker.go @@ -1,10 +1,18 @@ package storiface import ( + "context" + "io" "time" - "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" + "github.com/google/uuid" + "github.com/ipfs/go-cid" + "github.com/filecoin-project/specs-actors/actors/abi" + "github.com/filecoin-project/specs-storage/storage" + + "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" + "github.com/filecoin-project/lotus/extern/sector-storage/stores" ) type WorkerInfo struct { @@ -40,3 +48,38 @@ type WorkerJob struct { RunWait int // 0 - running, 1+ - assigned Start time.Time } + +type CallID struct { + Sector abi.SectorID + ID uuid.UUID +} + +var UndefCall CallID + +type WorkerCalls interface { + AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (CallID, error) + SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (CallID, error) + SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (CallID, error) + SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (CallID, error) + SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (CallID, error) + FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (CallID, error) + ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) (CallID, error) + MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (CallID, error) + UnsealPiece(context.Context, abi.SectorID, UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) (CallID, error) + ReadPiece(context.Context, io.Writer, abi.SectorID, UnpaddedByteIndex, abi.UnpaddedPieceSize) (CallID, error) + Fetch(context.Context, abi.SectorID, stores.SectorFileType, stores.PathType, stores.AcquireMode) (CallID, error) +} + +type WorkerReturn interface { + ReturnAddPiece(ctx context.Context, callID CallID, pi abi.PieceInfo, err string) error + ReturnSealPreCommit1(ctx context.Context, callID CallID, p1o storage.PreCommit1Out, err string) error + ReturnSealPreCommit2(ctx context.Context, callID CallID, sealed storage.SectorCids, err string) error + ReturnSealCommit1(ctx context.Context, callID CallID, out storage.Commit1Out, err string) error + ReturnSealCommit2(ctx context.Context, callID CallID, proof 
storage.Proof, err string) error + ReturnFinalizeSector(ctx context.Context, callID CallID, err string) error + ReturnReleaseUnsealed(ctx context.Context, callID CallID, err string) error + ReturnMoveStorage(ctx context.Context, callID CallID, err string) error + ReturnUnsealPiece(ctx context.Context, callID CallID, err string) error + ReturnReadPiece(ctx context.Context, callID CallID, ok bool, err string) error + ReturnFetch(ctx context.Context, callID CallID, err string) error +} From 5d7394392965fe7b4bb339606c434530461a7e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 6 Sep 2020 18:54:00 +0200 Subject: [PATCH 02/61] storage: Fix import cycle --- api/apistruct/struct.go | 63 ++++++++------- cmd/lotus-bench/main.go | 4 +- cmd/lotus-seed/seed/seed.go | 5 +- cmd/lotus-storage-miner/storage.go | 7 +- extern/sector-storage/faults.go | 6 +- .../sector-storage/ffiwrapper/basicfs/fs.go | 29 ++++--- .../sector-storage/ffiwrapper/sealer_cgo.go | 25 +++--- .../sector-storage/ffiwrapper/sealer_test.go | 4 +- extern/sector-storage/ffiwrapper/types.go | 7 +- .../sector-storage/ffiwrapper/verifier_cgo.go | 5 +- extern/sector-storage/localworker.go | 36 ++++----- extern/sector-storage/manager.go | 74 ++++++++--------- extern/sector-storage/roprov.go | 15 ++-- extern/sector-storage/sched_test.go | 6 +- extern/sector-storage/selector_alloc.go | 7 +- extern/sector-storage/selector_existing.go | 5 +- extern/sector-storage/stores/http_handler.go | 23 +++--- extern/sector-storage/stores/index.go | 43 +++++----- extern/sector-storage/stores/index_locks.go | 25 +++--- .../sector-storage/stores/index_locks_test.go | 65 +++++++-------- extern/sector-storage/stores/interface.go | 24 ++---- extern/sector-storage/stores/local.go | 79 +++++++++---------- extern/sector-storage/stores/remote.go | 54 ++++++------- .../{stores => storiface}/filetype.go | 4 +- extern/sector-storage/storiface/storage.go | 14 ++++ extern/sector-storage/storiface/worker.go | 5 +- extern/sector-storage/testworker_test.go | 4 +- extern/sector-storage/work_tracker.go | 3 +- 28 files changed, 323 insertions(+), 318 deletions(-) rename extern/sector-storage/{stores => storiface}/filetype.go (96%) diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index 5fb80433d58..bc51f9b86ac 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -284,18 +284,18 @@ type StorageMinerStruct struct { SealingSchedDiag func(context.Context) (interface{}, error) `perm:"admin"` - StorageList func(context.Context) (map[stores.ID][]stores.Decl, error) `perm:"admin"` - StorageLocal func(context.Context) (map[stores.ID]string, error) `perm:"admin"` - StorageStat func(context.Context, stores.ID) (fsutil.FsStat, error) `perm:"admin"` - StorageAttach func(context.Context, stores.StorageInfo, fsutil.FsStat) error `perm:"admin"` - StorageDeclareSector func(context.Context, stores.ID, abi.SectorID, stores.SectorFileType, bool) error `perm:"admin"` - StorageDropSector func(context.Context, stores.ID, abi.SectorID, stores.SectorFileType) error `perm:"admin"` - StorageFindSector func(context.Context, abi.SectorID, stores.SectorFileType, abi.RegisteredSealProof, bool) ([]stores.SectorStorageInfo, error) `perm:"admin"` - StorageInfo func(context.Context, stores.ID) (stores.StorageInfo, error) `perm:"admin"` - StorageBestAlloc func(ctx context.Context, allocate stores.SectorFileType, spt abi.RegisteredSealProof, sealing stores.PathType) ([]stores.StorageInfo, error) `perm:"admin"` - StorageReportHealth func(ctx 
context.Context, id stores.ID, report stores.HealthReport) error `perm:"admin"` - StorageLock func(ctx context.Context, sector abi.SectorID, read stores.SectorFileType, write stores.SectorFileType) error `perm:"admin"` - StorageTryLock func(ctx context.Context, sector abi.SectorID, read stores.SectorFileType, write stores.SectorFileType) (bool, error) `perm:"admin"` + StorageList func(context.Context) (map[stores.ID][]stores.Decl, error) `perm:"admin"` + StorageLocal func(context.Context) (map[stores.ID]string, error) `perm:"admin"` + StorageStat func(context.Context, stores.ID) (fsutil.FsStat, error) `perm:"admin"` + StorageAttach func(context.Context, stores.StorageInfo, fsutil.FsStat) error `perm:"admin"` + StorageDeclareSector func(context.Context, stores.ID, abi.SectorID, storiface.SectorFileType, bool) error `perm:"admin"` + StorageDropSector func(context.Context, stores.ID, abi.SectorID, storiface.SectorFileType) error `perm:"admin"` + StorageFindSector func(context.Context, abi.SectorID, storiface.SectorFileType, abi.RegisteredSealProof, bool) ([]stores.SectorStorageInfo, error) `perm:"admin"` + StorageInfo func(context.Context, stores.ID) (stores.StorageInfo, error) `perm:"admin"` + StorageBestAlloc func(ctx context.Context, allocate storiface.SectorFileType, spt abi.RegisteredSealProof, sealing storiface.PathType) ([]stores.StorageInfo, error) `perm:"admin"` + StorageReportHealth func(ctx context.Context, id stores.ID, report stores.HealthReport) error `perm:"admin"` + StorageLock func(ctx context.Context, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType) error `perm:"admin"` + StorageTryLock func(ctx context.Context, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType) (bool, error) `perm:"admin"` DealsImportData func(ctx context.Context, dealPropCid cid.Cid, file string) error `perm:"write"` DealsList func(ctx context.Context) ([]storagemarket.StorageDeal, error) `perm:"read"` @@ -329,22 +329,21 @@ type WorkerStruct struct { Paths func(context.Context) ([]stores.StoragePath, error) `perm:"admin"` Info func(context.Context) (storiface.WorkerInfo, error) `perm:"admin"` - AddPiece func(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) `perm:"admin"` - SealPreCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) `perm:"admin"` - SealPreCommit2 func(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) `perm:"admin"` - SealCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) `perm:"admin"` - SealCommit2 func(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) `perm:"admin"` - FinalizeSector func(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) `perm:"admin"` - ReleaseUnsealed func(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) (storiface.CallID, error) `perm:"admin"` - MoveStorage func(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (storiface.CallID, error) `perm:"admin"` - UnsealPiece func(context.Context, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) (storiface.CallID, 
error) `perm:"admin"` - ReadPiece func(context.Context, io.Writer, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize) (storiface.CallID, error) `perm:"admin"` - Fetch func(context.Context, abi.SectorID, stores.SectorFileType, stores.PathType, stores.AcquireMode) (storiface.CallID, error) `perm:"admin"` + AddPiece func(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) `perm:"admin"` + SealPreCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) `perm:"admin"` + SealPreCommit2 func(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) `perm:"admin"` + SealCommit1 func(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) `perm:"admin"` + SealCommit2 func(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) `perm:"admin"` + FinalizeSector func(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) `perm:"admin"` + ReleaseUnsealed func(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) (storiface.CallID, error) `perm:"admin"` + MoveStorage func(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) (storiface.CallID, error) `perm:"admin"` + UnsealPiece func(context.Context, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) (storiface.CallID, error) `perm:"admin"` + ReadPiece func(context.Context, io.Writer, abi.SectorID, storiface.UnpaddedByteIndex, abi.UnpaddedPieceSize) (storiface.CallID, error) `perm:"admin"` + Fetch func(context.Context, abi.SectorID, storiface.SectorFileType, storiface.PathType, storiface.AcquireMode) (storiface.CallID, error) `perm:"admin"` Remove func(ctx context.Context, sector abi.SectorID) error `perm:"admin"` StorageAddLocal func(ctx context.Context, path string) error `perm:"admin"` - Closing func(context.Context) (<-chan struct{}, error) `perm:"admin"` } } @@ -1102,15 +1101,15 @@ func (c *StorageMinerStruct) StorageAttach(ctx context.Context, si stores.Storag return c.Internal.StorageAttach(ctx, si, st) } -func (c *StorageMinerStruct) StorageDeclareSector(ctx context.Context, storageId stores.ID, s abi.SectorID, ft stores.SectorFileType, primary bool) error { +func (c *StorageMinerStruct) StorageDeclareSector(ctx context.Context, storageId stores.ID, s abi.SectorID, ft storiface.SectorFileType, primary bool) error { return c.Internal.StorageDeclareSector(ctx, storageId, s, ft, primary) } -func (c *StorageMinerStruct) StorageDropSector(ctx context.Context, storageId stores.ID, s abi.SectorID, ft stores.SectorFileType) error { +func (c *StorageMinerStruct) StorageDropSector(ctx context.Context, storageId stores.ID, s abi.SectorID, ft storiface.SectorFileType) error { return c.Internal.StorageDropSector(ctx, storageId, s, ft) } -func (c *StorageMinerStruct) StorageFindSector(ctx context.Context, si abi.SectorID, types stores.SectorFileType, spt abi.RegisteredSealProof, allowFetch bool) ([]stores.SectorStorageInfo, error) { +func (c *StorageMinerStruct) StorageFindSector(ctx context.Context, si abi.SectorID, types storiface.SectorFileType, spt abi.RegisteredSealProof, allowFetch bool) ([]stores.SectorStorageInfo, error) { return 
c.Internal.StorageFindSector(ctx, si, types, spt, allowFetch) } @@ -1130,7 +1129,7 @@ func (c *StorageMinerStruct) StorageInfo(ctx context.Context, id stores.ID) (sto return c.Internal.StorageInfo(ctx, id) } -func (c *StorageMinerStruct) StorageBestAlloc(ctx context.Context, allocate stores.SectorFileType, spt abi.RegisteredSealProof, pt stores.PathType) ([]stores.StorageInfo, error) { +func (c *StorageMinerStruct) StorageBestAlloc(ctx context.Context, allocate storiface.SectorFileType, spt abi.RegisteredSealProof, pt storiface.PathType) ([]stores.StorageInfo, error) { return c.Internal.StorageBestAlloc(ctx, allocate, spt, pt) } @@ -1138,11 +1137,11 @@ func (c *StorageMinerStruct) StorageReportHealth(ctx context.Context, id stores. return c.Internal.StorageReportHealth(ctx, id, report) } -func (c *StorageMinerStruct) StorageLock(ctx context.Context, sector abi.SectorID, read stores.SectorFileType, write stores.SectorFileType) error { +func (c *StorageMinerStruct) StorageLock(ctx context.Context, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType) error { return c.Internal.StorageLock(ctx, sector, read, write) } -func (c *StorageMinerStruct) StorageTryLock(ctx context.Context, sector abi.SectorID, read stores.SectorFileType, write stores.SectorFileType) (bool, error) { +func (c *StorageMinerStruct) StorageTryLock(ctx context.Context, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType) (bool, error) { return c.Internal.StorageTryLock(ctx, sector, read, write) } @@ -1304,7 +1303,7 @@ func (w *WorkerStruct) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, return w.Internal.ReleaseUnsealed(ctx, sector, safeToFree) } -func (w *WorkerStruct) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (storiface.CallID, error) { +func (w *WorkerStruct) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) (storiface.CallID, error) { return w.Internal.MoveStorage(ctx, sector, types) } @@ -1316,7 +1315,7 @@ func (w *WorkerStruct) ReadPiece(ctx context.Context, sink io.Writer, sector abi return w.Internal.ReadPiece(ctx, sink, sector, offset, size) } -func (w *WorkerStruct) Fetch(ctx context.Context, id abi.SectorID, fileType stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) (storiface.CallID, error) { +func (w *WorkerStruct) Fetch(ctx context.Context, id abi.SectorID, fileType storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { return w.Internal.Fetch(ctx, id, fileType, ptype, am) } diff --git a/cmd/lotus-bench/main.go b/cmd/lotus-bench/main.go index 694987f27bc..c63d9673a09 100644 --- a/cmd/lotus-bench/main.go +++ b/cmd/lotus-bench/main.go @@ -23,7 +23,7 @@ import ( lcli "github.com/filecoin-project/lotus/cli" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper/basicfs" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "github.com/filecoin-project/specs-actors/actors/abi" "github.com/filecoin-project/specs-actors/actors/builtin/miner" "github.com/filecoin-project/specs-storage/storage" @@ -612,7 +612,7 @@ func runSeals(sb *ffiwrapper.Sealer, sbfs *basicfs.Provider, numSectors int, par if !skipunseal { log.Infof("[%d] Unsealing sector", i) { - p, done, err := sbfs.AcquireSector(context.TODO(), abi.SectorID{Miner: mid, Number: 1}, 
stores.FTUnsealed, stores.FTNone, stores.PathSealing) + p, done, err := sbfs.AcquireSector(context.TODO(), abi.SectorID{Miner: mid, Number: 1}, storiface.FTUnsealed, storiface.FTNone, storiface.PathSealing) if err != nil { return xerrors.Errorf("acquire unsealed sector for removing: %w", err) } diff --git a/cmd/lotus-seed/seed/seed.go b/cmd/lotus-seed/seed/seed.go index f892709f619..0ae61e6b843 100644 --- a/cmd/lotus-seed/seed/seed.go +++ b/cmd/lotus-seed/seed/seed.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io/ioutil" "os" "path/filepath" @@ -187,7 +188,7 @@ func presealSector(sb *ffiwrapper.Sealer, sbfs *basicfs.Provider, sid abi.Sector } func presealSectorFake(sbfs *basicfs.Provider, sid abi.SectorID, spt abi.RegisteredSealProof, ssize abi.SectorSize) (*genesis.PreSeal, error) { - paths, done, err := sbfs.AcquireSector(context.TODO(), sid, 0, stores.FTSealed|stores.FTCache, stores.PathSealing) + paths, done, err := sbfs.AcquireSector(context.TODO(), sid, 0, storiface.FTSealed|storiface.FTCache, storiface.PathSealing) if err != nil { return nil, xerrors.Errorf("acquire unsealed sector: %w", err) } @@ -211,7 +212,7 @@ func presealSectorFake(sbfs *basicfs.Provider, sid abi.SectorID, spt abi.Registe } func cleanupUnsealed(sbfs *basicfs.Provider, sid abi.SectorID) error { - paths, done, err := sbfs.AcquireSector(context.TODO(), sid, stores.FTUnsealed, stores.FTNone, stores.PathSealing) + paths, done, err := sbfs.AcquireSector(context.TODO(), sid, storiface.FTUnsealed, storiface.FTNone, storiface.PathSealing) if err != nil { return err } diff --git a/cmd/lotus-storage-miner/storage.go b/cmd/lotus-storage-miner/storage.go index 7fadcf83f32..5f3d2ac25fc 100644 --- a/cmd/lotus-storage-miner/storage.go +++ b/cmd/lotus-storage-miner/storage.go @@ -3,6 +3,7 @@ package main import ( "encoding/json" "fmt" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io/ioutil" "os" "path/filepath" @@ -294,17 +295,17 @@ var storageFindCmd = &cli.Command{ Number: abi.SectorNumber(snum), } - u, err := nodeApi.StorageFindSector(ctx, sid, stores.FTUnsealed, 0, false) + u, err := nodeApi.StorageFindSector(ctx, sid, storiface.FTUnsealed, 0, false) if err != nil { return xerrors.Errorf("finding unsealed: %w", err) } - s, err := nodeApi.StorageFindSector(ctx, sid, stores.FTSealed, 0, false) + s, err := nodeApi.StorageFindSector(ctx, sid, storiface.FTSealed, 0, false) if err != nil { return xerrors.Errorf("finding sealed: %w", err) } - c, err := nodeApi.StorageFindSector(ctx, sid, stores.FTCache, 0, false) + c, err := nodeApi.StorageFindSector(ctx, sid, storiface.FTCache, 0, false) if err != nil { return xerrors.Errorf("finding cache: %w", err) } diff --git a/extern/sector-storage/faults.go b/extern/sector-storage/faults.go index 06c823bb870..9c5948ab1d6 100644 --- a/extern/sector-storage/faults.go +++ b/extern/sector-storage/faults.go @@ -3,12 +3,12 @@ package sectorstorage import ( "context" "fmt" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "os" "path/filepath" "golang.org/x/xerrors" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/specs-actors/actors/abi" ) @@ -32,7 +32,7 @@ func (m *Manager) CheckProvable(ctx context.Context, spt abi.RegisteredSealProof ctx, cancel := context.WithCancel(ctx) defer cancel() - locked, err := m.index.StorageTryLock(ctx, sector, stores.FTSealed|stores.FTCache, stores.FTNone) + locked, err := 
m.index.StorageTryLock(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTNone) if err != nil { return xerrors.Errorf("acquiring sector lock: %w", err) } @@ -43,7 +43,7 @@ func (m *Manager) CheckProvable(ctx context.Context, spt abi.RegisteredSealProof return nil } - lp, _, err := m.localStore.AcquireSector(ctx, sector, spt, stores.FTSealed|stores.FTCache, stores.FTNone, stores.PathStorage, stores.AcquireMove) + lp, _, err := m.localStore.AcquireSector(ctx, sector, spt, storiface.FTSealed|storiface.FTCache, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove) if err != nil { log.Warnw("CheckProvable Sector FAULT: acquire sector in checkProvable", "sector", sector, "error", err) bad = append(bad, sector) diff --git a/extern/sector-storage/ffiwrapper/basicfs/fs.go b/extern/sector-storage/ffiwrapper/basicfs/fs.go index ae17273e9f5..491aeabc189 100644 --- a/extern/sector-storage/ffiwrapper/basicfs/fs.go +++ b/extern/sector-storage/ffiwrapper/basicfs/fs.go @@ -8,13 +8,12 @@ import ( "github.com/filecoin-project/specs-actors/actors/abi" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type sectorFile struct { abi.SectorID - stores.SectorFileType + storiface.SectorFileType } type Provider struct { @@ -24,24 +23,24 @@ type Provider struct { waitSector map[sectorFile]chan struct{} } -func (b *Provider) AcquireSector(ctx context.Context, id abi.SectorID, existing stores.SectorFileType, allocate stores.SectorFileType, ptype stores.PathType) (stores.SectorPaths, func(), error) { - if err := os.Mkdir(filepath.Join(b.Root, stores.FTUnsealed.String()), 0755); err != nil && !os.IsExist(err) { // nolint - return stores.SectorPaths{}, nil, err +func (b *Provider) AcquireSector(ctx context.Context, id abi.SectorID, existing storiface.SectorFileType, allocate storiface.SectorFileType, ptype storiface.PathType) (storiface.SectorPaths, func(), error) { + if err := os.Mkdir(filepath.Join(b.Root, storiface.FTUnsealed.String()), 0755); err != nil && !os.IsExist(err) { // nolint + return storiface.SectorPaths{}, nil, err } - if err := os.Mkdir(filepath.Join(b.Root, stores.FTSealed.String()), 0755); err != nil && !os.IsExist(err) { // nolint - return stores.SectorPaths{}, nil, err + if err := os.Mkdir(filepath.Join(b.Root, storiface.FTSealed.String()), 0755); err != nil && !os.IsExist(err) { // nolint + return storiface.SectorPaths{}, nil, err } - if err := os.Mkdir(filepath.Join(b.Root, stores.FTCache.String()), 0755); err != nil && !os.IsExist(err) { // nolint - return stores.SectorPaths{}, nil, err + if err := os.Mkdir(filepath.Join(b.Root, storiface.FTCache.String()), 0755); err != nil && !os.IsExist(err) { // nolint + return storiface.SectorPaths{}, nil, err } done := func() {} - out := stores.SectorPaths{ + out := storiface.SectorPaths{ ID: id, } - for _, fileType := range stores.PathTypes { + for _, fileType := range storiface.PathTypes { if !existing.Has(fileType) && !allocate.Has(fileType) { continue } @@ -61,10 +60,10 @@ func (b *Provider) AcquireSector(ctx context.Context, id abi.SectorID, existing case ch <- struct{}{}: case <-ctx.Done(): done() - return stores.SectorPaths{}, nil, ctx.Err() + return storiface.SectorPaths{}, nil, ctx.Err() } - path := filepath.Join(b.Root, fileType.String(), stores.SectorName(id)) + path := filepath.Join(b.Root, fileType.String(), storiface.SectorName(id)) prevDone := done done = func() { @@ -75,11 +74,11 @@ func (b *Provider) AcquireSector(ctx context.Context, 
id abi.SectorID, existing if !allocate.Has(fileType) { if _, err := os.Stat(path); os.IsNotExist(err) { done() - return stores.SectorPaths{}, nil, storiface.ErrSectorNotFound + return storiface.SectorPaths{}, nil, storiface.ErrSectorNotFound } } - stores.SetPathByType(&out, fileType, path) + storiface.SetPathByType(&out, fileType, path) } return out, done, nil diff --git a/extern/sector-storage/ffiwrapper/sealer_cgo.go b/extern/sector-storage/ffiwrapper/sealer_cgo.go index d4f796dcb83..b3032a24384 100644 --- a/extern/sector-storage/ffiwrapper/sealer_cgo.go +++ b/extern/sector-storage/ffiwrapper/sealer_cgo.go @@ -21,7 +21,6 @@ import ( "github.com/filecoin-project/specs-storage/storage" "github.com/filecoin-project/lotus/extern/sector-storage/fr32" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "github.com/filecoin-project/lotus/extern/sector-storage/zerocomm" ) @@ -80,9 +79,9 @@ func (sb *Sealer) AddPiece(ctx context.Context, sector abi.SectorID, existingPie } }() - var stagedPath stores.SectorPaths + var stagedPath storiface.SectorPaths if len(existingPieceSizes) == 0 { - stagedPath, done, err = sb.sectors.AcquireSector(ctx, sector, 0, stores.FTUnsealed, stores.PathSealing) + stagedPath, done, err = sb.sectors.AcquireSector(ctx, sector, 0, storiface.FTUnsealed, storiface.PathSealing) if err != nil { return abi.PieceInfo{}, xerrors.Errorf("acquire unsealed sector: %w", err) } @@ -92,7 +91,7 @@ func (sb *Sealer) AddPiece(ctx context.Context, sector abi.SectorID, existingPie return abi.PieceInfo{}, xerrors.Errorf("creating unsealed sector file: %w", err) } } else { - stagedPath, done, err = sb.sectors.AcquireSector(ctx, sector, stores.FTUnsealed, 0, stores.PathSealing) + stagedPath, done, err = sb.sectors.AcquireSector(ctx, sector, storiface.FTUnsealed, 0, storiface.PathSealing) if err != nil { return abi.PieceInfo{}, xerrors.Errorf("acquire unsealed sector: %w", err) } @@ -199,12 +198,12 @@ func (sb *Sealer) UnsealPiece(ctx context.Context, sector abi.SectorID, offset s maxPieceSize := abi.PaddedPieceSize(sb.ssize) // try finding existing - unsealedPath, done, err := sb.sectors.AcquireSector(ctx, sector, stores.FTUnsealed, stores.FTNone, stores.PathStorage) + unsealedPath, done, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTUnsealed, storiface.FTNone, storiface.PathStorage) var pf *partialFile switch { case xerrors.Is(err, storiface.ErrSectorNotFound): - unsealedPath, done, err = sb.sectors.AcquireSector(ctx, sector, stores.FTNone, stores.FTUnsealed, stores.PathStorage) + unsealedPath, done, err = sb.sectors.AcquireSector(ctx, sector, storiface.FTNone, storiface.FTUnsealed, storiface.PathStorage) if err != nil { return xerrors.Errorf("acquire unsealed sector path (allocate): %w", err) } @@ -241,7 +240,7 @@ func (sb *Sealer) UnsealPiece(ctx context.Context, sector abi.SectorID, offset s return nil } - srcPaths, srcDone, err := sb.sectors.AcquireSector(ctx, sector, stores.FTCache|stores.FTSealed, stores.FTNone, stores.PathStorage) + srcPaths, srcDone, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTCache|storiface.FTSealed, storiface.FTNone, storiface.PathStorage) if err != nil { return xerrors.Errorf("acquire sealed sector paths: %w", err) } @@ -362,7 +361,7 @@ func (sb *Sealer) UnsealPiece(ctx context.Context, sector abi.SectorID, offset s } func (sb *Sealer) ReadPiece(ctx context.Context, writer io.Writer, sector abi.SectorID, offset storiface.UnpaddedByteIndex, size 
abi.UnpaddedPieceSize) (bool, error) { - path, done, err := sb.sectors.AcquireSector(ctx, sector, stores.FTUnsealed, stores.FTNone, stores.PathStorage) + path, done, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTUnsealed, storiface.FTNone, storiface.PathStorage) if err != nil { return false, xerrors.Errorf("acquire unsealed sector path: %w", err) } @@ -414,7 +413,7 @@ func (sb *Sealer) ReadPiece(ctx context.Context, writer io.Writer, sector abi.Se } func (sb *Sealer) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (out storage.PreCommit1Out, err error) { - paths, done, err := sb.sectors.AcquireSector(ctx, sector, stores.FTUnsealed, stores.FTSealed|stores.FTCache, stores.PathSealing) + paths, done, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTUnsealed, storiface.FTSealed|storiface.FTCache, storiface.PathSealing) if err != nil { return nil, xerrors.Errorf("acquiring sector paths: %w", err) } @@ -471,7 +470,7 @@ func (sb *Sealer) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke } func (sb *Sealer) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage.PreCommit1Out) (storage.SectorCids, error) { - paths, done, err := sb.sectors.AcquireSector(ctx, sector, stores.FTSealed|stores.FTCache, 0, stores.PathSealing) + paths, done, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, 0, storiface.PathSealing) if err != nil { return storage.SectorCids{}, xerrors.Errorf("acquiring sector paths: %w", err) } @@ -489,7 +488,7 @@ func (sb *Sealer) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase } func (sb *Sealer) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storage.Commit1Out, error) { - paths, done, err := sb.sectors.AcquireSector(ctx, sector, stores.FTSealed|stores.FTCache, 0, stores.PathSealing) + paths, done, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTSealed|storiface.FTCache, 0, storiface.PathSealing) if err != nil { return nil, xerrors.Errorf("acquire sector paths: %w", err) } @@ -539,7 +538,7 @@ func (sb *Sealer) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU } } - paths, done, err := sb.sectors.AcquireSector(ctx, sector, stores.FTUnsealed, 0, stores.PathStorage) + paths, done, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTUnsealed, 0, storiface.PathStorage) if err != nil { return xerrors.Errorf("acquiring sector cache path: %w", err) } @@ -576,7 +575,7 @@ func (sb *Sealer) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU } } - paths, done, err := sb.sectors.AcquireSector(ctx, sector, stores.FTCache, 0, stores.PathStorage) + paths, done, err := sb.sectors.AcquireSector(ctx, sector, storiface.FTCache, 0, storiface.PathStorage) if err != nil { return xerrors.Errorf("acquiring sector cache path: %w", err) } diff --git a/extern/sector-storage/ffiwrapper/sealer_test.go b/extern/sector-storage/ffiwrapper/sealer_test.go index b484b391fee..372af05bee2 100644 --- a/extern/sector-storage/ffiwrapper/sealer_test.go +++ b/extern/sector-storage/ffiwrapper/sealer_test.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "fmt" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io" "io/ioutil" "math/rand" @@ -28,7 +29,6 @@ import ( ffi "github.com/filecoin-project/filecoin-ffi" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper/basicfs" - 
"github.com/filecoin-project/lotus/extern/sector-storage/stores" ) func init() { @@ -123,7 +123,7 @@ func (s *seal) unseal(t *testing.T, sb *Sealer, sp *basicfs.Provider, si abi.Sec t.Fatal("read wrong bytes") } - p, sd, err := sp.AcquireSector(context.TODO(), si, stores.FTUnsealed, stores.FTNone, stores.PathStorage) + p, sd, err := sp.AcquireSector(context.TODO(), si, storiface.FTUnsealed, storiface.FTNone, storiface.PathStorage) if err != nil { t.Fatal(err) } diff --git a/extern/sector-storage/ffiwrapper/types.go b/extern/sector-storage/ffiwrapper/types.go index a634134eefe..71fe3cf3986 100644 --- a/extern/sector-storage/ffiwrapper/types.go +++ b/extern/sector-storage/ffiwrapper/types.go @@ -10,13 +10,12 @@ import ( "github.com/filecoin-project/specs-storage/storage" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper/basicfs" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type Validator interface { - CanCommit(sector stores.SectorPaths) (bool, error) - CanProve(sector stores.SectorPaths) (bool, error) + CanCommit(sector storiface.SectorPaths) (bool, error) + CanProve(sector storiface.SectorPaths) (bool, error) } type StorageSealer interface { @@ -43,7 +42,7 @@ type Verifier interface { type SectorProvider interface { // * returns storiface.ErrSectorNotFound if a requested existing sector doesn't exist // * returns an error when allocate is set, and existing isn't, and the sector exists - AcquireSector(ctx context.Context, id abi.SectorID, existing stores.SectorFileType, allocate stores.SectorFileType, ptype stores.PathType) (stores.SectorPaths, func(), error) + AcquireSector(ctx context.Context, id abi.SectorID, existing storiface.SectorFileType, allocate storiface.SectorFileType, ptype storiface.PathType) (storiface.SectorPaths, func(), error) } var _ SectorProvider = &basicfs.Provider{} diff --git a/extern/sector-storage/ffiwrapper/verifier_cgo.go b/extern/sector-storage/ffiwrapper/verifier_cgo.go index de6fc08499a..c72070ef7ca 100644 --- a/extern/sector-storage/ffiwrapper/verifier_cgo.go +++ b/extern/sector-storage/ffiwrapper/verifier_cgo.go @@ -4,6 +4,7 @@ package ffiwrapper import ( "context" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "golang.org/x/xerrors" @@ -11,8 +12,6 @@ import ( ffi "github.com/filecoin-project/filecoin-ffi" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" - "go.opencensus.io/trace" ) @@ -64,7 +63,7 @@ func (sb *Sealer) pubSectorToPriv(ctx context.Context, mid abi.ActorID, sectorIn sid := abi.SectorID{Miner: mid, Number: s.SectorNumber} - paths, d, err := sb.sectors.AcquireSector(ctx, sid, stores.FTCache|stores.FTSealed, 0, stores.PathStorage) + paths, d, err := sb.sectors.AcquireSector(ctx, sid, storiface.FTCache|storiface.FTSealed, 0, storiface.PathStorage) if err != nil { log.Warnw("failed to acquire sector, skipping", "sector", sid, "error", err) skipped = append(skipped, sid) diff --git a/extern/sector-storage/localworker.go b/extern/sector-storage/localworker.go index 9d451309df9..1a2232dd16e 100644 --- a/extern/sector-storage/localworker.go +++ b/extern/sector-storage/localworker.go @@ -22,7 +22,7 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) -var pathTypes = []stores.SectorFileType{stores.FTUnsealed, stores.FTSealed, stores.FTCache} +var pathTypes = []storiface.SectorFileType{storiface.FTUnsealed, storiface.FTSealed, storiface.FTCache} type WorkerConfig struct { 
SealProof abi.RegisteredSealProof @@ -59,19 +59,19 @@ func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, type localWorkerPathProvider struct { w *LocalWorker - op stores.AcquireMode + op storiface.AcquireMode } -func (l *localWorkerPathProvider) AcquireSector(ctx context.Context, sector abi.SectorID, existing stores.SectorFileType, allocate stores.SectorFileType, sealing stores.PathType) (stores.SectorPaths, func(), error) { +func (l *localWorkerPathProvider) AcquireSector(ctx context.Context, sector abi.SectorID, existing storiface.SectorFileType, allocate storiface.SectorFileType, sealing storiface.PathType) (storiface.SectorPaths, func(), error) { paths, storageIDs, err := l.w.storage.AcquireSector(ctx, sector, l.w.scfg.SealProofType, existing, allocate, sealing, l.op) if err != nil { - return stores.SectorPaths{}, nil, err + return storiface.SectorPaths{}, nil, err } - releaseStorage, err := l.w.localStore.Reserve(ctx, sector, l.w.scfg.SealProofType, allocate, storageIDs, stores.FSOverheadSeal) + releaseStorage, err := l.w.localStore.Reserve(ctx, sector, l.w.scfg.SealProofType, allocate, storageIDs, storiface.FSOverheadSeal) if err != nil { - return stores.SectorPaths{}, nil, xerrors.Errorf("reserving storage space: %w", err) + return storiface.SectorPaths{}, nil, xerrors.Errorf("reserving storage space: %w", err) } log.Debugf("acquired sector %d (e:%d; a:%d): %v", sector, existing, allocate, paths) @@ -84,9 +84,9 @@ func (l *localWorkerPathProvider) AcquireSector(ctx context.Context, sector abi. continue } - sid := stores.PathByType(storageIDs, fileType) + sid := storiface.PathByType(storageIDs, fileType) - if err := l.w.sindex.StorageDeclareSector(ctx, stores.ID(sid), sector, fileType, l.op == stores.AcquireMove); err != nil { + if err := l.w.sindex.StorageDeclareSector(ctx, stores.ID(sid), sector, fileType, l.op == storiface.AcquireMove); err != nil { log.Errorf("declare sector error: %+v", err) } } @@ -140,9 +140,9 @@ func (l *LocalWorker) AddPiece(ctx context.Context, sector abi.SectorID, epcs [] }) } -func (l *LocalWorker) Fetch(ctx context.Context, sector abi.SectorID, fileType stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) (storiface.CallID, error) { +func (l *LocalWorker) Fetch(ctx context.Context, sector abi.SectorID, fileType storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { return l.asyncCall(sector, func(ci storiface.CallID) { - _, done, err := (&localWorkerPathProvider{w: l, op: am}).AcquireSector(ctx, sector, fileType, stores.FTNone, ptype) + _, done, err := (&localWorkerPathProvider{w: l, op: am}).AcquireSector(ctx, sector, fileType, storiface.FTNone, ptype) if err == nil { done() } @@ -165,12 +165,12 @@ func (l *LocalWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, t { // cleanup previous failed attempts if they exist - if err = l.storage.Remove(ctx, sector, stores.FTSealed, true); err != nil { + if err = l.storage.Remove(ctx, sector, storiface.FTSealed, true); err != nil { err = xerrors.Errorf("cleaning up sealed data: %w", err) return } - if err = l.storage.Remove(ctx, sector, stores.FTCache, true); err != nil { + if err = l.storage.Remove(ctx, sector, storiface.FTCache, true); err != nil { err = xerrors.Errorf("cleaning up cache data: %w", err) return } @@ -264,20 +264,20 @@ func (l *LocalWorker) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, func (l *LocalWorker) Remove(ctx context.Context, sector abi.SectorID) error { var err error 
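For reference, the hunks above convert LocalWorker methods (Fetch, SealPreCommit1, and so on) to return a storiface.CallID immediately and to run the actual work through an asyncCall helper whose implementation is not shown in this patch. The following is only a minimal sketch of that pattern under simplified assumptions: CallID is a plain counter here rather than the real storiface.CallID, and the result is reported through a plain callback instead of the WorkerReturn RPC interface.

// Minimal sketch of the async-call pattern: the exported method returns a
// call handle at once and the work runs in a goroutine, reporting its result
// asynchronously (names like worker, ret and lastCall are illustrative only).
package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type CallID uint64 // stand-in for storiface.CallID

var lastCall uint64

type worker struct {
	ret func(ci CallID, result string, err error) // stand-in for a WorkerReturn-style reporter
}

// asyncCall hands the work function its CallID and returns that ID right away.
func (w *worker) asyncCall(work func(ci CallID)) (CallID, error) {
	ci := CallID(atomic.AddUint64(&lastCall, 1))
	go work(ci)
	return ci, nil
}

// AddPiece-style method: kick off the work, return the handle immediately.
func (w *worker) AddPiece(data string) (CallID, error) {
	return w.asyncCall(func(ci CallID) {
		time.Sleep(10 * time.Millisecond) // pretend to do the sealing work
		w.ret(ci, "piece:"+data, nil)     // report the result asynchronously
	})
}

func main() {
	w := &worker{ret: func(ci CallID, res string, err error) {
		fmt.Printf("call %d finished: %q (err=%v)\n", ci, res, err)
	}}
	ci, _ := w.AddPiece("deadbeef")
	fmt.Println("got call id immediately:", ci)
	time.Sleep(50 * time.Millisecond)
}

The caller never blocks on the sealing work itself; completion is delivered out of band, which is what allows the miner to reconnect to a worker without losing in-flight calls.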
- if rerr := l.storage.Remove(ctx, sector, stores.FTSealed, true); rerr != nil { + if rerr := l.storage.Remove(ctx, sector, storiface.FTSealed, true); rerr != nil { err = multierror.Append(err, xerrors.Errorf("removing sector (sealed): %w", rerr)) } - if rerr := l.storage.Remove(ctx, sector, stores.FTCache, true); rerr != nil { + if rerr := l.storage.Remove(ctx, sector, storiface.FTCache, true); rerr != nil { err = multierror.Append(err, xerrors.Errorf("removing sector (cache): %w", rerr)) } - if rerr := l.storage.Remove(ctx, sector, stores.FTUnsealed, true); rerr != nil { + if rerr := l.storage.Remove(ctx, sector, storiface.FTUnsealed, true); rerr != nil { err = multierror.Append(err, xerrors.Errorf("removing sector (unsealed): %w", rerr)) } return err } -func (l *LocalWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (storiface.CallID, error) { +func (l *LocalWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) (storiface.CallID, error) { return l.asyncCall(sector, func(ci storiface.CallID) { err := l.storage.MoveStorage(ctx, sector, l.scfg.SealProofType, types) @@ -306,12 +306,12 @@ func (l *LocalWorker) UnsealPiece(ctx context.Context, sector abi.SectorID, inde return } - if err = l.storage.RemoveCopies(ctx, sector, stores.FTSealed); err != nil { + if err = l.storage.RemoveCopies(ctx, sector, storiface.FTSealed); err != nil { err = xerrors.Errorf("removing source data: %w", err) return } - if err = l.storage.RemoveCopies(ctx, sector, stores.FTCache); err != nil { + if err = l.storage.RemoveCopies(ctx, sector, storiface.FTCache); err != nil { err = xerrors.Errorf("removing source data: %w", err) return } diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 683f3450567..dacd3439f7e 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -191,7 +191,7 @@ func schedNop(context.Context, Worker) error { return nil } -func schedFetch(sector abi.SectorID, ft stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) func(context.Context, Worker) error { +func schedFetch(sector abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) func(context.Context, Worker) error { return func(ctx context.Context, worker Worker) error { return worker.Fetch(ctx, sector, ft, ptype, am) } @@ -201,21 +201,21 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := m.index.StorageLock(ctx, sector, stores.FTSealed|stores.FTCache, stores.FTUnsealed); err != nil { + if err := m.index.StorageLock(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.FTUnsealed); err != nil { return xerrors.Errorf("acquiring sector lock: %w", err) } // passing 0 spt because we only need it when allowFetch is true - best, err := m.index.StorageFindSector(ctx, sector, stores.FTUnsealed, 0, false) + best, err := m.index.StorageFindSector(ctx, sector, storiface.FTUnsealed, 0, false) if err != nil { return xerrors.Errorf("read piece: checking for already existing unsealed sector: %w", err) } var selector WorkerSelector if len(best) == 0 { // new - selector = newAllocSelector(m.index, stores.FTUnsealed, stores.PathSealing) + selector = newAllocSelector(m.index, storiface.FTUnsealed, storiface.PathSealing) } else { // append to existing - selector = newExistingSelector(m.index, sector, stores.FTUnsealed, false) + selector = 
newExistingSelector(m.index, sector, storiface.FTUnsealed, false) } var readOk bool @@ -223,9 +223,9 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect if len(best) > 0 { // There is unsealed sector, see if we can read from it - selector = newExistingSelector(m.index, sector, stores.FTUnsealed, false) + selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(sector, stores.FTUnsealed, stores.PathSealing, stores.AcquireMove), func(ctx context.Context, w Worker) error { + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { readOk, err = w.ReadPiece(ctx, sink, sector, offset, size) return err }) @@ -239,12 +239,12 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect } unsealFetch := func(ctx context.Context, worker Worker) error { - if err := worker.Fetch(ctx, sector, stores.FTSealed|stores.FTCache, stores.PathSealing, stores.AcquireCopy); err != nil { + if err := worker.Fetch(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.PathSealing, storiface.AcquireCopy); err != nil { return xerrors.Errorf("copy sealed/cache sector data: %w", err) } if len(best) > 0 { - if err := worker.Fetch(ctx, sector, stores.FTUnsealed, stores.PathSealing, stores.AcquireMove); err != nil { + if err := worker.Fetch(ctx, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove); err != nil { return xerrors.Errorf("copy unsealed sector data: %w", err) } } @@ -258,9 +258,9 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect return err } - selector = newExistingSelector(m.index, sector, stores.FTUnsealed, false) + selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(sector, stores.FTUnsealed, stores.PathSealing, stores.AcquireMove), func(ctx context.Context, w Worker) error { + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { readOk, err = w.ReadPiece(ctx, sink, sector, offset, size) return err }) @@ -284,16 +284,16 @@ func (m *Manager) AddPiece(ctx context.Context, sector abi.SectorID, existingPie ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := m.index.StorageLock(ctx, sector, stores.FTNone, stores.FTUnsealed); err != nil { + if err := m.index.StorageLock(ctx, sector, storiface.FTNone, storiface.FTUnsealed); err != nil { return abi.PieceInfo{}, xerrors.Errorf("acquiring sector lock: %w", err) } var selector WorkerSelector var err error if len(existingPieces) == 0 { // new - selector = newAllocSelector(m.index, stores.FTUnsealed, stores.PathSealing) + selector = newAllocSelector(m.index, storiface.FTUnsealed, storiface.PathSealing) } else { // use existing - selector = newExistingSelector(m.index, sector, stores.FTUnsealed, false) + selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) } var out abi.PieceInfo @@ -313,15 +313,15 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := m.index.StorageLock(ctx, sector, stores.FTUnsealed, 
stores.FTSealed|stores.FTCache); err != nil { + if err := m.index.StorageLock(ctx, sector, storiface.FTUnsealed, storiface.FTSealed|storiface.FTCache); err != nil { return nil, xerrors.Errorf("acquiring sector lock: %w", err) } // TODO: also consider where the unsealed data sits - selector := newAllocSelector(m.index, stores.FTCache|stores.FTSealed, stores.PathSealing) + selector := newAllocSelector(m.index, storiface.FTCache|storiface.FTSealed, storiface.PathSealing) - err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit1, selector, schedFetch(sector, stores.FTUnsealed, stores.PathSealing, stores.AcquireMove), func(ctx context.Context, w Worker) error { + err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit1, selector, schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { p, err := w.SealPreCommit1(ctx, sector, ticket, pieces) if err != nil { return err @@ -337,13 +337,13 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := m.index.StorageLock(ctx, sector, stores.FTSealed, stores.FTCache); err != nil { + if err := m.index.StorageLock(ctx, sector, storiface.FTSealed, storiface.FTCache); err != nil { return storage.SectorCids{}, xerrors.Errorf("acquiring sector lock: %w", err) } - selector := newExistingSelector(m.index, sector, stores.FTCache|stores.FTSealed, true) + selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, true) - err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, schedFetch(sector, stores.FTCache|stores.FTSealed, stores.PathSealing, stores.AcquireMove), func(ctx context.Context, w Worker) error { + err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, schedFetch(sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { p, err := w.SealPreCommit2(ctx, sector, phase1Out) if err != nil { return err @@ -358,16 +358,16 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := m.index.StorageLock(ctx, sector, stores.FTSealed, stores.FTCache); err != nil { + if err := m.index.StorageLock(ctx, sector, storiface.FTSealed, storiface.FTCache); err != nil { return storage.Commit1Out{}, xerrors.Errorf("acquiring sector lock: %w", err) } // NOTE: We set allowFetch to false in so that we always execute on a worker // with direct access to the data. 
We want to do that because this step is // generally very cheap / fast, and transferring data is not worth the effort - selector := newExistingSelector(m.index, sector, stores.FTCache|stores.FTSealed, false) + selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, schedFetch(sector, stores.FTCache|stores.FTSealed, stores.PathSealing, stores.AcquireMove), func(ctx context.Context, w Worker) error { + err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, schedFetch(sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { p, err := w.SealCommit1(ctx, sector, ticket, seed, pieces, cids) if err != nil { return err @@ -397,26 +397,26 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := m.index.StorageLock(ctx, sector, stores.FTNone, stores.FTSealed|stores.FTUnsealed|stores.FTCache); err != nil { + if err := m.index.StorageLock(ctx, sector, storiface.FTNone, storiface.FTSealed|storiface.FTUnsealed|storiface.FTCache); err != nil { return xerrors.Errorf("acquiring sector lock: %w", err) } - unsealed := stores.FTUnsealed + unsealed := storiface.FTUnsealed { - unsealedStores, err := m.index.StorageFindSector(ctx, sector, stores.FTUnsealed, 0, false) + unsealedStores, err := m.index.StorageFindSector(ctx, sector, storiface.FTUnsealed, 0, false) if err != nil { return xerrors.Errorf("finding unsealed sector: %w", err) } if len(unsealedStores) == 0 { // Is some edge-cases unsealed sector may not exist already, that's fine - unsealed = stores.FTNone + unsealed = storiface.FTNone } } - selector := newExistingSelector(m.index, sector, stores.FTCache|stores.FTSealed, false) + selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) err := m.sched.Schedule(ctx, sector, sealtasks.TTFinalize, selector, - schedFetch(sector, stores.FTCache|stores.FTSealed|unsealed, stores.PathSealing, stores.AcquireMove), + schedFetch(sector, storiface.FTCache|storiface.FTSealed|unsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { return w.FinalizeSector(ctx, sector, keepUnsealed) }) @@ -424,18 +424,18 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU return err } - fetchSel := newAllocSelector(m.index, stores.FTCache|stores.FTSealed, stores.PathStorage) + fetchSel := newAllocSelector(m.index, storiface.FTCache|storiface.FTSealed, storiface.PathStorage) moveUnsealed := unsealed { if len(keepUnsealed) == 0 { - moveUnsealed = stores.FTNone + moveUnsealed = storiface.FTNone } } err = m.sched.Schedule(ctx, sector, sealtasks.TTFetch, fetchSel, - schedFetch(sector, stores.FTCache|stores.FTSealed|moveUnsealed, stores.PathStorage, stores.AcquireMove), + schedFetch(sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - return w.MoveStorage(ctx, sector, stores.FTCache|stores.FTSealed|moveUnsealed) + return w.MoveStorage(ctx, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed) }) if err != nil { return xerrors.Errorf("moving sector to storage: %w", err) @@ -453,19 +453,19 @@ func (m *Manager) Remove(ctx context.Context, sector abi.SectorID) error { ctx, cancel := context.WithCancel(ctx) defer cancel() - if err := 
m.index.StorageLock(ctx, sector, stores.FTNone, stores.FTSealed|stores.FTUnsealed|stores.FTCache); err != nil { + if err := m.index.StorageLock(ctx, sector, storiface.FTNone, storiface.FTSealed|storiface.FTUnsealed|storiface.FTCache); err != nil { return xerrors.Errorf("acquiring sector lock: %w", err) } var err error - if rerr := m.storage.Remove(ctx, sector, stores.FTSealed, true); rerr != nil { + if rerr := m.storage.Remove(ctx, sector, storiface.FTSealed, true); rerr != nil { err = multierror.Append(err, xerrors.Errorf("removing sector (sealed): %w", rerr)) } - if rerr := m.storage.Remove(ctx, sector, stores.FTCache, true); rerr != nil { + if rerr := m.storage.Remove(ctx, sector, storiface.FTCache, true); rerr != nil { err = multierror.Append(err, xerrors.Errorf("removing sector (cache): %w", rerr)) } - if rerr := m.storage.Remove(ctx, sector, stores.FTUnsealed, true); rerr != nil { + if rerr := m.storage.Remove(ctx, sector, storiface.FTUnsealed, true); rerr != nil { err = multierror.Append(err, xerrors.Errorf("removing sector (unsealed): %w", rerr)) } diff --git a/extern/sector-storage/roprov.go b/extern/sector-storage/roprov.go index fe58a84450f..60dcea740b5 100644 --- a/extern/sector-storage/roprov.go +++ b/extern/sector-storage/roprov.go @@ -2,6 +2,7 @@ package sectorstorage import ( "context" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "golang.org/x/xerrors" @@ -16,25 +17,25 @@ type readonlyProvider struct { spt abi.RegisteredSealProof } -func (l *readonlyProvider) AcquireSector(ctx context.Context, id abi.SectorID, existing stores.SectorFileType, allocate stores.SectorFileType, sealing stores.PathType) (stores.SectorPaths, func(), error) { - if allocate != stores.FTNone { - return stores.SectorPaths{}, nil, xerrors.New("read-only storage") +func (l *readonlyProvider) AcquireSector(ctx context.Context, id abi.SectorID, existing storiface.SectorFileType, allocate storiface.SectorFileType, sealing storiface.PathType) (storiface.SectorPaths, func(), error) { + if allocate != storiface.FTNone { + return storiface.SectorPaths{}, nil, xerrors.New("read-only storage") } ctx, cancel := context.WithCancel(ctx) // use TryLock to avoid blocking - locked, err := l.index.StorageTryLock(ctx, id, existing, stores.FTNone) + locked, err := l.index.StorageTryLock(ctx, id, existing, storiface.FTNone) if err != nil { cancel() - return stores.SectorPaths{}, nil, xerrors.Errorf("acquiring sector lock: %w", err) + return storiface.SectorPaths{}, nil, xerrors.Errorf("acquiring sector lock: %w", err) } if !locked { cancel() - return stores.SectorPaths{}, nil, xerrors.Errorf("failed to acquire sector lock") + return storiface.SectorPaths{}, nil, xerrors.Errorf("failed to acquire sector lock") } - p, _, err := l.stor.AcquireSector(ctx, id, l.spt, existing, allocate, sealing, stores.AcquireMove) + p, _, err := l.stor.AcquireSector(ctx, id, l.spt, existing, allocate, sealing, storiface.AcquireMove) return p, cancel, err } diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index 4c39370a088..834a9f6ddfe 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -82,11 +82,11 @@ func (s *schedTestWorker) AddPiece(ctx context.Context, sector abi.SectorID, pie panic("implement me") } -func (s *schedTestWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) error { +func (s *schedTestWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) error { 
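The selectors and fetch calls in these hunks combine sector file types as a bitmask (for example storiface.FTCache|storiface.FTSealed) and iterate over storiface.PathTypes to pick out the requested ones. A small self-contained illustration of that convention follows; the 1<<iota constant values are an assumption consistent with the filetype.go definitions moved later in this patch, and Has mirrors the method used by the path provider above.

// Sketch of the SectorFileType bitmask convention used by schedFetch and the
// worker selectors (values assumed to follow the iota-based filetype.go enum).
package main

import "fmt"

type SectorFileType int

const (
	FTUnsealed SectorFileType = 1 << iota
	FTSealed
	FTCache
)

const FTNone SectorFileType = 0

var PathTypes = []SectorFileType{FTUnsealed, FTSealed, FTCache}

// Has reports whether all bits of ft are set in t.
func (t SectorFileType) Has(ft SectorFileType) bool { return t&ft == ft }

func main() {
	// Ask for sealed+cache files, the way newExistingSelector / schedFetch do.
	want := FTCache | FTSealed

	names := map[SectorFileType]string{FTUnsealed: "unsealed", FTSealed: "sealed", FTCache: "cache"}
	for _, ft := range PathTypes {
		if want&ft == 0 {
			continue // this file type was not requested
		}
		fmt.Println("would fetch:", names[ft])
	}

	fmt.Println("includes unsealed?", want.Has(FTUnsealed)) // false
}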
panic("implement me") } -func (s *schedTestWorker) Fetch(ctx context.Context, id abi.SectorID, ft stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) error { +func (s *schedTestWorker) Fetch(ctx context.Context, id abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) error { panic("implement me") } @@ -215,7 +215,7 @@ func TestSched(t *testing.T) { done := make(chan struct{}) rm.done[taskName] = done - sel := newAllocSelector(index, stores.FTCache, stores.PathSealing) + sel := newAllocSelector(index, storiface.FTCache, storiface.PathSealing) rm.wg.Add(1) go func() { diff --git a/extern/sector-storage/selector_alloc.go b/extern/sector-storage/selector_alloc.go index ca4b99bfc75..dfe6e5d415d 100644 --- a/extern/sector-storage/selector_alloc.go +++ b/extern/sector-storage/selector_alloc.go @@ -2,6 +2,7 @@ package sectorstorage import ( "context" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "golang.org/x/xerrors" @@ -13,11 +14,11 @@ import ( type allocSelector struct { index stores.SectorIndex - alloc stores.SectorFileType - ptype stores.PathType + alloc storiface.SectorFileType + ptype storiface.PathType } -func newAllocSelector(index stores.SectorIndex, alloc stores.SectorFileType, ptype stores.PathType) *allocSelector { +func newAllocSelector(index stores.SectorIndex, alloc storiface.SectorFileType, ptype storiface.PathType) *allocSelector { return &allocSelector{ index: index, alloc: alloc, diff --git a/extern/sector-storage/selector_existing.go b/extern/sector-storage/selector_existing.go index 1e97db539dd..124ccb5c1a7 100644 --- a/extern/sector-storage/selector_existing.go +++ b/extern/sector-storage/selector_existing.go @@ -2,6 +2,7 @@ package sectorstorage import ( "context" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "golang.org/x/xerrors" @@ -14,11 +15,11 @@ import ( type existingSelector struct { index stores.SectorIndex sector abi.SectorID - alloc stores.SectorFileType + alloc storiface.SectorFileType allowFetch bool } -func newExistingSelector(index stores.SectorIndex, sector abi.SectorID, alloc stores.SectorFileType, allowFetch bool) *existingSelector { +func newExistingSelector(index stores.SectorIndex, sector abi.SectorID, alloc storiface.SectorFileType, allowFetch bool) *existingSelector { return &existingSelector{ index: index, sector: sector, diff --git a/extern/sector-storage/stores/http_handler.go b/extern/sector-storage/stores/http_handler.go index 97af6e769cd..a5a2cd9137a 100644 --- a/extern/sector-storage/stores/http_handler.go +++ b/extern/sector-storage/stores/http_handler.go @@ -2,6 +2,7 @@ package stores import ( "encoding/json" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io" "net/http" "os" @@ -55,7 +56,7 @@ func (handler *FetchHandler) remoteGetSector(w http.ResponseWriter, r *http.Requ log.Infof("SERVE GET %s", r.URL) vars := mux.Vars(r) - id, err := ParseSectorID(vars["id"]) + id, err := storiface.ParseSectorID(vars["id"]) if err != nil { log.Error("%+v", err) w.WriteHeader(500) @@ -72,7 +73,7 @@ func (handler *FetchHandler) remoteGetSector(w http.ResponseWriter, r *http.Requ // The caller has a lock on this sector already, no need to get one here // passing 0 spt because we don't allocate anything - paths, _, err := handler.Local.AcquireSector(r.Context(), id, 0, ft, FTNone, PathStorage, AcquireMove) + paths, _, err := handler.Local.AcquireSector(r.Context(), id, 0, ft, storiface.FTNone, storiface.PathStorage, 
storiface.AcquireMove) if err != nil { log.Error("%+v", err) w.WriteHeader(500) @@ -81,7 +82,7 @@ func (handler *FetchHandler) remoteGetSector(w http.ResponseWriter, r *http.Requ // TODO: reserve local storage here - path := PathByType(paths, ft) + path := storiface.PathByType(paths, ft) if path == "" { log.Error("acquired path was empty") w.WriteHeader(500) @@ -120,7 +121,7 @@ func (handler *FetchHandler) remoteDeleteSector(w http.ResponseWriter, r *http.R log.Infof("SERVE DELETE %s", r.URL) vars := mux.Vars(r) - id, err := ParseSectorID(vars["id"]) + id, err := storiface.ParseSectorID(vars["id"]) if err != nil { log.Error("%+v", err) w.WriteHeader(500) @@ -141,14 +142,14 @@ func (handler *FetchHandler) remoteDeleteSector(w http.ResponseWriter, r *http.R } } -func ftFromString(t string) (SectorFileType, error) { +func ftFromString(t string) (storiface.SectorFileType, error) { switch t { - case FTUnsealed.String(): - return FTUnsealed, nil - case FTSealed.String(): - return FTSealed, nil - case FTCache.String(): - return FTCache, nil + case storiface.FTUnsealed.String(): + return storiface.FTUnsealed, nil + case storiface.FTSealed.String(): + return storiface.FTSealed, nil + case storiface.FTCache.String(): + return storiface.FTCache, nil default: return 0, xerrors.Errorf("unknown sector file type: '%s'", t) } diff --git a/extern/sector-storage/stores/index.go b/extern/sector-storage/stores/index.go index 256dc965174..a2c329de87e 100644 --- a/extern/sector-storage/stores/index.go +++ b/extern/sector-storage/stores/index.go @@ -2,6 +2,7 @@ package stores import ( "context" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "net/url" gopath "path" "sort" @@ -53,20 +54,20 @@ type SectorIndex interface { // part of storage-miner api StorageInfo(context.Context, ID) (StorageInfo, error) StorageReportHealth(context.Context, ID, HealthReport) error - StorageDeclareSector(ctx context.Context, storageID ID, s abi.SectorID, ft SectorFileType, primary bool) error - StorageDropSector(ctx context.Context, storageID ID, s abi.SectorID, ft SectorFileType) error - StorageFindSector(ctx context.Context, sector abi.SectorID, ft SectorFileType, spt abi.RegisteredSealProof, allowFetch bool) ([]SectorStorageInfo, error) + StorageDeclareSector(ctx context.Context, storageID ID, s abi.SectorID, ft storiface.SectorFileType, primary bool) error + StorageDropSector(ctx context.Context, storageID ID, s abi.SectorID, ft storiface.SectorFileType) error + StorageFindSector(ctx context.Context, sector abi.SectorID, ft storiface.SectorFileType, spt abi.RegisteredSealProof, allowFetch bool) ([]SectorStorageInfo, error) - StorageBestAlloc(ctx context.Context, allocate SectorFileType, spt abi.RegisteredSealProof, pathType PathType) ([]StorageInfo, error) + StorageBestAlloc(ctx context.Context, allocate storiface.SectorFileType, spt abi.RegisteredSealProof, pathType storiface.PathType) ([]StorageInfo, error) // atomically acquire locks on all sector file types. 
close ctx to unlock - StorageLock(ctx context.Context, sector abi.SectorID, read SectorFileType, write SectorFileType) error - StorageTryLock(ctx context.Context, sector abi.SectorID, read SectorFileType, write SectorFileType) (bool, error) + StorageLock(ctx context.Context, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType) error + StorageTryLock(ctx context.Context, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType) (bool, error) } type Decl struct { abi.SectorID - SectorFileType + storiface.SectorFileType } type declMeta struct { @@ -104,10 +105,10 @@ func (i *Index) StorageList(ctx context.Context) (map[ID][]Decl, error) { i.lk.RLock() defer i.lk.RUnlock() - byID := map[ID]map[abi.SectorID]SectorFileType{} + byID := map[ID]map[abi.SectorID]storiface.SectorFileType{} for id := range i.stores { - byID[id] = map[abi.SectorID]SectorFileType{} + byID[id] = map[abi.SectorID]storiface.SectorFileType{} } for decl, ids := range i.sectors { for _, id := range ids { @@ -180,12 +181,12 @@ func (i *Index) StorageReportHealth(ctx context.Context, id ID, report HealthRep return nil } -func (i *Index) StorageDeclareSector(ctx context.Context, storageID ID, s abi.SectorID, ft SectorFileType, primary bool) error { +func (i *Index) StorageDeclareSector(ctx context.Context, storageID ID, s abi.SectorID, ft storiface.SectorFileType, primary bool) error { i.lk.Lock() defer i.lk.Unlock() loop: - for _, fileType := range PathTypes { + for _, fileType := range storiface.PathTypes { if fileType&ft == 0 { continue } @@ -212,11 +213,11 @@ loop: return nil } -func (i *Index) StorageDropSector(ctx context.Context, storageID ID, s abi.SectorID, ft SectorFileType) error { +func (i *Index) StorageDropSector(ctx context.Context, storageID ID, s abi.SectorID, ft storiface.SectorFileType) error { i.lk.Lock() defer i.lk.Unlock() - for _, fileType := range PathTypes { + for _, fileType := range storiface.PathTypes { if fileType&ft == 0 { continue } @@ -246,14 +247,14 @@ func (i *Index) StorageDropSector(ctx context.Context, storageID ID, s abi.Secto return nil } -func (i *Index) StorageFindSector(ctx context.Context, s abi.SectorID, ft SectorFileType, spt abi.RegisteredSealProof, allowFetch bool) ([]SectorStorageInfo, error) { +func (i *Index) StorageFindSector(ctx context.Context, s abi.SectorID, ft storiface.SectorFileType, spt abi.RegisteredSealProof, allowFetch bool) ([]SectorStorageInfo, error) { i.lk.RLock() defer i.lk.RUnlock() storageIDs := map[ID]uint64{} isprimary := map[ID]bool{} - for _, pathType := range PathTypes { + for _, pathType := range storiface.PathTypes { if ft&pathType == 0 { continue } @@ -280,7 +281,7 @@ func (i *Index) StorageFindSector(ctx context.Context, s abi.SectorID, ft Sector return nil, xerrors.Errorf("failed to parse url: %w", err) } - rl.Path = gopath.Join(rl.Path, ft.String(), SectorName(s)) + rl.Path = gopath.Join(rl.Path, ft.String(), storiface.SectorName(s)) urls[k] = rl.String() } @@ -333,7 +334,7 @@ func (i *Index) StorageFindSector(ctx context.Context, s abi.SectorID, ft Sector return nil, xerrors.Errorf("failed to parse url: %w", err) } - rl.Path = gopath.Join(rl.Path, ft.String(), SectorName(s)) + rl.Path = gopath.Join(rl.Path, ft.String(), storiface.SectorName(s)) urls[k] = rl.String() } @@ -365,7 +366,7 @@ func (i *Index) StorageInfo(ctx context.Context, id ID) (StorageInfo, error) { return *si.info, nil } -func (i *Index) StorageBestAlloc(ctx context.Context, allocate SectorFileType, spt 
abi.RegisteredSealProof, pathType PathType) ([]StorageInfo, error) { +func (i *Index) StorageBestAlloc(ctx context.Context, allocate storiface.SectorFileType, spt abi.RegisteredSealProof, pathType storiface.PathType) ([]StorageInfo, error) { i.lk.RLock() defer i.lk.RUnlock() @@ -377,10 +378,10 @@ func (i *Index) StorageBestAlloc(ctx context.Context, allocate SectorFileType, s } for _, p := range i.stores { - if (pathType == PathSealing) && !p.info.CanSeal { + if (pathType == storiface.PathSealing) && !p.info.CanSeal { continue } - if (pathType == PathStorage) && !p.info.CanStore { + if (pathType == storiface.PathStorage) && !p.info.CanStore { continue } @@ -421,7 +422,7 @@ func (i *Index) StorageBestAlloc(ctx context.Context, allocate SectorFileType, s return out, nil } -func (i *Index) FindSector(id abi.SectorID, typ SectorFileType) ([]ID, error) { +func (i *Index) FindSector(id abi.SectorID, typ storiface.SectorFileType) ([]ID, error) { i.lk.RLock() defer i.lk.RUnlock() diff --git a/extern/sector-storage/stores/index_locks.go b/extern/sector-storage/stores/index_locks.go index 8bf15b95071..2137436d25b 100644 --- a/extern/sector-storage/stores/index_locks.go +++ b/extern/sector-storage/stores/index_locks.go @@ -2,6 +2,7 @@ package stores import ( "context" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "sync" "golang.org/x/xerrors" @@ -12,13 +13,13 @@ import ( type sectorLock struct { cond *ctxCond - r [FileTypes]uint - w SectorFileType + r [storiface.FileTypes]uint + w storiface.SectorFileType refs uint // access with indexLocks.lk } -func (l *sectorLock) canLock(read SectorFileType, write SectorFileType) bool { +func (l *sectorLock) canLock(read storiface.SectorFileType, write storiface.SectorFileType) bool { for i, b := range write.All() { if b && l.r[i] > 0 { return false @@ -29,7 +30,7 @@ func (l *sectorLock) canLock(read SectorFileType, write SectorFileType) bool { return l.w&read == 0 && l.w&write == 0 } -func (l *sectorLock) tryLock(read SectorFileType, write SectorFileType) bool { +func (l *sectorLock) tryLock(read storiface.SectorFileType, write storiface.SectorFileType) bool { if !l.canLock(read, write) { return false } @@ -45,16 +46,16 @@ func (l *sectorLock) tryLock(read SectorFileType, write SectorFileType) bool { return true } -type lockFn func(l *sectorLock, ctx context.Context, read SectorFileType, write SectorFileType) (bool, error) +type lockFn func(l *sectorLock, ctx context.Context, read storiface.SectorFileType, write storiface.SectorFileType) (bool, error) -func (l *sectorLock) tryLockSafe(ctx context.Context, read SectorFileType, write SectorFileType) (bool, error) { +func (l *sectorLock) tryLockSafe(ctx context.Context, read storiface.SectorFileType, write storiface.SectorFileType) (bool, error) { l.cond.L.Lock() defer l.cond.L.Unlock() return l.tryLock(read, write), nil } -func (l *sectorLock) lock(ctx context.Context, read SectorFileType, write SectorFileType) (bool, error) { +func (l *sectorLock) lock(ctx context.Context, read storiface.SectorFileType, write storiface.SectorFileType) (bool, error) { l.cond.L.Lock() defer l.cond.L.Unlock() @@ -67,7 +68,7 @@ func (l *sectorLock) lock(ctx context.Context, read SectorFileType, write Sector return true, nil } -func (l *sectorLock) unlock(read SectorFileType, write SectorFileType) { +func (l *sectorLock) unlock(read storiface.SectorFileType, write storiface.SectorFileType) { l.cond.L.Lock() defer l.cond.L.Unlock() @@ -88,12 +89,12 @@ type indexLocks struct { locks map[abi.SectorID]*sectorLock } 
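The per-sector lock in stores/index_locks.go above tracks a reader count per file type and a single writer bitmask; a lock request is compatible only when none of the requested write types currently have readers and none of the requested read or write types are write-held. The sketch below inlines the bit check that the real code performs via SectorFileType.All(); FileTypes is assumed to be 3 here.

// Standalone sketch of the sectorLock read/write compatibility rule: writes
// conflict with existing readers of the same file type, and both reads and
// writes conflict with existing writers.
package main

import "fmt"

type SectorFileType int

const (
	FTUnsealed SectorFileType = 1 << iota
	FTSealed
	FTCache
	FileTypes = iota // number of defined file types (assumed 3)
)

type sectorLock struct {
	r [FileTypes]uint // reader count per file type
	w SectorFileType  // bitmask of write-held file types
}

func (l *sectorLock) canLock(read, write SectorFileType) bool {
	for i := 0; i < FileTypes; i++ {
		if write&(1<<i) != 0 && l.r[i] > 0 {
			return false // someone is reading a type we want to write
		}
	}
	// no requested type may already be write-held
	return l.w&read == 0 && l.w&write == 0
}

func main() {
	var l sectorLock
	l.r[1]++ // one reader holds FTSealed

	fmt.Println(l.canLock(FTCache, 0))  // true: unrelated read is fine
	fmt.Println(l.canLock(0, FTSealed)) // false: can't write while read-held

	l.w |= FTCache
	fmt.Println(l.canLock(FTCache, 0)) // false: FTCache is write-held
}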
-func (i *indexLocks) lockWith(ctx context.Context, lockFn lockFn, sector abi.SectorID, read SectorFileType, write SectorFileType) (bool, error) { +func (i *indexLocks) lockWith(ctx context.Context, lockFn lockFn, sector abi.SectorID, read storiface.SectorFileType, write storiface.SectorFileType) (bool, error) { if read|write == 0 { return false, nil } - if read|write > (1< (1< %s(se:%t; st:%t)", s, fileType, sst.ID, sst.CanSeal, sst.CanStore, dst.ID, dst.CanSeal, dst.CanStore) - if err := st.index.StorageDropSector(ctx, ID(PathByType(srcIds, fileType)), s, fileType); err != nil { + if err := st.index.StorageDropSector(ctx, ID(storiface.PathByType(srcIds, fileType)), s, fileType); err != nil { return xerrors.Errorf("dropping source sector from index: %w", err) } - if err := move(PathByType(src, fileType), PathByType(dest, fileType)); err != nil { + if err := move(storiface.PathByType(src, fileType), storiface.PathByType(dest, fileType)); err != nil { // TODO: attempt some recovery (check if src is still there, re-declare) return xerrors.Errorf("moving sector %v(%d): %w", s, fileType, err) } - if err := st.index.StorageDeclareSector(ctx, ID(PathByType(destIds, fileType)), s, fileType, true); err != nil { - return xerrors.Errorf("declare sector %d(t:%d) -> %s: %w", s, fileType, ID(PathByType(destIds, fileType)), err) + if err := st.index.StorageDeclareSector(ctx, ID(storiface.PathByType(destIds, fileType)), s, fileType, true); err != nil { + return xerrors.Errorf("declare sector %d(t:%d) -> %s: %w", s, fileType, ID(storiface.PathByType(destIds, fileType)), err) } } diff --git a/extern/sector-storage/stores/remote.go b/extern/sector-storage/stores/remote.go index a88e3b9470f..d9e1cf49fbc 100644 --- a/extern/sector-storage/stores/remote.go +++ b/extern/sector-storage/stores/remote.go @@ -38,7 +38,7 @@ type Remote struct { fetching map[abi.SectorID]chan struct{} } -func (r *Remote) RemoveCopies(ctx context.Context, s abi.SectorID, types SectorFileType) error { +func (r *Remote) RemoveCopies(ctx context.Context, s abi.SectorID, types storiface.SectorFileType) error { // TODO: do this on remotes too // (not that we really need to do that since it's always called by the // worker which pulled the copy) @@ -58,9 +58,9 @@ func NewRemote(local *Local, index SectorIndex, auth http.Header, fetchLimit int } } -func (r *Remote) AcquireSector(ctx context.Context, s abi.SectorID, spt abi.RegisteredSealProof, existing SectorFileType, allocate SectorFileType, pathType PathType, op AcquireMode) (SectorPaths, SectorPaths, error) { +func (r *Remote) AcquireSector(ctx context.Context, s abi.SectorID, spt abi.RegisteredSealProof, existing storiface.SectorFileType, allocate storiface.SectorFileType, pathType storiface.PathType, op storiface.AcquireMode) (storiface.SectorPaths, storiface.SectorPaths, error) { if existing|allocate != existing^allocate { - return SectorPaths{}, SectorPaths{}, xerrors.New("can't both find and allocate a sector") + return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.New("can't both find and allocate a sector") } for { @@ -79,7 +79,7 @@ func (r *Remote) AcquireSector(ctx context.Context, s abi.SectorID, spt abi.Regi case <-c: continue case <-ctx.Done(): - return SectorPaths{}, SectorPaths{}, ctx.Err() + return storiface.SectorPaths{}, storiface.SectorPaths{}, ctx.Err() } } @@ -92,62 +92,62 @@ func (r *Remote) AcquireSector(ctx context.Context, s abi.SectorID, spt abi.Regi paths, stores, err := r.local.AcquireSector(ctx, s, spt, existing, allocate, pathType, op) if err != 
nil { - return SectorPaths{}, SectorPaths{}, xerrors.Errorf("local acquire error: %w", err) + return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.Errorf("local acquire error: %w", err) } - var toFetch SectorFileType - for _, fileType := range PathTypes { + var toFetch storiface.SectorFileType + for _, fileType := range storiface.PathTypes { if fileType&existing == 0 { continue } - if PathByType(paths, fileType) == "" { + if storiface.PathByType(paths, fileType) == "" { toFetch |= fileType } } - apaths, ids, err := r.local.AcquireSector(ctx, s, spt, FTNone, toFetch, pathType, op) + apaths, ids, err := r.local.AcquireSector(ctx, s, spt, storiface.FTNone, toFetch, pathType, op) if err != nil { - return SectorPaths{}, SectorPaths{}, xerrors.Errorf("allocate local sector for fetching: %w", err) + return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.Errorf("allocate local sector for fetching: %w", err) } - odt := FSOverheadSeal - if pathType == PathStorage { - odt = FsOverheadFinalized + odt := storiface.FSOverheadSeal + if pathType == storiface.PathStorage { + odt = storiface.FsOverheadFinalized } releaseStorage, err := r.local.Reserve(ctx, s, spt, toFetch, ids, odt) if err != nil { - return SectorPaths{}, SectorPaths{}, xerrors.Errorf("reserving storage space: %w", err) + return storiface.SectorPaths{}, storiface.SectorPaths{}, xerrors.Errorf("reserving storage space: %w", err) } defer releaseStorage() - for _, fileType := range PathTypes { + for _, fileType := range storiface.PathTypes { if fileType&existing == 0 { continue } - if PathByType(paths, fileType) != "" { + if storiface.PathByType(paths, fileType) != "" { continue } - dest := PathByType(apaths, fileType) - storageID := PathByType(ids, fileType) + dest := storiface.PathByType(apaths, fileType) + storageID := storiface.PathByType(ids, fileType) url, err := r.acquireFromRemote(ctx, s, fileType, dest) if err != nil { - return SectorPaths{}, SectorPaths{}, err + return storiface.SectorPaths{}, storiface.SectorPaths{}, err } - SetPathByType(&paths, fileType, dest) - SetPathByType(&stores, fileType, storageID) + storiface.SetPathByType(&paths, fileType, dest) + storiface.SetPathByType(&stores, fileType, storageID) - if err := r.index.StorageDeclareSector(ctx, ID(storageID), s, fileType, op == AcquireMove); err != nil { + if err := r.index.StorageDeclareSector(ctx, ID(storageID), s, fileType, op == storiface.AcquireMove); err != nil { log.Warnf("declaring sector %v in %s failed: %+v", s, storageID, err) continue } - if op == AcquireMove { + if op == storiface.AcquireMove { if err := r.deleteFromRemote(ctx, url); err != nil { log.Warnf("deleting sector %v from %s (delete %s): %+v", s, storageID, url, err) } @@ -169,7 +169,7 @@ func tempFetchDest(spath string, create bool) (string, error) { return filepath.Join(tempdir, b), nil } -func (r *Remote) acquireFromRemote(ctx context.Context, s abi.SectorID, fileType SectorFileType, dest string) (string, error) { +func (r *Remote) acquireFromRemote(ctx context.Context, s abi.SectorID, fileType storiface.SectorFileType, dest string) (string, error) { si, err := r.index.StorageFindSector(ctx, s, fileType, 0, false) if err != nil { return "", err @@ -281,9 +281,9 @@ func (r *Remote) fetch(ctx context.Context, url, outname string) error { } } -func (r *Remote) MoveStorage(ctx context.Context, s abi.SectorID, spt abi.RegisteredSealProof, types SectorFileType) error { +func (r *Remote) MoveStorage(ctx context.Context, s abi.SectorID, spt abi.RegisteredSealProof, types 
storiface.SectorFileType) error { // Make sure we have the data local - _, _, err := r.AcquireSector(ctx, s, spt, types, FTNone, PathStorage, AcquireMove) + _, _, err := r.AcquireSector(ctx, s, spt, types, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove) if err != nil { return xerrors.Errorf("acquire src storage (remote): %w", err) } @@ -291,7 +291,7 @@ func (r *Remote) MoveStorage(ctx context.Context, s abi.SectorID, spt abi.Regist return r.local.MoveStorage(ctx, s, spt, types) } -func (r *Remote) Remove(ctx context.Context, sid abi.SectorID, typ SectorFileType, force bool) error { +func (r *Remote) Remove(ctx context.Context, sid abi.SectorID, typ storiface.SectorFileType, force bool) error { if bits.OnesCount(uint(typ)) != 1 { return xerrors.New("delete expects one file type") } diff --git a/extern/sector-storage/stores/filetype.go b/extern/sector-storage/storiface/filetype.go similarity index 96% rename from extern/sector-storage/stores/filetype.go rename to extern/sector-storage/storiface/filetype.go index 50417d9683a..6db5cf9ec6e 100644 --- a/extern/sector-storage/stores/filetype.go +++ b/extern/sector-storage/storiface/filetype.go @@ -1,4 +1,4 @@ -package stores +package storiface import ( "fmt" @@ -16,6 +16,8 @@ const ( FileTypes = iota ) +var PathTypes = []SectorFileType{FTUnsealed, FTSealed, FTCache} + const ( FTNone SectorFileType = 0 ) diff --git a/extern/sector-storage/storiface/storage.go b/extern/sector-storage/storiface/storage.go index f1de3656e72..e836002d5de 100644 --- a/extern/sector-storage/storiface/storage.go +++ b/extern/sector-storage/storiface/storage.go @@ -1 +1,15 @@ package storiface + +type PathType string + +const ( + PathStorage PathType = "storage" + PathSealing PathType = "sealing" +) + +type AcquireMode string + +const ( + AcquireMove AcquireMode = "move" + AcquireCopy AcquireMode = "copy" +) diff --git a/extern/sector-storage/storiface/worker.go b/extern/sector-storage/storiface/worker.go index 9f79e37be2a..dac22aba0f9 100644 --- a/extern/sector-storage/storiface/worker.go +++ b/extern/sector-storage/storiface/worker.go @@ -12,7 +12,6 @@ import ( "github.com/filecoin-project/specs-storage/storage" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" ) type WorkerInfo struct { @@ -64,10 +63,10 @@ type WorkerCalls interface { SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (CallID, error) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (CallID, error) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) (CallID, error) - MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) (CallID, error) + MoveStorage(ctx context.Context, sector abi.SectorID, types SectorFileType) (CallID, error) UnsealPiece(context.Context, abi.SectorID, UnpaddedByteIndex, abi.UnpaddedPieceSize, abi.SealRandomness, cid.Cid) (CallID, error) ReadPiece(context.Context, io.Writer, abi.SectorID, UnpaddedByteIndex, abi.UnpaddedPieceSize) (CallID, error) - Fetch(context.Context, abi.SectorID, stores.SectorFileType, stores.PathType, stores.AcquireMode) (CallID, error) + Fetch(context.Context, abi.SectorID, SectorFileType, PathType, AcquireMode) (CallID, error) } type WorkerReturn interface { diff --git a/extern/sector-storage/testworker_test.go b/extern/sector-storage/testworker_test.go index 858b76f7ce7..cfa2bed0662 100644 --- a/extern/sector-storage/testworker_test.go 
+++ b/extern/sector-storage/testworker_test.go @@ -85,11 +85,11 @@ func (t *testWorker) Remove(ctx context.Context, sector abi.SectorID) error { panic("implement me") } -func (t *testWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types stores.SectorFileType) error { +func (t *testWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) error { panic("implement me") } -func (t *testWorker) Fetch(ctx context.Context, id abi.SectorID, fileType stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) error { +func (t *testWorker) Fetch(ctx context.Context, id abi.SectorID, fileType storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) error { return nil } diff --git a/extern/sector-storage/work_tracker.go b/extern/sector-storage/work_tracker.go index fe176a7f7af..53f79af9059 100644 --- a/extern/sector-storage/work_tracker.go +++ b/extern/sector-storage/work_tracker.go @@ -12,7 +12,6 @@ import ( "github.com/filecoin-project/specs-storage/storage" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) @@ -108,7 +107,7 @@ func (t *trackedWorker) AddPiece(ctx context.Context, sector abi.SectorID, piece return t.Worker.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData) } -func (t *trackedWorker) Fetch(ctx context.Context, s abi.SectorID, ft stores.SectorFileType, ptype stores.PathType, am stores.AcquireMode) error { +func (t *trackedWorker) Fetch(ctx context.Context, s abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) error { defer t.tracker.track(s, sealtasks.TTFetch)() return t.Worker.Fetch(ctx, s, ft, ptype, am) From 06e3852cef1a1a74ddd7352e72dddd10a6601dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 7 Sep 2020 12:20:50 +0200 Subject: [PATCH 03/61] storage: Integrate async workers in sealing manager --- extern/sector-storage/manager.go | 178 +++++++++++++++++++++++++------ 1 file changed, 148 insertions(+), 30 deletions(-) diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index dacd3439f7e..da700cdc0b3 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -5,6 +5,7 @@ import ( "errors" "io" "net/http" + "sync" "github.com/hashicorp/go-multierror" "github.com/ipfs/go-cid" @@ -68,6 +69,15 @@ type Manager struct { sched *scheduler storage.Prover + + resLk sync.Mutex + results map[storiface.CallID]result + waitRes map[storiface.CallID]chan struct{} +} + +type result struct { + r interface{} + err error } type SealerConfig struct { @@ -191,9 +201,10 @@ func schedNop(context.Context, Worker) error { return nil } -func schedFetch(sector abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) func(context.Context, Worker) error { +func schedFetch(wf waitFunc, sector abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) func(context.Context, Worker) error { return func(ctx context.Context, worker Worker) error { - return worker.Fetch(ctx, sector, ft, ptype, am) + _, err := wf(ctx)(worker.Fetch(ctx, sector, ft, ptype, am)) + return err } } @@ -220,15 +231,21 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect var readOk bool + readPiece := func(ctx context.Context, w Worker) error { + r, err := 
m.waitResult(ctx)(w.ReadPiece(ctx, sink, sector, offset, size)) + if err != nil { + return err + } + readOk = r.(bool) + return nil + } + if len(best) > 0 { // There is unsealed sector, see if we can read from it selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - readOk, err = w.ReadPiece(ctx, sink, sector, offset, size) - return err - }) + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.waitResult, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), readPiece) if err != nil { return xerrors.Errorf("reading piece from sealed sector: %w", err) } @@ -239,12 +256,12 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect } unsealFetch := func(ctx context.Context, worker Worker) error { - if err := worker.Fetch(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.PathSealing, storiface.AcquireCopy); err != nil { + if _, err := m.waitResult(ctx)(worker.Fetch(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.PathSealing, storiface.AcquireCopy)); err != nil { return xerrors.Errorf("copy sealed/cache sector data: %w", err) } if len(best) > 0 { - if err := worker.Fetch(ctx, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove); err != nil { + if _, err := m.waitResult(ctx)(worker.Fetch(ctx, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove)); err != nil { return xerrors.Errorf("copy unsealed sector data: %w", err) } } @@ -252,7 +269,8 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect } err = m.sched.Schedule(ctx, sector, sealtasks.TTUnseal, selector, unsealFetch, func(ctx context.Context, w Worker) error { - return w.UnsealPiece(ctx, sector, offset, size, ticket, unsealed) + _, err := m.waitResult(ctx)(w.UnsealPiece(ctx, sector, offset, size, ticket, unsealed)) + return err }) if err != nil { return err @@ -260,10 +278,7 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - readOk, err = w.ReadPiece(ctx, sink, sector, offset, size) - return err - }) + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.waitResult, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), readPiece) if err != nil { return xerrors.Errorf("reading piece from sealed sector: %w", err) } @@ -298,11 +313,11 @@ func (m *Manager) AddPiece(ctx context.Context, sector abi.SectorID, existingPie var out abi.PieceInfo err = m.sched.Schedule(ctx, sector, sealtasks.TTAddPiece, selector, schedNop, func(ctx context.Context, w Worker) error { - p, err := w.AddPiece(ctx, sector, existingPieces, sz, r) + p, err := m.waitResult(ctx)(w.AddPiece(ctx, sector, existingPieces, sz, r)) if err != nil { return err } - out = p + out = p.(abi.PieceInfo) return nil }) @@ -321,12 +336,12 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke selector := newAllocSelector(m.index, storiface.FTCache|storiface.FTSealed, storiface.PathSealing) - err 
= m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit1, selector, schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := w.SealPreCommit1(ctx, sector, ticket, pieces) + err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit1, selector, schedFetch(m.waitResult, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + p, err := m.waitResult(ctx)(w.SealPreCommit1(ctx, sector, ticket, pieces)) if err != nil { return err } - out = p + out = p.(storage.PreCommit1Out) return nil }) @@ -343,12 +358,12 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, true) - err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, schedFetch(sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := w.SealPreCommit2(ctx, sector, phase1Out) + err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + p, err := m.waitResult(ctx)(w.SealPreCommit2(ctx, sector, phase1Out)) if err != nil { return err } - out = p + out = p.(storage.SectorCids) return nil }) return out, err @@ -367,12 +382,12 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a // generally very cheap / fast, and transferring data is not worth the effort selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, schedFetch(sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := w.SealCommit1(ctx, sector, ticket, seed, pieces, cids) + err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + p, err := m.waitResult(ctx)(w.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) if err != nil { return err } - out = p + out = p.(storage.Commit1Out) return nil }) return out, err @@ -382,11 +397,11 @@ func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Ou selector := newTaskSelector() err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit2, selector, schedNop, func(ctx context.Context, w Worker) error { - p, err := w.SealCommit2(ctx, sector, phase1Out) + p, err := m.waitResult(ctx)(w.SealCommit2(ctx, sector, phase1Out)) if err != nil { return err } - out = p + out = p.(storage.Proof) return nil }) @@ -416,9 +431,10 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) err := m.sched.Schedule(ctx, sector, sealtasks.TTFinalize, selector, - schedFetch(sector, storiface.FTCache|storiface.FTSealed|unsealed, storiface.PathSealing, storiface.AcquireMove), + schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed|unsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - return w.FinalizeSector(ctx, sector, keepUnsealed) + _, 
err := m.waitResult(ctx)(w.FinalizeSector(ctx, sector, keepUnsealed)) + return err }) if err != nil { return err @@ -433,9 +449,10 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU } err = m.sched.Schedule(ctx, sector, sealtasks.TTFetch, fetchSel, - schedFetch(sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), + schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - return w.MoveStorage(ctx, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed) + _, err := m.waitResult(ctx)(w.MoveStorage(ctx, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed)) + return err }) if err != nil { return xerrors.Errorf("moving sector to storage: %w", err) @@ -472,6 +489,107 @@ func (m *Manager) Remove(ctx context.Context, sector abi.SectorID) error { return err } +type waitFunc func(ctx context.Context) func(callID storiface.CallID, err error) (interface{}, error) + +func (m *Manager) waitResult(ctx context.Context) func(callID storiface.CallID, err error) (interface{}, error) { + return func(callID storiface.CallID, err error) (interface{}, error) { + if err != nil { + return nil, err + } + + m.resLk.Lock() + res, ok := m.results[callID] + if ok { + m.resLk.Unlock() + return res.r, res.err + } + + ch, ok := m.waitRes[callID] + if !ok { + ch = make(chan struct{}) + m.waitRes[callID] = ch + } + m.resLk.Unlock() + + select { + case <-ch: + m.resLk.Lock() + defer m.resLk.Unlock() + + res := m.results[callID] + delete(m.results, callID) + + return res.r, res.err + case <-ctx.Done(): + return nil, xerrors.Errorf("waiting for result: %w", ctx.Err()) + } + } +} + +func (m *Manager) returnResult(callID storiface.CallID, r interface{}, err string) error { + m.resLk.Lock() + defer m.resLk.Unlock() + + _, ok := m.results[callID] + if ok { + return xerrors.Errorf("result for call %v already reported") + } + + m.results[callID] = result{ + r: r, + err: errors.New(err), + } + + close(m.waitRes[callID]) + delete(m.waitRes, callID) + + return nil +} + +func (m *Manager) ReturnAddPiece(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error { + return m.returnResult(callID, pi, err) +} + +func (m *Manager) ReturnSealPreCommit1(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error { + return m.returnResult(callID, p1o, err) +} + +func (m *Manager) ReturnSealPreCommit2(ctx context.Context, callID storiface.CallID, sealed storage.SectorCids, err string) error { + return m.returnResult(callID, sealed, err) +} + +func (m *Manager) ReturnSealCommit1(ctx context.Context, callID storiface.CallID, out storage.Commit1Out, err string) error { + return m.returnResult(callID, out, err) +} + +func (m *Manager) ReturnSealCommit2(ctx context.Context, callID storiface.CallID, proof storage.Proof, err string) error { + return m.returnResult(callID, proof, err) +} + +func (m *Manager) ReturnFinalizeSector(ctx context.Context, callID storiface.CallID, err string) error { + return m.returnResult(callID, nil, err) +} + +func (m *Manager) ReturnReleaseUnsealed(ctx context.Context, callID storiface.CallID, err string) error { + return m.returnResult(callID, nil, err) +} + +func (m *Manager) ReturnMoveStorage(ctx context.Context, callID storiface.CallID, err string) error { + return m.returnResult(callID, nil, err) +} + +func (m *Manager) ReturnUnsealPiece(ctx 
context.Context, callID storiface.CallID, err string) error { + return m.returnResult(callID, nil, err) +} + +func (m *Manager) ReturnReadPiece(ctx context.Context, callID storiface.CallID, ok bool, err string) error { + return m.returnResult(callID, ok, err) +} + +func (m *Manager) ReturnFetch(ctx context.Context, callID storiface.CallID, err string) error { + return m.returnResult(callID, nil, err) +} + func (m *Manager) StorageLocal(ctx context.Context) (map[stores.ID]string, error) { l, err := m.localStore.Local(ctx) if err != nil { From 9e6f974f3c2bbb28c0258a5bbbd01c1034c074a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 7 Sep 2020 16:12:46 +0200 Subject: [PATCH 04/61] storage: Fix build --- cmd/lotus-seal-worker/rpc.go | 5 +- extern/sector-storage/manager.go | 8 +- extern/sector-storage/sched_test.go | 3 +- extern/sector-storage/stats.go | 2 +- extern/sector-storage/storiface/worker.go | 2 +- extern/sector-storage/work_tracker.go | 107 ++++++++++++---------- node/builder.go | 2 + node/impl/storminer.go | 1 + 8 files changed, 72 insertions(+), 58 deletions(-) diff --git a/cmd/lotus-seal-worker/rpc.go b/cmd/lotus-seal-worker/rpc.go index 5380fe43214..3c0a1f2cef1 100644 --- a/cmd/lotus-seal-worker/rpc.go +++ b/cmd/lotus-seal-worker/rpc.go @@ -6,11 +6,10 @@ import ( "github.com/mitchellh/go-homedir" "golang.org/x/xerrors" - "github.com/filecoin-project/specs-storage/storage" - "github.com/filecoin-project/lotus/build" sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage" "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type worker struct { @@ -43,4 +42,4 @@ func (w *worker) StorageAddLocal(ctx context.Context, path string) error { return nil } -var _ storage.Sealer = &worker{} +var _ storiface.WorkerCalls = &worker{} diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index da700cdc0b3..8891d83ab38 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -52,6 +52,7 @@ type SectorManager interface { ffiwrapper.StorageSealer storage.Prover + storiface.WorkerReturn FaultTracker } @@ -70,13 +71,13 @@ type Manager struct { storage.Prover - resLk sync.Mutex + resLk sync.Mutex results map[storiface.CallID]result waitRes map[storiface.CallID]chan struct{} } type result struct { - r interface{} + r interface{} err error } @@ -179,7 +180,8 @@ func (m *Manager) AddWorker(ctx context.Context, w Worker) error { m.sched.newWorkers <- &workerHandle{ w: w, wt: &workTracker{ - running: map[uint64]storiface.WorkerJob{}, + done: map[storiface.CallID]struct{}{}, + running: map[storiface.CallID]storiface.WorkerJob{}, }, info: info, preparing: &activeResources{}, diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index 834a9f6ddfe..a95d4b3fa15 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -166,7 +166,8 @@ func addTestWorker(t *testing.T, sched *scheduler, index *stores.Index, name str sched.newWorkers <- &workerHandle{ w: w, wt: &workTracker{ - running: map[uint64]storiface.WorkerJob{}, + done: map[storiface.CallID]struct{}{}, + running: map[storiface.CallID]storiface.WorkerJob{}, }, info: info, preparing: &activeResources{}, diff --git a/extern/sector-storage/stats.go b/extern/sector-storage/stats.go index 7f95e3bc37d..f9063cbec68 100644 --- a/extern/sector-storage/stats.go +++ b/extern/sector-storage/stats.go @@ -36,7 +36,7 @@ 
func (m *Manager) WorkerJobs() map[uint64][]storiface.WorkerJob { for wi, window := range handle.activeWindows { for _, request := range window.todo { out[uint64(id)] = append(out[uint64(id)], storiface.WorkerJob{ - ID: 0, + ID: storiface.UndefCall, Sector: request.sector, Task: request.taskType, RunWait: wi + 1, diff --git a/extern/sector-storage/storiface/worker.go b/extern/sector-storage/storiface/worker.go index dac22aba0f9..839ac14add3 100644 --- a/extern/sector-storage/storiface/worker.go +++ b/extern/sector-storage/storiface/worker.go @@ -40,7 +40,7 @@ type WorkerStats struct { } type WorkerJob struct { - ID uint64 + ID CallID Sector abi.SectorID Task sealtasks.TaskType diff --git a/extern/sector-storage/work_tracker.go b/extern/sector-storage/work_tracker.go index 53f79af9059..57e136af0d1 100644 --- a/extern/sector-storage/work_tracker.go +++ b/extern/sector-storage/work_tracker.go @@ -18,31 +18,58 @@ import ( type workTracker struct { lk sync.Mutex - ctr uint64 - running map[uint64]storiface.WorkerJob + done map[storiface.CallID]struct{} + running map[storiface.CallID]storiface.WorkerJob // TODO: done, aggregate stats, queue stats, scheduler feedback } -func (wt *workTracker) track(sid abi.SectorID, task sealtasks.TaskType) func() { +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +// TODO: CALL THIS! +func (wt *workTracker) onDone(callID storiface.CallID) { wt.lk.Lock() defer wt.lk.Unlock() - id := wt.ctr - wt.ctr++ - - wt.running[id] = storiface.WorkerJob{ - ID: id, - Sector: sid, - Task: task, - Start: time.Now(), + _, ok := wt.running[callID] + if !ok { + wt.done[callID] = struct{}{} + return } - return func() { + delete(wt.running, callID) +} + +func (wt *workTracker) track(sid abi.SectorID, task sealtasks.TaskType) func(storiface.CallID, error) (storiface.CallID, error) { + return func(callID storiface.CallID, err error) (storiface.CallID, error) { + if err != nil { + return callID, err + } + wt.lk.Lock() defer wt.lk.Unlock() - delete(wt.running, id) + _, done := wt.done[callID] + if done { + delete(wt.done, callID) + return callID, err + } + + wt.running[callID] = storiface.WorkerJob{ + ID: callID, + Sector: sid, + Task: task, + Start: time.Now(), + } + + return callID, err } } @@ -71,58 +98,40 @@ type trackedWorker struct { tracker *workTracker } -func (t *trackedWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storage.PreCommit1Out, error) { - defer t.tracker.track(sector, sealtasks.TTPreCommit1)() - - return t.Worker.SealPreCommit1(ctx, sector, ticket, pieces) +func (t *trackedWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { + return t.tracker.track(sector, sealtasks.TTPreCommit1)(t.Worker.SealPreCommit1(ctx, sector, ticket, pieces)) } -func (t *trackedWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storage.SectorCids, error) { - defer t.tracker.track(sector, sealtasks.TTPreCommit2)() - - return t.Worker.SealPreCommit2(ctx, sector, pc1o) +func (t *trackedWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) { + return t.tracker.track(sector, sealtasks.TTPreCommit2)(t.Worker.SealPreCommit2(ctx, sector, pc1o)) } -func (t *trackedWorker) SealCommit1(ctx 
context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storage.Commit1Out, error) { - defer t.tracker.track(sector, sealtasks.TTCommit1)() - - return t.Worker.SealCommit1(ctx, sector, ticket, seed, pieces, cids) +func (t *trackedWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) { + return t.tracker.track(sector, sealtasks.TTCommit1)(t.Worker.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) } -func (t *trackedWorker) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storage.Proof, error) { - defer t.tracker.track(sector, sealtasks.TTCommit2)() - - return t.Worker.SealCommit2(ctx, sector, c1o) +func (t *trackedWorker) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) { + return t.tracker.track(sector, sealtasks.TTCommit2)(t.Worker.SealCommit2(ctx, sector, c1o)) } -func (t *trackedWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) error { - defer t.tracker.track(sector, sealtasks.TTFinalize)() - - return t.Worker.FinalizeSector(ctx, sector, keepUnsealed) +func (t *trackedWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) { + return t.tracker.track(sector, sealtasks.TTFinalize)(t.Worker.FinalizeSector(ctx, sector, keepUnsealed)) } -func (t *trackedWorker) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (abi.PieceInfo, error) { - defer t.tracker.track(sector, sealtasks.TTAddPiece)() - - return t.Worker.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData) +func (t *trackedWorker) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) { + return t.tracker.track(sector, sealtasks.TTAddPiece)(t.Worker.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData)) } -func (t *trackedWorker) Fetch(ctx context.Context, s abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) error { - defer t.tracker.track(s, sealtasks.TTFetch)() - - return t.Worker.Fetch(ctx, s, ft, ptype, am) +func (t *trackedWorker) Fetch(ctx context.Context, s abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { + return t.tracker.track(s, sealtasks.TTFetch)(t.Worker.Fetch(ctx, s, ft, ptype, am)) } -func (t *trackedWorker) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) error { - defer t.tracker.track(id, sealtasks.TTUnseal)() - - return t.Worker.UnsealPiece(ctx, id, index, size, randomness, cid) +func (t *trackedWorker) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) (storiface.CallID, error) { + return t.tracker.track(id, sealtasks.TTUnseal)(t.Worker.UnsealPiece(ctx, id, index, size, randomness, cid)) } -func (t *trackedWorker) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size 
abi.UnpaddedPieceSize) (bool, error) { - defer t.tracker.track(id, sealtasks.TTReadUnsealed)() - - return t.Worker.ReadPiece(ctx, writer, id, index, size) +func (t *trackedWorker) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { + return t.tracker.track(id, sealtasks.TTReadUnsealed)(t.Worker.ReadPiece(ctx, writer, id, index, size)) } var _ Worker = &trackedWorker{} diff --git a/node/builder.go b/node/builder.go index 5b6966cd4e3..5c21a155da6 100644 --- a/node/builder.go +++ b/node/builder.go @@ -43,6 +43,7 @@ import ( sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" sealing "github.com/filecoin-project/lotus/extern/storage-sealing" "github.com/filecoin-project/lotus/lib/blockstore" "github.com/filecoin-project/lotus/lib/peermgr" @@ -298,6 +299,7 @@ func Online() Option { Override(new(sectorstorage.SectorManager), From(new(*sectorstorage.Manager))), Override(new(storage2.Prover), From(new(sectorstorage.SectorManager))), + Override(new(storiface.WorkerReturn), From(new(sectorstorage.SectorManager))), Override(new(*sectorblocks.SectorBlocks), sectorblocks.NewSectorBlocks), Override(new(*storage.Miner), modules.StorageMiner(config.DefaultStorageMiner().Fees)), diff --git a/node/impl/storminer.go b/node/impl/storminer.go index c688ff6778d..73e8eea0644 100644 --- a/node/impl/storminer.go +++ b/node/impl/storminer.go @@ -53,6 +53,7 @@ type StorageMinerAPI struct { StorageMgr *sectorstorage.Manager `optional:"true"` IStorageMgr sectorstorage.SectorManager *stores.Index + storiface.WorkerReturn DataTransfer dtypes.ProviderDataTransfer Host host.Host From 231a9e40512827bd537901606660b560c5f26deb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 7 Sep 2020 16:35:54 +0200 Subject: [PATCH 05/61] Fix sealing sched tests --- cmd/lotus-seal-worker/main.go | 2 +- extern/sector-storage/localworker.go | 3 +- extern/sector-storage/manager.go | 14 ++++-- extern/sector-storage/manager_test.go | 6 ++- extern/sector-storage/mock/mock.go | 44 +++++++++++++++++ extern/sector-storage/sched_test.go | 26 +++++----- extern/sector-storage/testworker_test.go | 60 +++++++++++++++++------- 7 files changed, 120 insertions(+), 35 deletions(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index e6361d3cf7e..b7fdfef2a36 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -344,7 +344,7 @@ var runCmd = &cli.Command{ LocalWorker: sectorstorage.NewLocalWorker(sectorstorage.WorkerConfig{ SealProof: spt, TaskTypes: taskTypes, - }, remote, localStore, nodeApi), + }, remote, localStore, nodeApi, nodeApi), localStore: localStore, ls: lr, } diff --git a/extern/sector-storage/localworker.go b/extern/sector-storage/localworker.go index 1a2232dd16e..c2017957876 100644 --- a/extern/sector-storage/localworker.go +++ b/extern/sector-storage/localworker.go @@ -39,7 +39,7 @@ type LocalWorker struct { acceptTasks map[sealtasks.TaskType]struct{} } -func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex) *LocalWorker { +func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn) *LocalWorker { acceptTasks := 
map[sealtasks.TaskType]struct{}{} for _, taskType := range wcfg.TaskTypes { acceptTasks[taskType] = struct{}{} @@ -52,6 +52,7 @@ func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, storage: store, localStore: local, sindex: sindex, + ret: ret, acceptTasks: acceptTasks, } diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 8891d83ab38..91381113cc6 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -119,6 +119,9 @@ func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg sched: newScheduler(cfg.SealProofType), Prover: prover, + + results: map[storiface.CallID]result{}, + waitRes: map[storiface.CallID]chan struct{}{}, } go m.sched.runSched() @@ -145,7 +148,7 @@ func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg err = m.AddWorker(ctx, NewLocalWorker(WorkerConfig{ SealProof: cfg.SealProofType, TaskTypes: localTasks, - }, stor, lstor, si)) + }, stor, lstor, si, m)) if err != nil { return nil, xerrors.Errorf("adding local worker: %w", err) } @@ -528,7 +531,7 @@ func (m *Manager) waitResult(ctx context.Context) func(callID storiface.CallID, } } -func (m *Manager) returnResult(callID storiface.CallID, r interface{}, err string) error { +func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr string) error { m.resLk.Lock() defer m.resLk.Unlock() @@ -537,9 +540,14 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, err strin return xerrors.Errorf("result for call %v already reported") } + var err error + if serr != "" { + err = errors.New(serr) + } + m.results[callID] = result{ r: r, - err: errors.New(err), + err: err, } close(m.waitRes[callID]) diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index 13ad9f8bf34..bab9a465799 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -15,6 +15,7 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "github.com/filecoin-project/specs-actors/actors/abi" @@ -109,6 +110,9 @@ func newTestMgr(ctx context.Context, t *testing.T) (*Manager, *stores.Local, *st sched: newScheduler(cfg.SealProofType), Prover: prover, + + results: map[storiface.CallID]result{}, + waitRes: map[storiface.CallID]chan struct{}{}, } go m.sched.runSched() @@ -129,7 +133,7 @@ func TestSimple(t *testing.T) { err := m.AddWorker(ctx, newTestWorker(WorkerConfig{ SealProof: abi.RegisteredSealProof_StackedDrg2KiBV1, TaskTypes: localTasks, - }, lstor)) + }, lstor, m)) require.NoError(t, err) sid := abi.SectorID{Miner: 1000, Number: 1} diff --git a/extern/sector-storage/mock/mock.go b/extern/sector-storage/mock/mock.go index 4afe5f0967c..aa3f2419746 100644 --- a/extern/sector-storage/mock/mock.go +++ b/extern/sector-storage/mock/mock.go @@ -384,6 +384,50 @@ func (mgr *SectorMgr) CheckProvable(ctx context.Context, spt abi.RegisteredSealP return bad, nil } +func (mgr *SectorMgr) ReturnAddPiece(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnSealPreCommit1(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) 
ReturnSealPreCommit2(ctx context.Context, callID storiface.CallID, sealed storage.SectorCids, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnSealCommit1(ctx context.Context, callID storiface.CallID, out storage.Commit1Out, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnSealCommit2(ctx context.Context, callID storiface.CallID, proof storage.Proof, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnFinalizeSector(ctx context.Context, callID storiface.CallID, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnReleaseUnsealed(ctx context.Context, callID storiface.CallID, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnMoveStorage(ctx context.Context, callID storiface.CallID, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnUnsealPiece(ctx context.Context, callID storiface.CallID, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnReadPiece(ctx context.Context, callID storiface.CallID, ok bool, err string) error { + panic("not supported") +} + +func (mgr *SectorMgr) ReturnFetch(ctx context.Context, callID storiface.CallID, err string) error { + panic("not supported") +} + func (m mockVerif) VerifySeal(svi abi.SealVerifyInfo) (bool, error) { if len(svi.Proof) != 32 { // Real ones are longer, but this should be fine return false, nil diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index a95d4b3fa15..47da92217ea 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -46,55 +46,55 @@ type schedTestWorker struct { closing chan struct{} } -func (s *schedTestWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storage.PreCommit1Out, error) { +func (s *schedTestWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storage.SectorCids, error) { +func (s *schedTestWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storage.Commit1Out, error) { +func (s *schedTestWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storage.Proof, error) { +func (s *schedTestWorker) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) error { +func (s *schedTestWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree 
[]storage.Range) error { +func (s *schedTestWorker) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) Remove(ctx context.Context, sector abi.SectorID) error { +func (s *schedTestWorker) Remove(ctx context.Context, sector abi.SectorID) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) NewSector(ctx context.Context, sector abi.SectorID) error { +func (s *schedTestWorker) NewSector(ctx context.Context, sector abi.SectorID) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (abi.PieceInfo, error) { +func (s *schedTestWorker) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) error { +func (s *schedTestWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) Fetch(ctx context.Context, id abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) error { +func (s *schedTestWorker) Fetch(ctx context.Context, id abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) error { +func (s *schedTestWorker) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) (storiface.CallID, error) { panic("implement me") } -func (s *schedTestWorker) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (bool, error) { +func (s *schedTestWorker) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { panic("implement me") } diff --git a/extern/sector-storage/testworker_test.go b/extern/sector-storage/testworker_test.go index cfa2bed0662..a212271a788 100644 --- a/extern/sector-storage/testworker_test.go +++ b/extern/sector-storage/testworker_test.go @@ -4,6 +4,7 @@ import ( "context" "io" + "github.com/google/uuid" "github.com/ipfs/go-cid" "github.com/filecoin-project/specs-actors/actors/abi" @@ -18,11 +19,12 @@ import ( type testWorker struct { acceptTasks map[sealtasks.TaskType]struct{} lstor *stores.Local + ret storiface.WorkerReturn mockSeal *mock.SectorMgr } -func newTestWorker(wcfg WorkerConfig, lstor *stores.Local) *testWorker { +func newTestWorker(wcfg WorkerConfig, lstor *stores.Local, ret storiface.WorkerReturn) *testWorker { ssize, err := wcfg.SealProof.SectorSize() if err != nil { panic(err) @@ -36,61 +38,87 @@ func newTestWorker(wcfg WorkerConfig, lstor *stores.Local) *testWorker { return &testWorker{ acceptTasks: acceptTasks, lstor: lstor, + ret: ret, mockSeal: mock.NewMockSectorMgr(ssize, nil), } } -func (t *testWorker) SealPreCommit1(ctx 
context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storage.PreCommit1Out, error) { - return t.mockSeal.SealPreCommit1(ctx, sector, ticket, pieces) +func (t *testWorker) asyncCall(sector abi.SectorID, work func(ci storiface.CallID)) (storiface.CallID, error) { + ci := storiface.CallID{ + Sector: sector, + ID: uuid.New(), + } + + go work(ci) + + return ci, nil +} + +func (t *testWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { + return t.asyncCall(sector, func(ci storiface.CallID) { + p1o, err := t.mockSeal.SealPreCommit1(ctx, sector, ticket, pieces) + if err := t.ret.ReturnSealPreCommit1(ctx, ci, p1o, errstr(err)); err != nil { + log.Error(err) + } + }) } func (t *testWorker) NewSector(ctx context.Context, sector abi.SectorID) error { panic("implement me") } -func (t *testWorker) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) error { +func (t *testWorker) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (bool, error) { +func (t *testWorker) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (abi.PieceInfo, error) { - return t.mockSeal.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData) +func (t *testWorker) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) { + return t.asyncCall(sector, func(ci storiface.CallID) { + p, err := t.mockSeal.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData) + if err := t.ret.ReturnAddPiece(ctx, ci, p, errstr(err)); err != nil { + log.Error(err) + } + }) } -func (t *testWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storage.SectorCids, error) { +func (t *testWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storage.Commit1Out, error) { +func (t *testWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storage.Proof, error) { +func (t *testWorker) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) error { +func 
(t *testWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) error { +func (t *testWorker) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) Remove(ctx context.Context, sector abi.SectorID) error { +func (t *testWorker) Remove(ctx context.Context, sector abi.SectorID) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) error { +func (t *testWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) (storiface.CallID, error) { panic("implement me") } -func (t *testWorker) Fetch(ctx context.Context, id abi.SectorID, fileType storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) error { - return nil +func (t *testWorker) Fetch(ctx context.Context, sector abi.SectorID, fileType storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { + return t.asyncCall(sector, func(ci storiface.CallID) { + if err := t.ret.ReturnFetch(ctx, ci, ""); err != nil { + log.Error(err) + } + }) } func (t *testWorker) TaskTypes(ctx context.Context) (map[sealtasks.TaskType]struct{}, error) { From 554570f884f2530f4a5c8f3719d120f638b1ce52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 7 Sep 2020 17:57:23 +0200 Subject: [PATCH 06/61] docsgen --- documentation/en/api-methods.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/documentation/en/api-methods.md b/documentation/en/api-methods.md index dd5c1381514..050e142b675 100644 --- a/documentation/en/api-methods.md +++ b/documentation/en/api-methods.md @@ -204,7 +204,7 @@ Response: ```json { "Version": "string value", - "APIVersion": 3584, + "APIVersion": 3840, "BlockDelay": 42 } ``` From 1ebca8f7321d302c3c6776d7e5405c633b0bc6e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 14 Sep 2020 09:44:55 +0200 Subject: [PATCH 07/61] more working code --- cmd/lotus-seal-worker/main.go | 8 +- cmd/lotus-storage-miner/init.go | 9 +- extern/sector-storage/calltracker.go | 46 ++++++ extern/sector-storage/cbor_gen.go | 119 +++++++++++++++ extern/sector-storage/localworker.go | 176 +++++++++++----------- extern/sector-storage/manager.go | 7 +- extern/sector-storage/storiface/worker.go | 7 + gen/main.go | 8 + node/modules/storageminer.go | 9 +- 9 files changed, 296 insertions(+), 93 deletions(-) create mode 100644 extern/sector-storage/calltracker.go create mode 100644 extern/sector-storage/cbor_gen.go diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index 31ef2917d8b..9739acb689b 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -15,6 +15,8 @@ import ( "github.com/google/uuid" "github.com/gorilla/mux" + "github.com/ipfs/go-datastore" + "github.com/ipfs/go-datastore/namespace" logging "github.com/ipfs/go-log/v2" manet "github.com/multiformats/go-multiaddr/net" "github.com/urfave/cli/v2" @@ -23,6 +25,7 @@ import ( "github.com/filecoin-project/go-jsonrpc" "github.com/filecoin-project/go-jsonrpc/auth" paramfetch "github.com/filecoin-project/go-paramfetch" + "github.com/filecoin-project/go-statestore" "github.com/filecoin-project/lotus/api" 
"github.com/filecoin-project/lotus/api/apistruct" @@ -34,6 +37,7 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/lib/lotuslog" "github.com/filecoin-project/lotus/lib/rpcenc" + "github.com/filecoin-project/lotus/node/modules" "github.com/filecoin-project/lotus/node/repo" ) @@ -342,11 +346,13 @@ var runCmd = &cli.Command{ // Create / expose the worker + wsts := statestore.New(namespace.Wrap(datastore.NewMapDatastore(), modules.WorkerCallsPrefix)) // TODO: USE A REAL DATASTORE + workerApi := &worker{ LocalWorker: sectorstorage.NewLocalWorker(sectorstorage.WorkerConfig{ SealProof: spt, TaskTypes: taskTypes, - }, remote, localStore, nodeApi, nodeApi), + }, remote, localStore, nodeApi, nodeApi, wsts), localStore: localStore, ls: lr, } diff --git a/cmd/lotus-storage-miner/init.go b/cmd/lotus-storage-miner/init.go index ba9460dab91..4009c74f746 100644 --- a/cmd/lotus-storage-miner/init.go +++ b/cmd/lotus-storage-miner/init.go @@ -12,11 +12,10 @@ import ( "path/filepath" "strconv" - "github.com/filecoin-project/go-state-types/big" - "github.com/docker/go-units" "github.com/google/uuid" "github.com/ipfs/go-datastore" + "github.com/ipfs/go-datastore/namespace" "github.com/libp2p/go-libp2p-core/crypto" "github.com/libp2p/go-libp2p-core/peer" "github.com/mitchellh/go-homedir" @@ -27,7 +26,9 @@ import ( cborutil "github.com/filecoin-project/go-cbor-util" paramfetch "github.com/filecoin-project/go-paramfetch" "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-state-types/big" crypto2 "github.com/filecoin-project/go-state-types/crypto" + "github.com/filecoin-project/go-statestore" sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" "github.com/filecoin-project/lotus/extern/sector-storage/stores" @@ -441,6 +442,8 @@ func storageMinerInit(ctx context.Context, cctx *cli.Context, api lapi.FullNode, return err } + wsts := statestore.New(namespace.Wrap(mds, modules.WorkerCallsPrefix)) + smgr, err := sectorstorage.New(ctx, lr, stores.NewIndex(), &ffiwrapper.Config{ SealProofType: spt, }, sectorstorage.SealerConfig{ @@ -450,7 +453,7 @@ func storageMinerInit(ctx context.Context, cctx *cli.Context, api lapi.FullNode, AllowPreCommit2: true, AllowCommit: true, AllowUnseal: true, - }, nil, sa) + }, nil, sa, wsts) if err != nil { return err } diff --git a/extern/sector-storage/calltracker.go b/extern/sector-storage/calltracker.go new file mode 100644 index 00000000000..8c5aff577b9 --- /dev/null +++ b/extern/sector-storage/calltracker.go @@ -0,0 +1,46 @@ +package sectorstorage + +import ( + "github.com/filecoin-project/go-statestore" + + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" +) + +type callTracker struct { + st *statestore.StateStore // by CallID +} + +type CallState uint64 + +const ( + CallStarted CallState = iota + CallDone + // returned -> remove +) + +type Call struct { + State CallState + + // Params cbg.Deferred // TODO: support once useful + Result []byte +} + +func (wt *callTracker) onStart(ci storiface.CallID) error { + return wt.st.Begin(ci, &Call{ + State: CallStarted, + }) +} + +func (wt *callTracker) onDone(ci storiface.CallID, ret []byte) error { + st := wt.st.Get(ci) + return st.Mutate(func(cs *Call) error { + cs.State = CallDone + cs.Result = ret + return nil + }) +} + +func (wt *callTracker) onReturned(ci storiface.CallID) error { + st := wt.st.Get(ci) + return st.End() +} diff --git 
a/extern/sector-storage/cbor_gen.go b/extern/sector-storage/cbor_gen.go new file mode 100644 index 00000000000..c532a970091 --- /dev/null +++ b/extern/sector-storage/cbor_gen.go @@ -0,0 +1,119 @@ +// Code generated by github.com/whyrusleeping/cbor-gen. DO NOT EDIT. + +package sectorstorage + +import ( + "fmt" + "io" + + cbg "github.com/whyrusleeping/cbor-gen" + xerrors "golang.org/x/xerrors" +) + +var _ = xerrors.Errorf + +func (t *Call) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + panic("cbg") + if _, err := w.Write([]byte{162}); err != nil { + return err + } + + scratch := make([]byte, 9) + + // t.State (sectorstorage.CallState) (uint64) + if len("State") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"State\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("State"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("State")); err != nil { + return err + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajUnsignedInt, uint64(t.State)); err != nil { + return err + } + + // t.Result (typegen.Deferred) (struct) + if len("Result") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"Result\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("Result"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("Result")); err != nil { + return err + } + + return nil +} + +func (t *Call) UnmarshalCBOR(r io.Reader) error { + *t = Call{} + + br := cbg.GetPeeker(r) + scratch := make([]byte, 8) + + maj, extra, err := cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("Call: map struct too large (%d)", extra) + } + + var name string + n := extra + + for i := uint64(0); i < n; i++ { + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + name = string(sval) + } + + switch name { + // t.State (sectorstorage.CallState) (uint64) + case "State": + + { + + maj, extra, err = cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + if maj != cbg.MajUnsignedInt { + return fmt.Errorf("wrong type for uint64 field") + } + t.State = CallState(extra) + + } + // t.Result (typegen.Deferred) (struct) + case "Result": + + { + + } + + default: + return fmt.Errorf("unknown struct field %d: '%s'", i, name) + } + } + + return nil +} diff --git a/extern/sector-storage/localworker.go b/extern/sector-storage/localworker.go index 5b81f297c68..b2e264a1588 100644 --- a/extern/sector-storage/localworker.go +++ b/extern/sector-storage/localworker.go @@ -4,6 +4,7 @@ import ( "context" "io" "os" + "reflect" "runtime" "github.com/elastic/go-sysinfo" @@ -14,6 +15,7 @@ import ( ffi "github.com/filecoin-project/filecoin-ffi" "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-statestore" storage2 "github.com/filecoin-project/specs-storage/storage" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" @@ -36,10 +38,11 @@ type LocalWorker struct { sindex stores.SectorIndex ret storiface.WorkerReturn + ct *callTracker acceptTasks map[sealtasks.TaskType]struct{} } -func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn) *LocalWorker { +func NewLocalWorker(wcfg WorkerConfig, store 
stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn, cst *statestore.StateStore) *LocalWorker { acceptTasks := map[sealtasks.TaskType]struct{}{} for _, taskType := range wcfg.TaskTypes { acceptTasks[taskType] = struct{}{} @@ -54,6 +57,9 @@ func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex: sindex, ret: ret, + ct: &callTracker{ + st: cst, + }, acceptTasks: acceptTasks, } } @@ -98,13 +104,62 @@ func (l *LocalWorker) sb() (ffiwrapper.Storage, error) { return ffiwrapper.New(&localWorkerPathProvider{w: l}, l.scfg) } -func (l *LocalWorker) asyncCall(sector abi.SectorID, work func(ci storiface.CallID)) (storiface.CallID, error) { +type returnType string + +// in: func(WorkerReturn, context.Context, CallID, err string) +// in: func(WorkerReturn, context.Context, CallID, ret T, err string) +func rfunc(in interface{}) func(context.Context, storiface.WorkerReturn, interface{}, error) error { + rf := reflect.ValueOf(in) + ft := rf.Type() + withRet := ft.NumIn() == 4 + + return func(ctx context.Context, wr storiface.WorkerReturn, i interface{}, err error) error { + rctx := reflect.ValueOf(ctx) + rwr := reflect.ValueOf(wr) + rerr := reflect.ValueOf(errstr(err)) + + var ro []reflect.Value + + if withRet { + ro = rf.Call([]reflect.Value{rwr, rctx, reflect.ValueOf(i), rerr}) + } else { + ro = rf.Call([]reflect.Value{rwr, rctx, rerr}) + } + + return ro[0].Interface().(error) + } +} + +var returnFunc = map[returnType]func(context.Context, storiface.WorkerReturn, interface{}, error) error{ + "AddPiece": rfunc(storiface.WorkerReturn.ReturnAddPiece), + "SealPreCommit1": rfunc(storiface.WorkerReturn.ReturnSealPreCommit1), + "SealPreCommit2": rfunc(storiface.WorkerReturn.ReturnSealPreCommit2), + "SealCommit1": rfunc(storiface.WorkerReturn.ReturnSealCommit1), + "SealCommit2": rfunc(storiface.WorkerReturn.ReturnSealCommit2), + "FinalizeSector": rfunc(storiface.WorkerReturn.ReturnFinalizeSector), + "ReleaseUnsealed": rfunc(storiface.WorkerReturn.ReturnReleaseUnsealed), + "MoveStorage": rfunc(storiface.WorkerReturn.ReturnMoveStorage), + "UnsealPiece": rfunc(storiface.WorkerReturn.ReturnUnsealPiece), + "ReadPiece": rfunc(storiface.WorkerReturn.ReturnReadPiece), + "Fetch": rfunc(storiface.WorkerReturn.ReturnFetch), +} + +func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt returnType, work func(ci storiface.CallID) (interface{}, error)) (storiface.CallID, error) { ci := storiface.CallID{ Sector: sector, ID: uuid.New(), } - go work(ci) + if err := l.ct.onStart(ci); err != nil { + log.Errorf("tracking call (start): %+v", err) + } + + go func() { + res, err := work(ci) + if err := returnFunc[rt](ctx, l.ret, res, err); err != nil { + log.Errorf("return error: %s: %+v", rt, err) + } + }() return ci, nil } @@ -132,58 +187,42 @@ func (l *LocalWorker) AddPiece(ctx context.Context, sector abi.SectorID, epcs [] return storiface.UndefCall, err } - return l.asyncCall(sector, func(ci storiface.CallID) { - pi, err := sb.AddPiece(ctx, sector, epcs, sz, r) - - if err := l.ret.ReturnAddPiece(ctx, ci, pi, errstr(err)); err != nil { - log.Errorf("ReturnAddPiece: %+v", err) - } + return l.asyncCall(ctx, sector, "AddPiece", func(ci storiface.CallID) (interface{}, error) { + return sb.AddPiece(ctx, sector, epcs, sz, r) }) } func (l *LocalWorker) Fetch(ctx context.Context, sector abi.SectorID, fileType storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { - return l.asyncCall(sector, func(ci 
storiface.CallID) { + return l.asyncCall(ctx, sector, "Fetch", func(ci storiface.CallID) (interface{}, error) { _, done, err := (&localWorkerPathProvider{w: l, op: am}).AcquireSector(ctx, sector, fileType, storiface.FTNone, ptype) if err == nil { done() } - if err := l.ret.ReturnFetch(ctx, ci, errstr(err)); err != nil { - log.Errorf("ReturnFetch: %+v", err) - } + return nil, err }) } func (l *LocalWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { - return l.asyncCall(sector, func(ci storiface.CallID) { - var err error - var p1o storage2.PreCommit1Out - defer func() { - if err := l.ret.ReturnSealPreCommit1(ctx, ci, p1o, errstr(err)); err != nil { - log.Errorf("ReturnSealPreCommit1: %+v", err) - } - }() + return l.asyncCall(ctx, sector, "SealPreCommit1", func(ci storiface.CallID) (interface{}, error) { { // cleanup previous failed attempts if they exist - if err = l.storage.Remove(ctx, sector, storiface.FTSealed, true); err != nil { - err = xerrors.Errorf("cleaning up sealed data: %w", err) - return + if err := l.storage.Remove(ctx, sector, storiface.FTSealed, true); err != nil { + return nil, xerrors.Errorf("cleaning up sealed data: %w", err) } - if err = l.storage.Remove(ctx, sector, storiface.FTCache, true); err != nil { - err = xerrors.Errorf("cleaning up cache data: %w", err) - return + if err := l.storage.Remove(ctx, sector, storiface.FTCache, true); err != nil { + return nil, xerrors.Errorf("cleaning up cache data: %w", err) } } - var sb ffiwrapper.Storage - sb, err = l.sb() + sb, err := l.sb() if err != nil { - return + return nil, err } - p1o, err = sb.SealPreCommit1(ctx, sector, ticket, pieces) + return sb.SealPreCommit1(ctx, sector, ticket, pieces) }) } @@ -193,12 +232,8 @@ func (l *LocalWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, p return storiface.UndefCall, err } - return l.asyncCall(sector, func(ci storiface.CallID) { - cs, err := sb.SealPreCommit2(ctx, sector, phase1Out) - - if err := l.ret.ReturnSealPreCommit2(ctx, ci, cs, errstr(err)); err != nil { - log.Errorf("ReturnSealPreCommit2: %+v", err) - } + return l.asyncCall(ctx, sector, "SealPreCommit2", func(ci storiface.CallID) (interface{}, error) { + return sb.SealPreCommit2(ctx, sector, phase1Out) }) } @@ -208,12 +243,8 @@ func (l *LocalWorker) SealCommit1(ctx context.Context, sector abi.SectorID, tick return storiface.UndefCall, err } - return l.asyncCall(sector, func(ci storiface.CallID) { - c1o, err := sb.SealCommit1(ctx, sector, ticket, seed, pieces, cids) - - if err := l.ret.ReturnSealCommit1(ctx, ci, c1o, errstr(err)); err != nil { - log.Errorf("ReturnSealCommit1: %+v", err) - } + return l.asyncCall(ctx, sector, "SealCommit1", func(ci storiface.CallID) (interface{}, error) { + return sb.SealCommit1(ctx, sector, ticket, seed, pieces, cids) }) } @@ -223,12 +254,8 @@ func (l *LocalWorker) SealCommit2(ctx context.Context, sector abi.SectorID, phas return storiface.UndefCall, err } - return l.asyncCall(sector, func(ci storiface.CallID) { - proof, err := sb.SealCommit2(ctx, sector, phase1Out) - - if err := l.ret.ReturnSealCommit2(ctx, ci, proof, errstr(err)); err != nil { - log.Errorf("ReturnSealCommit2: %+v", err) - } + return l.asyncCall(ctx, sector, "SealCommit2", func(ci storiface.CallID) (interface{}, error) { + return sb.SealCommit2(ctx, sector, phase1Out) }) } @@ -238,23 +265,18 @@ func (l *LocalWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, k return storiface.UndefCall, err } - return 
l.asyncCall(sector, func(ci storiface.CallID) { + return l.asyncCall(ctx, sector, "FinalizeSector", func(ci storiface.CallID) (interface{}, error) { if err := sb.FinalizeSector(ctx, sector, keepUnsealed); err != nil { - if err := l.ret.ReturnFinalizeSector(ctx, ci, errstr(xerrors.Errorf("finalizing sector: %w", err))); err != nil { - log.Errorf("ReturnFinalizeSector: %+v", err) - } + return nil, xerrors.Errorf("finalizing sector: %w", err) } if len(keepUnsealed) == 0 { - err = xerrors.Errorf("removing unsealed data: %w", err) - if err := l.ret.ReturnFinalizeSector(ctx, ci, errstr(err)); err != nil { - log.Errorf("ReturnFinalizeSector: %+v", err) + if err := l.storage.Remove(ctx, sector, storiface.FTUnsealed, true); err != nil { + return nil, xerrors.Errorf("removing unsealed data: %w", err) } } - if err := l.ret.ReturnFinalizeSector(ctx, ci, errstr(err)); err != nil { - log.Errorf("ReturnFinalizeSector: %+v", err) - } + return nil, err }) } @@ -279,12 +301,8 @@ func (l *LocalWorker) Remove(ctx context.Context, sector abi.SectorID) error { } func (l *LocalWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) (storiface.CallID, error) { - return l.asyncCall(sector, func(ci storiface.CallID) { - err := l.storage.MoveStorage(ctx, sector, l.scfg.SealProofType, types) - - if err := l.ret.ReturnMoveStorage(ctx, ci, errstr(err)); err != nil { - log.Errorf("ReturnMoveStorage: %+v", err) - } + return l.asyncCall(ctx, sector, "MoveStorage", func(ci storiface.CallID) (interface{}, error) { + return nil, l.storage.MoveStorage(ctx, sector, l.scfg.SealProofType, types) }) } @@ -294,28 +312,20 @@ func (l *LocalWorker) UnsealPiece(ctx context.Context, sector abi.SectorID, inde return storiface.UndefCall, err } - return l.asyncCall(sector, func(ci storiface.CallID) { - var err error - defer func() { - if err := l.ret.ReturnUnsealPiece(ctx, ci, errstr(err)); err != nil { - log.Errorf("ReturnUnsealPiece: %+v", err) - } - }() - + return l.asyncCall(ctx, sector, "UnsealPiece", func(ci storiface.CallID) (interface{}, error) { if err = sb.UnsealPiece(ctx, sector, index, size, randomness, cid); err != nil { - err = xerrors.Errorf("unsealing sector: %w", err) - return + return nil, xerrors.Errorf("unsealing sector: %w", err) } if err = l.storage.RemoveCopies(ctx, sector, storiface.FTSealed); err != nil { - err = xerrors.Errorf("removing source data: %w", err) - return + return nil, xerrors.Errorf("removing source data: %w", err) } if err = l.storage.RemoveCopies(ctx, sector, storiface.FTCache); err != nil { - err = xerrors.Errorf("removing source data: %w", err) - return + return nil, xerrors.Errorf("removing source data: %w", err) } + + return nil, nil }) } @@ -325,12 +335,8 @@ func (l *LocalWorker) ReadPiece(ctx context.Context, writer io.Writer, sector ab return storiface.UndefCall, err } - return l.asyncCall(sector, func(ci storiface.CallID) { - ok, err := sb.ReadPiece(ctx, writer, sector, index, size) - - if err := l.ret.ReturnReadPiece(ctx, ci, ok, errstr(err)); err != nil { - log.Errorf("ReturnReadPiece: %+v", err) - } + return l.asyncCall(ctx, sector, "ReadPiece", func(ci storiface.CallID) (interface{}, error) { + return sb.ReadPiece(ctx, writer, sector, index, size) }) } diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index cdc8fc3cef8..f51cd713a9b 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -3,6 +3,7 @@ package sectorstorage import ( "context" "errors" + 
"github.com/filecoin-project/go-statestore" "io" "net/http" "sync" @@ -94,7 +95,9 @@ type SealerConfig struct { type StorageAuth http.Header -func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg *ffiwrapper.Config, sc SealerConfig, urls URLs, sa StorageAuth) (*Manager, error) { +type WorkerStateStore *statestore.StateStore + +func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg *ffiwrapper.Config, sc SealerConfig, urls URLs, sa StorageAuth, wss WorkerStateStore) (*Manager, error) { lstor, err := stores.NewLocal(ctx, ls, si, urls) if err != nil { return nil, err @@ -148,7 +151,7 @@ func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg err = m.AddWorker(ctx, NewLocalWorker(WorkerConfig{ SealProof: cfg.SealProofType, TaskTypes: localTasks, - }, stor, lstor, si, m)) + }, stor, lstor, si, m, wss)) if err != nil { return nil, xerrors.Errorf("adding local worker: %w", err) } diff --git a/extern/sector-storage/storiface/worker.go b/extern/sector-storage/storiface/worker.go index 30587007ac9..ead7705241a 100644 --- a/extern/sector-storage/storiface/worker.go +++ b/extern/sector-storage/storiface/worker.go @@ -2,6 +2,7 @@ package storiface import ( "context" + "fmt" "io" "time" @@ -53,6 +54,12 @@ type CallID struct { ID uuid.UUID } +func (c CallID) String() string { + return fmt.Sprintf("%d-%d-%s", c.Sector.Miner, c.Sector.Number, c.ID) +} + +var _ fmt.Stringer = &CallID{} + var UndefCall CallID type WorkerCalls interface { diff --git a/gen/main.go b/gen/main.go index e7586a92a89..c7ae5bd5700 100644 --- a/gen/main.go +++ b/gen/main.go @@ -2,6 +2,7 @@ package main import ( "fmt" + sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage" "os" gen "github.com/whyrusleeping/cbor-gen" @@ -74,4 +75,11 @@ func main() { os.Exit(1) } + err = gen.WriteMapEncodersToFile("./extern/sector-storage/cbor_gen.go", "sectorstorage", + sectorstorage.Call{}, + ) + if err != nil { + fmt.Println(err) + os.Exit(1) + } } diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go index 773df78feab..b28d3e3f407 100644 --- a/node/modules/storageminer.go +++ b/node/modules/storageminer.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "github.com/filecoin-project/go-statestore" "net/http" "time" @@ -479,10 +480,14 @@ func RetrievalProvider(h host.Host, miner *storage.Miner, sealer sectorstorage.S return retrievalimpl.NewProvider(maddr, adapter, netwk, pieceStore, mds, dt, namespace.Wrap(ds, datastore.NewKey("/retrievals/provider")), opt) } -func SectorStorage(mctx helpers.MetricsCtx, lc fx.Lifecycle, ls stores.LocalStorage, si stores.SectorIndex, cfg *ffiwrapper.Config, sc sectorstorage.SealerConfig, urls sectorstorage.URLs, sa sectorstorage.StorageAuth) (*sectorstorage.Manager, error) { +var WorkerCallsPrefix = datastore.NewKey("/worker/calls") + +func SectorStorage(mctx helpers.MetricsCtx, lc fx.Lifecycle, ls stores.LocalStorage, si stores.SectorIndex, cfg *ffiwrapper.Config, sc sectorstorage.SealerConfig, urls sectorstorage.URLs, sa sectorstorage.StorageAuth, ds dtypes.MetadataDS) (*sectorstorage.Manager, error) { ctx := helpers.LifecycleCtx(mctx, lc) - sst, err := sectorstorage.New(ctx, ls, si, cfg, sc, urls, sa) + wsts := statestore.New(namespace.Wrap(ds, WorkerCallsPrefix)) + + sst, err := sectorstorage.New(ctx, ls, si, cfg, sc, urls, sa, wsts) if err != nil { return nil, err } From e9d25e591912c0e3ef6914ae87928dd9bd9aafef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 14 Sep 
2020 20:28:47 +0200 Subject: [PATCH 08/61] More fixes --- chain/exchange/cbor_gen.go | 12 +++---- extern/sector-storage/cbor_gen.go | 34 ++++++++++++++++--- .../sector-storage/ffiwrapper/sealer_test.go | 4 +-- extern/sector-storage/localworker.go | 21 +++++++----- markets/storageadapter/provider.go | 4 ++- 5 files changed, 54 insertions(+), 21 deletions(-) diff --git a/chain/exchange/cbor_gen.go b/chain/exchange/cbor_gen.go index dc91babe3c7..29b2580813a 100644 --- a/chain/exchange/cbor_gen.go +++ b/chain/exchange/cbor_gen.go @@ -146,7 +146,7 @@ func (t *Response) MarshalCBOR(w io.Writer) error { scratch := make([]byte, 9) - // t.Status (blocksync.status) (uint64) + // t.Status (exchange.status) (uint64) if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajUnsignedInt, uint64(t.Status)); err != nil { return err @@ -164,7 +164,7 @@ func (t *Response) MarshalCBOR(w io.Writer) error { return err } - // t.Chain ([]*blocksync.BSTipSet) (slice) + // t.Chain ([]*exchange.BSTipSet) (slice) if len(t.Chain) > cbg.MaxLength { return xerrors.Errorf("Slice value in field t.Chain was too long") } @@ -198,7 +198,7 @@ func (t *Response) UnmarshalCBOR(r io.Reader) error { return fmt.Errorf("cbor input had wrong number of fields") } - // t.Status (blocksync.status) (uint64) + // t.Status (exchange.status) (uint64) { @@ -222,7 +222,7 @@ func (t *Response) UnmarshalCBOR(r io.Reader) error { t.ErrorMessage = string(sval) } - // t.Chain ([]*blocksync.BSTipSet) (slice) + // t.Chain ([]*exchange.BSTipSet) (slice) maj, extra, err = cbg.CborReadHeaderBuf(br, scratch) if err != nil { @@ -567,7 +567,7 @@ func (t *BSTipSet) MarshalCBOR(w io.Writer) error { } } - // t.Messages (blocksync.CompactedMessages) (struct) + // t.Messages (exchange.CompactedMessages) (struct) if err := t.Messages.MarshalCBOR(w); err != nil { return err } @@ -621,7 +621,7 @@ func (t *BSTipSet) UnmarshalCBOR(r io.Reader) error { t.Blocks[i] = &v } - // t.Messages (blocksync.CompactedMessages) (struct) + // t.Messages (exchange.CompactedMessages) (struct) { diff --git a/extern/sector-storage/cbor_gen.go b/extern/sector-storage/cbor_gen.go index c532a970091..c20df2157dc 100644 --- a/extern/sector-storage/cbor_gen.go +++ b/extern/sector-storage/cbor_gen.go @@ -17,7 +17,6 @@ func (t *Call) MarshalCBOR(w io.Writer) error { _, err := w.Write(cbg.CborNull) return err } - panic("cbg") if _, err := w.Write([]byte{162}); err != nil { return err } @@ -40,7 +39,7 @@ func (t *Call) MarshalCBOR(w io.Writer) error { return err } - // t.Result (typegen.Deferred) (struct) + // t.Result ([]uint8) (slice) if len("Result") > cbg.MaxLength { return xerrors.Errorf("Value in field \"Result\" was too long") } @@ -52,6 +51,17 @@ func (t *Call) MarshalCBOR(w io.Writer) error { return err } + if len(t.Result) > cbg.ByteArrayMaxLen { + return xerrors.Errorf("Byte array in field t.Result was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajByteString, uint64(len(t.Result))); err != nil { + return err + } + + if _, err := w.Write(t.Result[:]); err != nil { + return err + } return nil } @@ -103,11 +113,27 @@ func (t *Call) UnmarshalCBOR(r io.Reader) error { t.State = CallState(extra) } - // t.Result (typegen.Deferred) (struct) + // t.Result ([]uint8) (slice) case "Result": - { + maj, extra, err = cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + + if extra > cbg.ByteArrayMaxLen { + return fmt.Errorf("t.Result: byte array too large (%d)", extra) + } + if maj != cbg.MajByteString { + return fmt.Errorf("expected byte array") + 
} + + if extra > 0 { + t.Result = make([]uint8, extra) + } + if _, err := io.ReadFull(br, t.Result[:]); err != nil { + return err } default: diff --git a/extern/sector-storage/ffiwrapper/sealer_test.go b/extern/sector-storage/ffiwrapper/sealer_test.go index caa70c554a8..35d5d586cc1 100644 --- a/extern/sector-storage/ffiwrapper/sealer_test.go +++ b/extern/sector-storage/ffiwrapper/sealer_test.go @@ -4,7 +4,6 @@ import ( "bytes" "context" "fmt" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io" "io/ioutil" "math/rand" @@ -31,6 +30,7 @@ import ( ffi "github.com/filecoin-project/filecoin-ffi" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper/basicfs" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) func init() { @@ -206,7 +206,7 @@ func post(t *testing.T, sealer *Sealer, skipped []abi.SectorID, seals ...seal) { } func corrupt(t *testing.T, sealer *Sealer, id abi.SectorID) { - paths, done, err := sealer.sectors.AcquireSector(context.Background(), id, stores.FTSealed, 0, stores.PathStorage) + paths, done, err := sealer.sectors.AcquireSector(context.Background(), id, storiface.FTSealed, 0, storiface.PathStorage) require.NoError(t, err) defer done() diff --git a/extern/sector-storage/localworker.go b/extern/sector-storage/localworker.go index b2e264a1588..0a1a02397dc 100644 --- a/extern/sector-storage/localworker.go +++ b/extern/sector-storage/localworker.go @@ -108,29 +108,34 @@ type returnType string // in: func(WorkerReturn, context.Context, CallID, err string) // in: func(WorkerReturn, context.Context, CallID, ret T, err string) -func rfunc(in interface{}) func(context.Context, storiface.WorkerReturn, interface{}, error) error { +func rfunc(in interface{}) func(context.Context, storiface.CallID, storiface.WorkerReturn, interface{}, error) error { rf := reflect.ValueOf(in) ft := rf.Type() - withRet := ft.NumIn() == 4 + withRet := ft.NumIn() == 5 - return func(ctx context.Context, wr storiface.WorkerReturn, i interface{}, err error) error { + return func(ctx context.Context, ci storiface.CallID, wr storiface.WorkerReturn, i interface{}, err error) error { rctx := reflect.ValueOf(ctx) rwr := reflect.ValueOf(wr) rerr := reflect.ValueOf(errstr(err)) + rci := reflect.ValueOf(ci) var ro []reflect.Value if withRet { - ro = rf.Call([]reflect.Value{rwr, rctx, reflect.ValueOf(i), rerr}) + ro = rf.Call([]reflect.Value{rwr, rctx, rci, reflect.ValueOf(i), rerr}) } else { - ro = rf.Call([]reflect.Value{rwr, rctx, rerr}) + ro = rf.Call([]reflect.Value{rwr, rctx, rci, rerr}) } - return ro[0].Interface().(error) + if !ro[0].IsNil() { + return ro[0].Interface().(error) + } + + return nil } } -var returnFunc = map[returnType]func(context.Context, storiface.WorkerReturn, interface{}, error) error{ +var returnFunc = map[returnType]func(context.Context, storiface.CallID, storiface.WorkerReturn, interface{}, error) error{ "AddPiece": rfunc(storiface.WorkerReturn.ReturnAddPiece), "SealPreCommit1": rfunc(storiface.WorkerReturn.ReturnSealPreCommit1), "SealPreCommit2": rfunc(storiface.WorkerReturn.ReturnSealPreCommit2), @@ -156,7 +161,7 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt ret go func() { res, err := work(ci) - if err := returnFunc[rt](ctx, l.ret, res, err); err != nil { + if err := returnFunc[rt](ctx, ci, l.ret, res, err); err != nil { log.Errorf("return error: %s: %+v", rt, err) } }() diff --git a/markets/storageadapter/provider.go b/markets/storageadapter/provider.go index 7af1808c17c..612504afc6c 100644 --- 
a/markets/storageadapter/provider.go +++ b/markets/storageadapter/provider.go @@ -108,7 +108,9 @@ func (n *ProviderNodeAdapter) OnDealComplete(ctx context.Context, deal storagema curTime := time.Now() for time.Since(curTime) < addPieceRetryTimeout { if !xerrors.Is(err, sealing.ErrTooManySectorsSealing) { - log.Errorf("failed to addPiece for deal %d, err: %w", deal.DealID, err) + if err != nil { + log.Errorf("failed to addPiece for deal %d, err: %w", deal.DealID, err) + } break } select { From b1361aaf8bc6f3a6ea0bcfeab4f84493dbab9bec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 16 Sep 2020 17:08:05 +0200 Subject: [PATCH 09/61] sectorstorage: wip manager work tracker --- extern/sector-storage/manager.go | 148 ++++------ extern/sector-storage/manager_calltracker.go | 271 ++++++++++++++++++ .../{calltracker.go => worker_calltracker.go} | 8 +- .../{localworker.go => worker_local.go} | 4 +- .../{work_tracker.go => worker_tracked.go} | 0 gen/main.go | 1 + 6 files changed, 337 insertions(+), 95 deletions(-) create mode 100644 extern/sector-storage/manager_calltracker.go rename extern/sector-storage/{calltracker.go => worker_calltracker.go} (71%) rename extern/sector-storage/{localworker.go => worker_local.go} (99%) rename extern/sector-storage/{work_tracker.go => worker_tracked.go} (100%) diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 3b2c7e99458..14ec875e9d8 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -72,9 +72,15 @@ type Manager struct { storage.Prover - resLk sync.Mutex - results map[storiface.CallID]result - waitRes map[storiface.CallID]chan struct{} + workLk sync.Mutex + work *statestore.StateStore + + callToWork map[storiface.CallID]workID + // used when we get an early return and there's no callToWork mapping + callRes map[storiface.CallID]chan result + + results map[workID]result + waitRes map[workID]chan struct{} } type result struct { @@ -96,8 +102,9 @@ type SealerConfig struct { type StorageAuth http.Header type WorkerStateStore *statestore.StateStore +type ManagerStateStore *statestore.StateStore -func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg *ffiwrapper.Config, sc SealerConfig, urls URLs, sa StorageAuth, wss WorkerStateStore) (*Manager, error) { +func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg *ffiwrapper.Config, sc SealerConfig, urls URLs, sa StorageAuth, wss WorkerStateStore, mss ManagerStateStore) (*Manager, error) { lstor, err := stores.NewLocal(ctx, ls, si, urls) if err != nil { return nil, err @@ -123,10 +130,15 @@ func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg Prover: prover, - results: map[storiface.CallID]result{}, - waitRes: map[storiface.CallID]chan struct{}{}, + work: mss, + workWait: map[workID]*sync.Cond{}, + callToWork: map[storiface.CallID]workID{}, + results: map[workID]result{}, + waitRes: map[workID]chan struct{}{}, } + // TODO: remove all non-running work from the work tracker + go m.sched.runSched() localTasks := []sealtasks.TaskType{ @@ -209,16 +221,16 @@ func schedNop(context.Context, Worker) error { return nil } -func schedFetch(wf waitFunc, sector abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) func(context.Context, Worker) error { +func (m *Manager) schedFetch(sector abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) func(context.Context, Worker) error { 
return func(ctx context.Context, worker Worker) error { - _, err := wf(ctx)(worker.Fetch(ctx, sector, ft, ptype, am)) + _, err := m.startWork(ctx)(worker.Fetch(ctx, sector, ft, ptype, am)) return err } } func (m *Manager) readPiece(sink io.Writer, sector abi.SectorID, offset storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, rok *bool) func(ctx context.Context, w Worker) error { return func(ctx context.Context, w Worker) error { - r, err := m.waitResult(ctx)(w.ReadPiece(ctx, sink, sector, offset, size)) + r, err := m.startWork(ctx)(w.ReadPiece(ctx, sink, sector, offset, size)) if err != nil { return err } @@ -251,7 +263,7 @@ func (m *Manager) tryReadUnsealedPiece(ctx context.Context, sink io.Writer, sect selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.waitResult, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.startWork, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), m.readPiece(sink, sector, offset, size, &readOk)) if err != nil { returnErr = xerrors.Errorf("reading piece from sealed sector: %w", err) @@ -278,12 +290,12 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect } unsealFetch := func(ctx context.Context, worker Worker) error { - if _, err := m.waitResult(ctx)(worker.Fetch(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.PathSealing, storiface.AcquireCopy)); err != nil { + if _, err := m.startWork(ctx)(worker.Fetch(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.PathSealing, storiface.AcquireCopy)); err != nil { return xerrors.Errorf("copy sealed/cache sector data: %w", err) } if foundUnsealed { - if _, err := m.waitResult(ctx)(worker.Fetch(ctx, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove)); err != nil { + if _, err := m.startWork(ctx)(worker.Fetch(ctx, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove)); err != nil { return xerrors.Errorf("copy unsealed sector data: %w", err) } } @@ -294,7 +306,7 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect return xerrors.Errorf("cannot unseal piece (sector: %d, offset: %d size: %d) - unsealed cid is undefined", sector, offset, size) } err = m.sched.Schedule(ctx, sector, sealtasks.TTUnseal, selector, unsealFetch, func(ctx context.Context, w Worker) error { - _, err := m.waitResult(ctx)(w.UnsealPiece(ctx, sector, offset, size, ticket, unsealed)) + _, err := m.startWork(ctx)(w.UnsealPiece(ctx, sector, offset, size, ticket, unsealed)) return err }) if err != nil { @@ -303,7 +315,7 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.waitResult, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.startWork, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), m.readPiece(sink, sector, offset, size, &readOk)) if err != nil { return xerrors.Errorf("reading piece from sealed sector: %w", err) @@ -339,7 +351,7 @@ func (m *Manager) AddPiece(ctx context.Context, sector abi.SectorID, existingPie var out abi.PieceInfo err 
= m.sched.Schedule(ctx, sector, sealtasks.TTAddPiece, selector, schedNop, func(ctx context.Context, w Worker) error { - p, err := m.waitResult(ctx)(w.AddPiece(ctx, sector, existingPieces, sz, r)) + p, err := m.startWork(ctx)(w.AddPiece(ctx, sector, existingPieces, sz, r)) if err != nil { return err } @@ -354,6 +366,25 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke ctx, cancel := context.WithCancel(ctx) defer cancel() + wk, wait, err := m.getWork(ctx, "PreCommit1", sector, ticket, pieces) + if err != nil { + return nil, xerrors.Errorf("getWork: %w", err) + } + + waitRes := func() { + p, werr := m.waitWork(ctx, wk) + if werr != nil { + err = werr + return + } + out = p.(storage.PreCommit1Out) + } + + if wait { // already in progress + waitRes() + return + } + if err := m.index.StorageLock(ctx, sector, storiface.FTUnsealed, storiface.FTSealed|storiface.FTCache); err != nil { return nil, xerrors.Errorf("acquiring sector lock: %w", err) } @@ -362,12 +393,13 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke selector := newAllocSelector(m.index, storiface.FTCache|storiface.FTSealed, storiface.PathSealing) - err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit1, selector, schedFetch(m.waitResult, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := m.waitResult(ctx)(w.SealPreCommit1(ctx, sector, ticket, pieces)) + err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit1, selector, m.schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + err := m.startWork(ctx, wk)(w.SealPreCommit1(ctx, sector, ticket, pieces)) if err != nil { return err } - out = p.(storage.PreCommit1Out) + waitRes() + return nil }) @@ -384,8 +416,8 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, true) - err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := m.waitResult(ctx)(w.SealPreCommit2(ctx, sector, phase1Out)) + err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + p, err := m.startWork(ctx)(w.SealPreCommit2(ctx, sector, phase1Out)) if err != nil { return err } @@ -408,8 +440,8 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a // generally very cheap / fast, and transferring data is not worth the effort selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := m.waitResult(ctx)(w.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) + err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + p, err := 
m.startWork(ctx)(w.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) if err != nil { return err } @@ -423,7 +455,7 @@ func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Ou selector := newTaskSelector() err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit2, selector, schedNop, func(ctx context.Context, w Worker) error { - p, err := m.waitResult(ctx)(w.SealCommit2(ctx, sector, phase1Out)) + p, err := m.startWork(ctx)(w.SealCommit2(ctx, sector, phase1Out)) if err != nil { return err } @@ -457,9 +489,9 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) err := m.sched.Schedule(ctx, sector, sealtasks.TTFinalize, selector, - schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed|unsealed, storiface.PathSealing, storiface.AcquireMove), + schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed|unsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - _, err := m.waitResult(ctx)(w.FinalizeSector(ctx, sector, keepUnsealed)) + _, err := m.startWork(ctx)(w.FinalizeSector(ctx, sector, keepUnsealed)) return err }) if err != nil { @@ -475,9 +507,9 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU } err = m.sched.Schedule(ctx, sector, sealtasks.TTFetch, fetchSel, - schedFetch(m.waitResult, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), + schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - _, err := m.waitResult(ctx)(w.MoveStorage(ctx, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed)) + _, err := m.startWork(ctx)(w.MoveStorage(ctx, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed)) return err }) if err != nil { @@ -515,68 +547,6 @@ func (m *Manager) Remove(ctx context.Context, sector abi.SectorID) error { return err } -type waitFunc func(ctx context.Context) func(callID storiface.CallID, err error) (interface{}, error) - -func (m *Manager) waitResult(ctx context.Context) func(callID storiface.CallID, err error) (interface{}, error) { - return func(callID storiface.CallID, err error) (interface{}, error) { - if err != nil { - return nil, err - } - - m.resLk.Lock() - res, ok := m.results[callID] - if ok { - m.resLk.Unlock() - return res.r, res.err - } - - ch, ok := m.waitRes[callID] - if !ok { - ch = make(chan struct{}) - m.waitRes[callID] = ch - } - m.resLk.Unlock() - - select { - case <-ch: - m.resLk.Lock() - defer m.resLk.Unlock() - - res := m.results[callID] - delete(m.results, callID) - - return res.r, res.err - case <-ctx.Done(): - return nil, xerrors.Errorf("waiting for result: %w", ctx.Err()) - } - } -} - -func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr string) error { - m.resLk.Lock() - defer m.resLk.Unlock() - - _, ok := m.results[callID] - if ok { - return xerrors.Errorf("result for call %v already reported") - } - - var err error - if serr != "" { - err = errors.New(serr) - } - - m.results[callID] = result{ - r: r, - err: err, - } - - close(m.waitRes[callID]) - delete(m.waitRes, callID) - - return nil -} - func (m *Manager) ReturnAddPiece(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error { return m.returnResult(callID, pi, err) } diff --git 
a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go new file mode 100644 index 00000000000..d209cc1f09c --- /dev/null +++ b/extern/sector-storage/manager_calltracker.go @@ -0,0 +1,271 @@ +package sectorstorage + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "golang.org/x/xerrors" + "io" + + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" +) + +type workID struct { + Method string + Params string // json [...params] +} + +func (w *workID) String() string { + return fmt.Sprintf("%s(%s)", w.Method, w.Params) +} + +var _ fmt.Stringer = &workID{} + +type WorkStatus string +const ( + wsStarted WorkStatus = "started" // task started, not scheduled/running on a worker yet + wsRunning WorkStatus = "running" // task running on a worker, waiting for worker return + wsDone WorkStatus = "done" // task returned from the worker, results available +) + +type WorkState struct { + Status WorkStatus + + WorkerCall storiface.CallID // Set when entering wsRunning + WorkError string // Status = wsDone, set when failed to start work +} + +func (w *WorkState) UnmarshalCBOR(reader io.Reader) error { + panic("implement me") +} + +func newWorkID(method string, params ...interface{}) (workID, error) { + pb, err := json.Marshal(params) + if err != nil { + return workID{}, xerrors.Errorf("marshaling work params: %w", err) + } + + return workID{ + Method: method, + Params: string(pb), + }, nil +} + +// returns wait=true when the task is already tracked/running +func (m *Manager) getWork(ctx context.Context, method string, params ...interface{}) (wid workID, wait bool, err error) { + wid, err = newWorkID(method, params) + if err != nil { + return workID{}, false, xerrors.Errorf("creating workID: %w", err) + } + + m.workLk.Lock() + defer m.workLk.Unlock() + + have, err := m.work.Has(wid) + if err != nil { + return workID{}, false, xerrors.Errorf("failed to check if the task is already tracked: %w", err) + } + + if !have { + err := m.work.Begin(wid, WorkState{ + Status: wsStarted, + }) + if err != nil { + return workID{}, false, xerrors.Errorf("failed to track task start: %w", err) + } + + return wid, false, nil + } + + // already started + + return wid, true, nil +} + +func (m *Manager) startWork(ctx context.Context, wk workID) func(callID storiface.CallID, err error) error { + return func(callID storiface.CallID, err error) error { + m.workLk.Lock() + defer m.workLk.Unlock() + + if err != nil { + merr := m.work.Get(wk).Mutate(func(ws *WorkState) error { + ws.Status = wsDone + ws.WorkError = err.Error() + return nil + }) + + if merr != nil { + return xerrors.Errorf("failed to start work and to track the error; merr: %+v, err: %w", merr, err) + } + return err + } + + err = m.work.Get(wk).Mutate(func(ws *WorkState) error { + _, ok := m.results[wk] + if ok { + log.Warn("work returned before we started tracking it") + ws.Status = wsDone + } else { + ws.Status = wsRunning + } + ws.WorkerCall = callID + return nil + }) + if err != nil { + return xerrors.Errorf("registering running work: %w", err) + } + + m.callToWork[callID] = wk + + return nil + } +} + +func (m *Manager) waitWork(ctx context.Context, wid workID) (interface{}, error) { + m.workLk.Lock() + + var ws WorkState + if err := m.work.Get(wid).Get(&ws); err != nil { + m.workLk.Unlock() + return nil, xerrors.Errorf("getting work status: %w", err) + } + + if ws.Status == wsStarted { + m.workLk.Unlock() + return nil, xerrors.Errorf("waitWork called for work in 'started' state") + } + + // sanity 
check + wk := m.callToWork[ws.WorkerCall] + if wk != wid { + m.workLk.Unlock() + return nil, xerrors.Errorf("wrong callToWork mapping for call %s; expected %s, got %s", ws.WorkerCall, wid, wk) + } + + // make sure we don't have the result ready + cr, ok := m.callRes[ws.WorkerCall] + if ok { + delete(m.callToWork, ws.WorkerCall) + + if len(cr) == 1 { + err := m.work.Get(wk).End() + if err != nil { + m.workLk.Unlock() + // Not great, but not worth discarding potentially multi-hour computation over this + log.Errorf("marking work as done: %+v", err) + } + + res := <- cr + delete(m.callRes, ws.WorkerCall) + + m.workLk.Unlock() + return res.r, res.err + } + + m.workLk.Unlock() + return nil, xerrors.Errorf("something else in waiting on callRes") + } + + ch, ok := m.waitRes[wid] + if !ok { + ch = make(chan struct{}) + m.waitRes[wid] = ch + } + m.workLk.Unlock() + + select { + case <-ch: + m.workLk.Lock() + defer m.workLk.Unlock() + + res := m.results[wid] + delete(m.results, wid) + + err := m.work.Get(wk).End() + if err != nil { + // Not great, but not worth discarding potentially multi-hour computation over this + log.Errorf("marking work as done: %+v", err) + } + + return res.r, res.err + case <-ctx.Done(): + return nil, xerrors.Errorf("waiting for work result: %w", ctx.Err()) + } +} + +func (m *Manager) waitCall(ctx context.Context, callID storiface.CallID) (interface{}, error) { + m.workLk.Lock() + _, ok := m.callToWork[callID] + if ok { + m.workLk.Unlock() + return nil, xerrors.Errorf("can't wait for calls related to work") + } + + ch, ok := m.callRes[callID] + if !ok { + ch = make(chan result) + m.callRes[callID] = ch + } + m.workLk.Unlock() + + defer func() { + m.workLk.Lock() + defer m.workLk.Unlock() + + delete(m.callRes, callID) + }() + + select { + case res := <-ch: + return res.r, res.err + case <-ctx.Done(): + return nil, xerrors.Errorf("waiting for call result: %w", ctx.Err()) + } +} + +func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr string) error { + var err error + if serr != "" { + err = errors.New(serr) + } + + res := result{ + r: r, + err: err, + } + + m.workLk.Lock() + defer m.workLk.Unlock() + + wid, ok := m.callToWork[callID] + if !ok { + rch, ok := m.callRes[callID] + if !ok { + rch = make(chan result, 1) + m.callRes[callID] = rch + } + + if len(rch) > 0 { + return xerrors.Errorf("callRes channel already has a response") + } + if cap(rch) == 0 { + return xerrors.Errorf("expected rch to be buffered") + } + + rch <- res + return nil + } + + _, ok = m.results[wid] + if ok { + return xerrors.Errorf("result for call %v already reported") + } + + m.results[wid] = res + + close(m.waitRes[wid]) + delete(m.waitRes, wid) + + return nil +} diff --git a/extern/sector-storage/calltracker.go b/extern/sector-storage/worker_calltracker.go similarity index 71% rename from extern/sector-storage/calltracker.go rename to extern/sector-storage/worker_calltracker.go index 8c5aff577b9..56909e68c5c 100644 --- a/extern/sector-storage/calltracker.go +++ b/extern/sector-storage/worker_calltracker.go @@ -6,7 +6,7 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) -type callTracker struct { +type workerCallTracker struct { st *statestore.StateStore // by CallID } @@ -25,13 +25,13 @@ type Call struct { Result []byte } -func (wt *callTracker) onStart(ci storiface.CallID) error { +func (wt *workerCallTracker) onStart(ci storiface.CallID) error { return wt.st.Begin(ci, &Call{ State: CallStarted, }) } -func (wt *callTracker) onDone(ci 
storiface.CallID, ret []byte) error { +func (wt *workerCallTracker) onDone(ci storiface.CallID, ret []byte) error { st := wt.st.Get(ci) return st.Mutate(func(cs *Call) error { cs.State = CallDone @@ -40,7 +40,7 @@ func (wt *callTracker) onDone(ci storiface.CallID, ret []byte) error { }) } -func (wt *callTracker) onReturned(ci storiface.CallID) error { +func (wt *workerCallTracker) onReturned(ci storiface.CallID) error { st := wt.st.Get(ci) return st.End() } diff --git a/extern/sector-storage/localworker.go b/extern/sector-storage/worker_local.go similarity index 99% rename from extern/sector-storage/localworker.go rename to extern/sector-storage/worker_local.go index 0a1a02397dc..67b9df5e17d 100644 --- a/extern/sector-storage/localworker.go +++ b/extern/sector-storage/worker_local.go @@ -38,7 +38,7 @@ type LocalWorker struct { sindex stores.SectorIndex ret storiface.WorkerReturn - ct *callTracker + ct *workerCallTracker acceptTasks map[sealtasks.TaskType]struct{} } @@ -57,7 +57,7 @@ func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex: sindex, ret: ret, - ct: &callTracker{ + ct: &workerCallTracker{ st: cst, }, acceptTasks: acceptTasks, diff --git a/extern/sector-storage/work_tracker.go b/extern/sector-storage/worker_tracked.go similarity index 100% rename from extern/sector-storage/work_tracker.go rename to extern/sector-storage/worker_tracked.go diff --git a/gen/main.go b/gen/main.go index c7ae5bd5700..95ace5583dd 100644 --- a/gen/main.go +++ b/gen/main.go @@ -77,6 +77,7 @@ func main() { err = gen.WriteMapEncodersToFile("./extern/sector-storage/cbor_gen.go", "sectorstorage", sectorstorage.Call{}, + sectorstorage.WorkState{}, ) if err != nil { fmt.Println(err) From 5e09581256ee57deab2d71df2d5052d80e311e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 16 Sep 2020 22:33:49 +0200 Subject: [PATCH 10/61] sectorstorage: get new work tracker to run --- cmd/lotus-storage-miner/init.go | 3 +- extern/sector-storage/cbor_gen.go | 148 +++++++++++++++++++ extern/sector-storage/manager.go | 111 ++++++++++---- extern/sector-storage/manager_calltracker.go | 27 +++- extern/sector-storage/manager_test.go | 21 ++- extern/sector-storage/storiface/cbor_gen.go | 142 ++++++++++++++++++ gen/main.go | 11 +- lotuspond/front/src/chain/methods.json | 3 +- node/modules/storageminer.go | 4 +- 9 files changed, 425 insertions(+), 45 deletions(-) create mode 100644 extern/sector-storage/storiface/cbor_gen.go diff --git a/cmd/lotus-storage-miner/init.go b/cmd/lotus-storage-miner/init.go index 462a54985ea..c7bbf09a7ff 100644 --- a/cmd/lotus-storage-miner/init.go +++ b/cmd/lotus-storage-miner/init.go @@ -444,6 +444,7 @@ func storageMinerInit(ctx context.Context, cctx *cli.Context, api lapi.FullNode, } wsts := statestore.New(namespace.Wrap(mds, modules.WorkerCallsPrefix)) + smsts := statestore.New(namespace.Wrap(mds, modules.ManagerWorkPrefix)) smgr, err := sectorstorage.New(ctx, lr, stores.NewIndex(), &ffiwrapper.Config{ SealProofType: spt, @@ -454,7 +455,7 @@ func storageMinerInit(ctx context.Context, cctx *cli.Context, api lapi.FullNode, AllowPreCommit2: true, AllowCommit: true, AllowUnseal: true, - }, nil, sa, wsts) + }, nil, sa, wsts, smsts) if err != nil { return err } diff --git a/extern/sector-storage/cbor_gen.go b/extern/sector-storage/cbor_gen.go index c20df2157dc..137e32650b8 100644 --- a/extern/sector-storage/cbor_gen.go +++ b/extern/sector-storage/cbor_gen.go @@ -143,3 +143,151 @@ func (t *Call) UnmarshalCBOR(r io.Reader) error { return nil } +func 
(t *WorkState) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + if _, err := w.Write([]byte{163}); err != nil { + return err + } + + scratch := make([]byte, 9) + + // t.Status (sectorstorage.WorkStatus) (string) + if len("Status") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"Status\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("Status"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("Status")); err != nil { + return err + } + + if len(t.Status) > cbg.MaxLength { + return xerrors.Errorf("Value in field t.Status was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len(t.Status))); err != nil { + return err + } + if _, err := io.WriteString(w, string(t.Status)); err != nil { + return err + } + + // t.WorkerCall (storiface.CallID) (struct) + if len("WorkerCall") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"WorkerCall\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("WorkerCall"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("WorkerCall")); err != nil { + return err + } + + if err := t.WorkerCall.MarshalCBOR(w); err != nil { + return err + } + + // t.WorkError (string) (string) + if len("WorkError") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"WorkError\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("WorkError"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("WorkError")); err != nil { + return err + } + + if len(t.WorkError) > cbg.MaxLength { + return xerrors.Errorf("Value in field t.WorkError was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len(t.WorkError))); err != nil { + return err + } + if _, err := io.WriteString(w, string(t.WorkError)); err != nil { + return err + } + return nil +} + +func (t *WorkState) UnmarshalCBOR(r io.Reader) error { + *t = WorkState{} + + br := cbg.GetPeeker(r) + scratch := make([]byte, 8) + + maj, extra, err := cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("WorkState: map struct too large (%d)", extra) + } + + var name string + n := extra + + for i := uint64(0); i < n; i++ { + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + name = string(sval) + } + + switch name { + // t.Status (sectorstorage.WorkStatus) (string) + case "Status": + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + t.Status = WorkStatus(sval) + } + // t.WorkerCall (storiface.CallID) (struct) + case "WorkerCall": + + { + + if err := t.WorkerCall.UnmarshalCBOR(br); err != nil { + return xerrors.Errorf("unmarshaling t.WorkerCall: %w", err) + } + + } + // t.WorkError (string) (string) + case "WorkError": + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + t.WorkError = string(sval) + } + + default: + return fmt.Errorf("unknown struct field %d: '%s'", i, name) + } + } + + return nil +} diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 14ec875e9d8..a3f04037d8b 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -130,11 
+130,11 @@ func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg Prover: prover, - work: mss, - workWait: map[workID]*sync.Cond{}, + work: mss, callToWork: map[storiface.CallID]workID{}, - results: map[workID]result{}, - waitRes: map[workID]chan struct{}{}, + callRes: map[storiface.CallID]chan result{}, + results: map[workID]result{}, + waitRes: map[workID]chan struct{}{}, } // TODO: remove all non-running work from the work tracker @@ -223,14 +223,14 @@ func schedNop(context.Context, Worker) error { func (m *Manager) schedFetch(sector abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) func(context.Context, Worker) error { return func(ctx context.Context, worker Worker) error { - _, err := m.startWork(ctx)(worker.Fetch(ctx, sector, ft, ptype, am)) + _, err := m.waitSimpleCall(ctx)(worker.Fetch(ctx, sector, ft, ptype, am)) return err } } func (m *Manager) readPiece(sink io.Writer, sector abi.SectorID, offset storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, rok *bool) func(ctx context.Context, w Worker) error { return func(ctx context.Context, w Worker) error { - r, err := m.startWork(ctx)(w.ReadPiece(ctx, sink, sector, offset, size)) + r, err := m.waitSimpleCall(ctx)(w.ReadPiece(ctx, sink, sector, offset, size)) if err != nil { return err } @@ -263,7 +263,7 @@ func (m *Manager) tryReadUnsealedPiece(ctx context.Context, sink io.Writer, sect selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.startWork, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, m.schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), m.readPiece(sink, sector, offset, size, &readOk)) if err != nil { returnErr = xerrors.Errorf("reading piece from sealed sector: %w", err) @@ -290,12 +290,12 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect } unsealFetch := func(ctx context.Context, worker Worker) error { - if _, err := m.startWork(ctx)(worker.Fetch(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.PathSealing, storiface.AcquireCopy)); err != nil { + if _, err := m.waitSimpleCall(ctx)(worker.Fetch(ctx, sector, storiface.FTSealed|storiface.FTCache, storiface.PathSealing, storiface.AcquireCopy)); err != nil { return xerrors.Errorf("copy sealed/cache sector data: %w", err) } if foundUnsealed { - if _, err := m.startWork(ctx)(worker.Fetch(ctx, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove)); err != nil { + if _, err := m.waitSimpleCall(ctx)(worker.Fetch(ctx, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove)); err != nil { return xerrors.Errorf("copy unsealed sector data: %w", err) } } @@ -306,7 +306,8 @@ func (m *Manager) ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect return xerrors.Errorf("cannot unseal piece (sector: %d, offset: %d size: %d) - unsealed cid is undefined", sector, offset, size) } err = m.sched.Schedule(ctx, sector, sealtasks.TTUnseal, selector, unsealFetch, func(ctx context.Context, w Worker) error { - _, err := m.startWork(ctx)(w.UnsealPiece(ctx, sector, offset, size, ticket, unsealed)) + // TODO: make restartable + _, err := m.waitSimpleCall(ctx)(w.UnsealPiece(ctx, sector, offset, size, ticket, unsealed)) return err }) if err != nil { @@ -315,7 +316,7 @@ func (m *Manager) 
ReadPiece(ctx context.Context, sink io.Writer, sector abi.Sect selector = newExistingSelector(m.index, sector, storiface.FTUnsealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, schedFetch(m.startWork, sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), + err = m.sched.Schedule(ctx, sector, sealtasks.TTReadUnsealed, selector, m.schedFetch(sector, storiface.FTUnsealed, storiface.PathSealing, storiface.AcquireMove), m.readPiece(sink, sector, offset, size, &readOk)) if err != nil { return xerrors.Errorf("reading piece from sealed sector: %w", err) @@ -351,7 +352,7 @@ func (m *Manager) AddPiece(ctx context.Context, sector abi.SectorID, existingPie var out abi.PieceInfo err = m.sched.Schedule(ctx, sector, sealtasks.TTAddPiece, selector, schedNop, func(ctx context.Context, w Worker) error { - p, err := m.startWork(ctx)(w.AddPiece(ctx, sector, existingPieces, sz, r)) + p, err := m.waitSimpleCall(ctx)(w.AddPiece(ctx, sector, existingPieces, sz, r)) if err != nil { return err } @@ -398,8 +399,8 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke if err != nil { return err } - waitRes() + waitRes() return nil }) @@ -410,18 +411,38 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase ctx, cancel := context.WithCancel(ctx) defer cancel() + wk, wait, err := m.getWork(ctx, "PreCommit2", sector, phase1Out) + if err != nil { + return storage.SectorCids{}, xerrors.Errorf("getWork: %w", err) + } + + waitRes := func() { + p, werr := m.waitWork(ctx, wk) + if werr != nil { + err = werr + return + } + out = p.(storage.SectorCids) + } + + if wait { // already in progress + waitRes() + return + } + if err := m.index.StorageLock(ctx, sector, storiface.FTSealed, storiface.FTCache); err != nil { return storage.SectorCids{}, xerrors.Errorf("acquiring sector lock: %w", err) } selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, true) - err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := m.startWork(ctx)(w.SealPreCommit2(ctx, sector, phase1Out)) + err = m.sched.Schedule(ctx, sector, sealtasks.TTPreCommit2, selector, m.schedFetch(sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + err := m.startWork(ctx, wk)(w.SealPreCommit2(ctx, sector, phase1Out)) if err != nil { return err } - out = p.(storage.SectorCids) + + waitRes() return nil }) return out, err @@ -431,6 +452,25 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a ctx, cancel := context.WithCancel(ctx) defer cancel() + wk, wait, err := m.getWork(ctx, "Commit1", sector, ticket, seed, pieces, cids) + if err != nil { + return storage.Commit1Out{}, xerrors.Errorf("getWork: %w", err) + } + + waitRes := func() { + p, werr := m.waitWork(ctx, wk) + if werr != nil { + err = werr + return + } + out = p.(storage.Commit1Out) + } + + if wait { // already in progress + waitRes() + return + } + if err := m.index.StorageLock(ctx, sector, storiface.FTSealed, storiface.FTCache); err != nil { return storage.Commit1Out{}, xerrors.Errorf("acquiring sector lock: %w", err) } @@ -440,26 +480,47 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a // generally very cheap / fast, and transferring 
data is not worth the effort selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) - err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - p, err := m.startWork(ctx)(w.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) + err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit1, selector, m.schedFetch(sector, storiface.FTCache|storiface.FTSealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { + err := m.startWork(ctx, wk)(w.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) if err != nil { return err } - out = p.(storage.Commit1Out) + + waitRes() return nil }) return out, err } func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage.Commit1Out) (out storage.Proof, err error) { + wk, wait, err := m.getWork(ctx, "Commit2", sector, phase1Out) + if err != nil { + return storage.Proof{}, xerrors.Errorf("getWork: %w", err) + } + + waitRes := func() { + p, werr := m.waitWork(ctx, wk) + if werr != nil { + err = werr + return + } + out = p.(storage.Proof) + } + + if wait { // already in progress + waitRes() + return + } + selector := newTaskSelector() err = m.sched.Schedule(ctx, sector, sealtasks.TTCommit2, selector, schedNop, func(ctx context.Context, w Worker) error { - p, err := m.startWork(ctx)(w.SealCommit2(ctx, sector, phase1Out)) + err := m.startWork(ctx, wk)(w.SealCommit2(ctx, sector, phase1Out)) if err != nil { return err } - out = p.(storage.Proof) + + waitRes() return nil }) @@ -489,9 +550,9 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU selector := newExistingSelector(m.index, sector, storiface.FTCache|storiface.FTSealed, false) err := m.sched.Schedule(ctx, sector, sealtasks.TTFinalize, selector, - schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed|unsealed, storiface.PathSealing, storiface.AcquireMove), + m.schedFetch(sector, storiface.FTCache|storiface.FTSealed|unsealed, storiface.PathSealing, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - _, err := m.startWork(ctx)(w.FinalizeSector(ctx, sector, keepUnsealed)) + _, err := m.waitSimpleCall(ctx)(w.FinalizeSector(ctx, sector, keepUnsealed)) return err }) if err != nil { @@ -507,9 +568,9 @@ func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepU } err = m.sched.Schedule(ctx, sector, sealtasks.TTFetch, fetchSel, - schedFetch(m.startWork, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), + m.schedFetch(sector, storiface.FTCache|storiface.FTSealed|moveUnsealed, storiface.PathStorage, storiface.AcquireMove), func(ctx context.Context, w Worker) error { - _, err := m.startWork(ctx)(w.MoveStorage(ctx, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed)) + _, err := m.waitSimpleCall(ctx)(w.MoveStorage(ctx, sector, storiface.FTCache|storiface.FTSealed|moveUnsealed)) return err }) if err != nil { diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index d209cc1f09c..8092f514ac7 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -2,11 +2,11 @@ package sectorstorage import ( "context" + "crypto/sha256" "encoding/json" "errors" "fmt" "golang.org/x/xerrors" - "io" 
"github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) @@ -16,7 +16,7 @@ type workID struct { Params string // json [...params] } -func (w *workID) String() string { +func (w workID) String() string { return fmt.Sprintf("%s(%s)", w.Method, w.Params) } @@ -36,16 +36,17 @@ type WorkState struct { WorkError string // Status = wsDone, set when failed to start work } -func (w *WorkState) UnmarshalCBOR(reader io.Reader) error { - panic("implement me") -} - func newWorkID(method string, params ...interface{}) (workID, error) { pb, err := json.Marshal(params) if err != nil { return workID{}, xerrors.Errorf("marshaling work params: %w", err) } + if len(pb) > 256 { + s := sha256.Sum256(pb) + pb = s[:] + } + return workID{ Method: method, Params: string(pb), @@ -68,7 +69,7 @@ func (m *Manager) getWork(ctx context.Context, method string, params ...interfac } if !have { - err := m.work.Begin(wid, WorkState{ + err := m.work.Begin(wid, &WorkState{ Status: wsStarted, }) if err != nil { @@ -194,6 +195,16 @@ func (m *Manager) waitWork(ctx context.Context, wid workID) (interface{}, error) } } +func (m *Manager) waitSimpleCall(ctx context.Context) func(callID storiface.CallID, err error) (interface{}, error) { + return func(callID storiface.CallID, err error) (interface{}, error) { + if err != nil { + return nil, err + } + + return m.waitCall(ctx, callID) + } +} + func (m *Manager) waitCall(ctx context.Context, callID storiface.CallID) (interface{}, error) { m.workLk.Lock() _, ok := m.callToWork[callID] @@ -204,7 +215,7 @@ func (m *Manager) waitCall(ctx context.Context, callID storiface.CallID) (interf ch, ok := m.callRes[callID] if !ok { - ch = make(chan result) + ch = make(chan result, 1) m.callRes[callID] = ch } m.workLk.Unlock() diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index db32d655ec1..a4015c132cb 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -11,17 +11,19 @@ import ( "strings" "testing" + "github.com/google/uuid" + "github.com/ipfs/go-datastore" + logging "github.com/ipfs/go-log" + "github.com/stretchr/testify/require" + + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-statestore" + "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" - - "github.com/filecoin-project/go-state-types/abi" - - "github.com/google/uuid" - logging "github.com/ipfs/go-log" - "github.com/stretchr/testify/require" ) func init() { @@ -111,8 +113,11 @@ func newTestMgr(ctx context.Context, t *testing.T) (*Manager, *stores.Local, *st Prover: prover, - results: map[storiface.CallID]result{}, - waitRes: map[storiface.CallID]chan struct{}{}, + work: statestore.New(datastore.NewMapDatastore()), + callToWork: map[storiface.CallID]workID{}, + callRes: map[storiface.CallID]chan result{}, + results: map[workID]result{}, + waitRes: map[workID]chan struct{}{}, } go m.sched.runSched() diff --git a/extern/sector-storage/storiface/cbor_gen.go b/extern/sector-storage/storiface/cbor_gen.go new file mode 100644 index 00000000000..0efbc125b72 --- /dev/null +++ b/extern/sector-storage/storiface/cbor_gen.go @@ -0,0 +1,142 @@ +// Code generated by github.com/whyrusleeping/cbor-gen. DO NOT EDIT. 
+ +package storiface + +import ( + "fmt" + "io" + + cbg "github.com/whyrusleeping/cbor-gen" + xerrors "golang.org/x/xerrors" +) + +var _ = xerrors.Errorf + +func (t *CallID) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + if _, err := w.Write([]byte{162}); err != nil { + return err + } + + scratch := make([]byte, 9) + + // t.Sector (abi.SectorID) (struct) + if len("Sector") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"Sector\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("Sector"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("Sector")); err != nil { + return err + } + + if err := t.Sector.MarshalCBOR(w); err != nil { + return err + } + + // t.ID (uuid.UUID) (array) + if len("ID") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"ID\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("ID"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("ID")); err != nil { + return err + } + + if len(t.ID) > cbg.ByteArrayMaxLen { + return xerrors.Errorf("Byte array in field t.ID was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajByteString, uint64(len(t.ID))); err != nil { + return err + } + + if _, err := w.Write(t.ID[:]); err != nil { + return err + } + return nil +} + +func (t *CallID) UnmarshalCBOR(r io.Reader) error { + *t = CallID{} + + br := cbg.GetPeeker(r) + scratch := make([]byte, 8) + + maj, extra, err := cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("CallID: map struct too large (%d)", extra) + } + + var name string + n := extra + + for i := uint64(0); i < n; i++ { + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + name = string(sval) + } + + switch name { + // t.Sector (abi.SectorID) (struct) + case "Sector": + + { + + if err := t.Sector.UnmarshalCBOR(br); err != nil { + return xerrors.Errorf("unmarshaling t.Sector: %w", err) + } + + } + // t.ID (uuid.UUID) (array) + case "ID": + + maj, extra, err = cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + + if extra > cbg.ByteArrayMaxLen { + return fmt.Errorf("t.ID: byte array too large (%d)", extra) + } + if maj != cbg.MajByteString { + return fmt.Errorf("expected byte array") + } + + if extra != 16 { + return fmt.Errorf("expected array to have 16 elements") + } + + t.ID = [16]uint8{} + + if _, err := io.ReadFull(br, t.ID[:]); err != nil { + return err + } + + default: + return fmt.Errorf("unknown struct field %d: '%s'", i, name) + } + } + + return nil +} diff --git a/gen/main.go b/gen/main.go index 95ace5583dd..c2adbb7a06c 100644 --- a/gen/main.go +++ b/gen/main.go @@ -2,7 +2,6 @@ package main import ( "fmt" - sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage" "os" gen "github.com/whyrusleeping/cbor-gen" @@ -10,6 +9,8 @@ import ( "github.com/filecoin-project/lotus/api" "github.com/filecoin-project/lotus/chain/exchange" "github.com/filecoin-project/lotus/chain/types" + sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "github.com/filecoin-project/lotus/node/hello" "github.com/filecoin-project/lotus/paychmgr" ) @@ -75,6 +76,14 @@ func main() { os.Exit(1) 
} + err = gen.WriteMapEncodersToFile("./extern/sector-storage/storiface/cbor_gen.go", "storiface", + storiface.CallID{}, + ) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + err = gen.WriteMapEncodersToFile("./extern/sector-storage/cbor_gen.go", "sectorstorage", sectorstorage.Call{}, sectorstorage.WorkState{}, diff --git a/lotuspond/front/src/chain/methods.json b/lotuspond/front/src/chain/methods.json index ad1076c8498..ce4919cc489 100644 --- a/lotuspond/front/src/chain/methods.json +++ b/lotuspond/front/src/chain/methods.json @@ -23,7 +23,8 @@ "AddSigner", "RemoveSigner", "SwapSigner", - "ChangeNumApprovalsThreshold" + "ChangeNumApprovalsThreshold", + "LockBalance" ], "fil/1/paymentchannel": [ "Send", diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go index e73ac06c350..af76861c108 100644 --- a/node/modules/storageminer.go +++ b/node/modules/storageminer.go @@ -517,13 +517,15 @@ func RetrievalProvider(h host.Host, miner *storage.Miner, sealer sectorstorage.S } var WorkerCallsPrefix = datastore.NewKey("/worker/calls") +var ManagerWorkPrefix = datastore.NewKey("/stmgr/calls") func SectorStorage(mctx helpers.MetricsCtx, lc fx.Lifecycle, ls stores.LocalStorage, si stores.SectorIndex, cfg *ffiwrapper.Config, sc sectorstorage.SealerConfig, urls sectorstorage.URLs, sa sectorstorage.StorageAuth, ds dtypes.MetadataDS) (*sectorstorage.Manager, error) { ctx := helpers.LifecycleCtx(mctx, lc) wsts := statestore.New(namespace.Wrap(ds, WorkerCallsPrefix)) + smsts := statestore.New(namespace.Wrap(ds, ManagerWorkPrefix)) - sst, err := sectorstorage.New(ctx, ls, si, cfg, sc, urls, sa, wsts) + sst, err := sectorstorage.New(ctx, ls, si, cfg, sc, urls, sa, wsts, smsts) if err != nil { return nil, err } From d9d644b27fd3a0a6de24542ec0e1fa6857ba2bba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Thu, 17 Sep 2020 00:35:09 +0200 Subject: [PATCH 11/61] sectorstorage: handle restarting manager, test that --- extern/sector-storage/cbor_gen.go | 152 ++++++++++++++++++- extern/sector-storage/manager.go | 16 +- extern/sector-storage/manager_calltracker.go | 65 ++++++-- extern/sector-storage/manager_test.go | 123 ++++++++++++++- extern/sector-storage/sched.go | 4 +- extern/sector-storage/testworker_test.go | 32 ++-- extern/sector-storage/worker_calltracker.go | 3 +- extern/sector-storage/worker_local.go | 18 +++ gen/main.go | 1 + 9 files changed, 373 insertions(+), 41 deletions(-) diff --git a/extern/sector-storage/cbor_gen.go b/extern/sector-storage/cbor_gen.go index 137e32650b8..7ec29c7950c 100644 --- a/extern/sector-storage/cbor_gen.go +++ b/extern/sector-storage/cbor_gen.go @@ -148,12 +148,28 @@ func (t *WorkState) MarshalCBOR(w io.Writer) error { _, err := w.Write(cbg.CborNull) return err } - if _, err := w.Write([]byte{163}); err != nil { + if _, err := w.Write([]byte{164}); err != nil { return err } scratch := make([]byte, 9) + // t.ID (sectorstorage.WorkID) (struct) + if len("ID") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"ID\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("ID"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("ID")); err != nil { + return err + } + + if err := t.ID.MarshalCBOR(w); err != nil { + return err + } + // t.Status (sectorstorage.WorkStatus) (string) if len("Status") > cbg.MaxLength { return xerrors.Errorf("Value in field \"Status\" was too long") @@ -251,7 +267,17 @@ func (t *WorkState) UnmarshalCBOR(r io.Reader) error { } switch name { 
- // t.Status (sectorstorage.WorkStatus) (string) + // t.ID (sectorstorage.WorkID) (struct) + case "ID": + + { + + if err := t.ID.UnmarshalCBOR(br); err != nil { + return xerrors.Errorf("unmarshaling t.ID: %w", err) + } + + } + // t.Status (sectorstorage.WorkStatus) (string) case "Status": { @@ -291,3 +317,125 @@ func (t *WorkState) UnmarshalCBOR(r io.Reader) error { return nil } +func (t *WorkID) MarshalCBOR(w io.Writer) error { + if t == nil { + _, err := w.Write(cbg.CborNull) + return err + } + if _, err := w.Write([]byte{162}); err != nil { + return err + } + + scratch := make([]byte, 9) + + // t.Method (string) (string) + if len("Method") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"Method\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("Method"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("Method")); err != nil { + return err + } + + if len(t.Method) > cbg.MaxLength { + return xerrors.Errorf("Value in field t.Method was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len(t.Method))); err != nil { + return err + } + if _, err := io.WriteString(w, string(t.Method)); err != nil { + return err + } + + // t.Params (string) (string) + if len("Params") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"Params\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("Params"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("Params")); err != nil { + return err + } + + if len(t.Params) > cbg.MaxLength { + return xerrors.Errorf("Value in field t.Params was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len(t.Params))); err != nil { + return err + } + if _, err := io.WriteString(w, string(t.Params)); err != nil { + return err + } + return nil +} + +func (t *WorkID) UnmarshalCBOR(r io.Reader) error { + *t = WorkID{} + + br := cbg.GetPeeker(r) + scratch := make([]byte, 8) + + maj, extra, err := cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + if maj != cbg.MajMap { + return fmt.Errorf("cbor input should be of type map") + } + + if extra > cbg.MaxLength { + return fmt.Errorf("WorkID: map struct too large (%d)", extra) + } + + var name string + n := extra + + for i := uint64(0); i < n; i++ { + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + name = string(sval) + } + + switch name { + // t.Method (string) (string) + case "Method": + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + t.Method = string(sval) + } + // t.Params (string) (string) + case "Params": + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + t.Params = string(sval) + } + + default: + return fmt.Errorf("unknown struct field %d: '%s'", i, name) + } + } + + return nil +} diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index a3f04037d8b..b23b3d46fc7 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -43,7 +43,7 @@ type Worker interface { // returns channel signalling worker shutdown Closing(context.Context) (<-chan struct{}, error) - Close() error + Close() error // TODO: do we need this? 
} type SectorManager interface { @@ -75,12 +75,12 @@ type Manager struct { workLk sync.Mutex work *statestore.StateStore - callToWork map[storiface.CallID]workID + callToWork map[storiface.CallID]WorkID // used when we get an early return and there's no callToWork mapping callRes map[storiface.CallID]chan result - results map[workID]result - waitRes map[workID]chan struct{} + results map[WorkID]result + waitRes map[WorkID]chan struct{} } type result struct { @@ -131,13 +131,13 @@ func New(ctx context.Context, ls stores.LocalStorage, si stores.SectorIndex, cfg Prover: prover, work: mss, - callToWork: map[storiface.CallID]workID{}, + callToWork: map[storiface.CallID]WorkID{}, callRes: map[storiface.CallID]chan result{}, - results: map[workID]result{}, - waitRes: map[workID]chan struct{}{}, + results: map[WorkID]result{}, + waitRes: map[WorkID]chan struct{}{}, } - // TODO: remove all non-running work from the work tracker + m.setupWorkTracker() go m.sched.runSched() diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 8092f514ac7..f0dafda38e6 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -11,16 +11,16 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) -type workID struct { +type WorkID struct { Method string Params string // json [...params] } -func (w workID) String() string { +func (w WorkID) String() string { return fmt.Sprintf("%s(%s)", w.Method, w.Params) } -var _ fmt.Stringer = &workID{} +var _ fmt.Stringer = &WorkID{} type WorkStatus string const ( @@ -30,16 +30,18 @@ const ( ) type WorkState struct { + ID WorkID + Status WorkStatus WorkerCall storiface.CallID // Set when entering wsRunning WorkError string // Status = wsDone, set when failed to start work } -func newWorkID(method string, params ...interface{}) (workID, error) { +func newWorkID(method string, params ...interface{}) (WorkID, error) { pb, err := json.Marshal(params) if err != nil { - return workID{}, xerrors.Errorf("marshaling work params: %w", err) + return WorkID{}, xerrors.Errorf("marshaling work params: %w", err) } if len(pb) > 256 { @@ -47,17 +49,55 @@ func newWorkID(method string, params ...interface{}) (workID, error) { pb = s[:] } - return workID{ + return WorkID{ Method: method, Params: string(pb), }, nil } +func (m *Manager) setupWorkTracker() { + m.workLk.Lock() + defer m.workLk.Unlock() + + var ids []WorkState + if err := m.work.List(&ids); err != nil { + log.Error("getting work IDs") // quite bad + return + } + + for _, st := range ids { + wid := st.ID + if err := m.work.Get(wid).Get(&st); err != nil { + log.Errorf("getting work state for %s", wid) + continue + } + + switch st.Status { + case wsStarted: + log.Warnf("dropping non-running work %s", wid) + + if err := m.work.Get(wid).End(); err != nil { + log.Errorf("cleannig up work state for %s", wid) + } + case wsDone: + // realistically this shouldn't ever happen as we return results + // immediately after getting them + log.Warnf("dropping done work, no result, wid %s", wid) + + if err := m.work.Get(wid).End(); err != nil { + log.Errorf("cleannig up work state for %s", wid) + } + case wsRunning: + m.callToWork[st.WorkerCall] = wid + } + } +} + // returns wait=true when the task is already tracked/running -func (m *Manager) getWork(ctx context.Context, method string, params ...interface{}) (wid workID, wait bool, err error) { +func (m *Manager) getWork(ctx context.Context, method string, params 
...interface{}) (wid WorkID, wait bool, err error) { wid, err = newWorkID(method, params) if err != nil { - return workID{}, false, xerrors.Errorf("creating workID: %w", err) + return WorkID{}, false, xerrors.Errorf("creating WorkID: %w", err) } m.workLk.Lock() @@ -65,15 +105,16 @@ func (m *Manager) getWork(ctx context.Context, method string, params ...interfac have, err := m.work.Has(wid) if err != nil { - return workID{}, false, xerrors.Errorf("failed to check if the task is already tracked: %w", err) + return WorkID{}, false, xerrors.Errorf("failed to check if the task is already tracked: %w", err) } if !have { err := m.work.Begin(wid, &WorkState{ + ID: wid, Status: wsStarted, }) if err != nil { - return workID{}, false, xerrors.Errorf("failed to track task start: %w", err) + return WorkID{}, false, xerrors.Errorf("failed to track task start: %w", err) } return wid, false, nil @@ -84,7 +125,7 @@ func (m *Manager) getWork(ctx context.Context, method string, params ...interfac return wid, true, nil } -func (m *Manager) startWork(ctx context.Context, wk workID) func(callID storiface.CallID, err error) error { +func (m *Manager) startWork(ctx context.Context, wk WorkID) func(callID storiface.CallID, err error) error { return func(callID storiface.CallID, err error) error { m.workLk.Lock() defer m.workLk.Unlock() @@ -123,7 +164,7 @@ func (m *Manager) startWork(ctx context.Context, wk workID) func(callID storifac } } -func (m *Manager) waitWork(ctx context.Context, wid workID) (interface{}, error) { +func (m *Manager) waitWork(ctx context.Context, wid WorkID) (interface{}, error) { m.workLk.Lock() var ws WorkState diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index a4015c132cb..8ddfd822e38 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "strings" + "sync" "testing" "github.com/google/uuid" @@ -83,7 +84,7 @@ func (t *testStorage) Stat(path string) (fsutil.FsStat, error) { var _ stores.LocalStorage = &testStorage{} -func newTestMgr(ctx context.Context, t *testing.T) (*Manager, *stores.Local, *stores.Remote, *stores.Index) { +func newTestMgr(ctx context.Context, t *testing.T, ds datastore.Datastore) (*Manager, *stores.Local, *stores.Remote, *stores.Index) { st := newTestStorage(t) defer st.cleanup() @@ -113,13 +114,15 @@ func newTestMgr(ctx context.Context, t *testing.T) (*Manager, *stores.Local, *st Prover: prover, - work: statestore.New(datastore.NewMapDatastore()), - callToWork: map[storiface.CallID]workID{}, + work: statestore.New(ds), + callToWork: map[storiface.CallID]WorkID{}, callRes: map[storiface.CallID]chan result{}, - results: map[workID]result{}, - waitRes: map[workID]chan struct{}{}, + results: map[WorkID]result{}, + waitRes: map[WorkID]chan struct{}{}, } + m.setupWorkTracker() + go m.sched.runSched() return m, lstor, stor, si @@ -129,7 +132,7 @@ func TestSimple(t *testing.T) { logging.SetAllLoggers(logging.LevelDebug) ctx := context.Background() - m, lstor, _, _ := newTestMgr(ctx, t) + m, lstor, _, _ := newTestMgr(ctx, t, datastore.NewMapDatastore()) localTasks := []sealtasks.TaskType{ sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch, @@ -157,5 +160,113 @@ func TestSimple(t *testing.T) { _, err = m.SealPreCommit1(ctx, sid, ticket, pieces) require.NoError(t, err) +} + +func TestRedoPC1(t *testing.T) { + logging.SetAllLoggers(logging.LevelDebug) + + ctx := context.Background() + m, lstor, 
_, _ := newTestMgr(ctx, t, datastore.NewMapDatastore()) + + localTasks := []sealtasks.TaskType{ + sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch, + } + + tw := newTestWorker(WorkerConfig{ + SealProof: abi.RegisteredSealProof_StackedDrg2KiBV1, + TaskTypes: localTasks, + }, lstor, m) + + err := m.AddWorker(ctx, tw) + require.NoError(t, err) + + sid := abi.SectorID{Miner: 1000, Number: 1} + + pi, err := m.AddPiece(ctx, sid, nil, 1016, strings.NewReader(strings.Repeat("testthis", 127))) + require.NoError(t, err) + require.Equal(t, abi.PaddedPieceSize(1024), pi.Size) + + piz, err := m.AddPiece(ctx, sid, nil, 1016, bytes.NewReader(make([]byte, 1016)[:])) + require.NoError(t, err) + require.Equal(t, abi.PaddedPieceSize(1024), piz.Size) + + pieces := []abi.PieceInfo{pi, piz} + + ticket := abi.SealRandomness{9, 9, 9, 9, 9, 9, 9, 9} + + _, err = m.SealPreCommit1(ctx, sid, ticket, pieces) + require.NoError(t, err) + + _, err = m.SealPreCommit1(ctx, sid, ticket, pieces) + require.NoError(t, err) + + require.Equal(t, 2, tw.pc1s) +} + +func TestRestartManager(t *testing.T) { + logging.SetAllLoggers(logging.LevelDebug) + + ctx, done := context.WithCancel(context.Background()) + defer done() + + ds := datastore.NewMapDatastore() + + m, lstor, _, _ := newTestMgr(ctx, t, ds) + + localTasks := []sealtasks.TaskType{ + sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch, + } + + tw := newTestWorker(WorkerConfig{ + SealProof: abi.RegisteredSealProof_StackedDrg2KiBV1, + TaskTypes: localTasks, + }, lstor, m) + + err := m.AddWorker(ctx, tw) + require.NoError(t, err) + + sid := abi.SectorID{Miner: 1000, Number: 1} + + pi, err := m.AddPiece(ctx, sid, nil, 1016, strings.NewReader(strings.Repeat("testthis", 127))) + require.NoError(t, err) + require.Equal(t, abi.PaddedPieceSize(1024), pi.Size) + + piz, err := m.AddPiece(ctx, sid, nil, 1016, bytes.NewReader(make([]byte, 1016)[:])) + require.NoError(t, err) + require.Equal(t, abi.PaddedPieceSize(1024), piz.Size) + + pieces := []abi.PieceInfo{pi, piz} + + ticket := abi.SealRandomness{0, 9, 9, 9, 9, 9, 9, 9} + + tw.pc1lk.Lock() + tw.pc1wait = &sync.WaitGroup{} + tw.pc1wait.Add(1) + + var cwg sync.WaitGroup + cwg.Add(1) + + var perr error + go func() { + defer cwg.Done() + _, perr = m.SealPreCommit1(ctx, sid, ticket, pieces) + }() + + tw.pc1wait.Wait() + + require.NoError(t, m.Close(ctx)) + tw.ret = nil + + cwg.Wait() + require.Error(t, perr) + + m, lstor, _, _ = newTestMgr(ctx, t, ds) + tw.ret = m // simulate jsonrpc auto-reconnect + + tw.pc1lk.Unlock() + + _, err = m.SealPreCommit1(ctx, sid, ticket, pieces) + require.NoError(t, err) + require.Equal(t, 1, tw.pc1s) } diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index 8b8ef6d466c..d757140b939 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -801,11 +801,11 @@ func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) { log.Debugf("dropWorker %d", wid) - go func() { + /*go func() { // TODO: just remove? 
if err := w.w.Close(); err != nil { log.Warnf("closing worker %d: %+v", err) } - }() + }()*/ } } diff --git a/extern/sector-storage/testworker_test.go b/extern/sector-storage/testworker_test.go index 0740f91d359..3decf928862 100644 --- a/extern/sector-storage/testworker_test.go +++ b/extern/sector-storage/testworker_test.go @@ -3,6 +3,7 @@ package sectorstorage import ( "context" "io" + "sync" "github.com/google/uuid" "github.com/ipfs/go-cid" @@ -22,6 +23,10 @@ type testWorker struct { ret storiface.WorkerReturn mockSeal *mock.SectorMgr + + pc1s int + pc1lk sync.Mutex + pc1wait *sync.WaitGroup } func newTestWorker(wcfg WorkerConfig, lstor *stores.Local, ret storiface.WorkerReturn) *testWorker { @@ -55,15 +60,6 @@ func (t *testWorker) asyncCall(sector abi.SectorID, work func(ci storiface.CallI return ci, nil } -func (t *testWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { - return t.asyncCall(sector, func(ci storiface.CallID) { - p1o, err := t.mockSeal.SealPreCommit1(ctx, sector, ticket, pieces) - if err := t.ret.ReturnSealPreCommit1(ctx, ci, p1o, errstr(err)); err != nil { - log.Error(err) - } - }) -} - func (t *testWorker) NewSector(ctx context.Context, sector abi.SectorID) error { panic("implement me") } @@ -85,6 +81,24 @@ func (t *testWorker) AddPiece(ctx context.Context, sector abi.SectorID, pieceSiz }) } +func (t *testWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { + return t.asyncCall(sector, func(ci storiface.CallID) { + t.pc1s++ + + if t.pc1wait != nil { + t.pc1wait.Done() + } + + t.pc1lk.Lock() + defer t.pc1lk.Unlock() + + p1o, err := t.mockSeal.SealPreCommit1(ctx, sector, ticket, pieces) + if err := t.ret.ReturnSealPreCommit1(ctx, ci, p1o, errstr(err)); err != nil { + log.Error(err) + } + }) +} + func (t *testWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) { panic("implement me") } diff --git a/extern/sector-storage/worker_calltracker.go b/extern/sector-storage/worker_calltracker.go index 56909e68c5c..a16ee33be0c 100644 --- a/extern/sector-storage/worker_calltracker.go +++ b/extern/sector-storage/worker_calltracker.go @@ -21,8 +21,7 @@ const ( type Call struct { State CallState - // Params cbg.Deferred // TODO: support once useful - Result []byte + Result []byte // json bytes } func (wt *workerCallTracker) onStart(ci storiface.CallID) error { diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 67b9df5e17d..210ea340cbd 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -2,6 +2,7 @@ package sectorstorage import ( "context" + "encoding/json" "io" "os" "reflect" @@ -161,9 +162,26 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt ret go func() { res, err := work(ci) + + { + rb, err := json.Marshal(res) + if err != nil { + log.Errorf("tracking call (marshaling results): %+v", err) + } else { + if err := l.ct.onDone(ci, rb); err != nil { + log.Errorf("tracking call (done): %+v", err) + } + } + + } + if err := returnFunc[rt](ctx, ci, l.ret, res, err); err != nil { log.Errorf("return error: %s: %+v", rt, err) } + + if err := l.ct.onReturned(ci); err != nil { + log.Errorf("tracking call (done): %+v", err) + } }() return ci, nil diff --git a/gen/main.go b/gen/main.go index c2adbb7a06c..65d5726abe9 100644 --- 
a/gen/main.go +++ b/gen/main.go @@ -87,6 +87,7 @@ func main() { err = gen.WriteMapEncodersToFile("./extern/sector-storage/cbor_gen.go", "sectorstorage", sectorstorage.Call{}, sectorstorage.WorkState{}, + sectorstorage.WorkID{}, ) if err != nil { fmt.Println(err) From 17680fff55f42a4c1494ce2e94151faff82cad61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Thu, 17 Sep 2020 00:35:30 +0200 Subject: [PATCH 12/61] gofmt --- documentation/en/api-methods.md | 2 +- extern/sector-storage/manager.go | 4 ++-- extern/sector-storage/manager_calltracker.go | 5 +++-- extern/sector-storage/testworker_test.go | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/documentation/en/api-methods.md b/documentation/en/api-methods.md index bc28f453e5f..d72beca983a 100644 --- a/documentation/en/api-methods.md +++ b/documentation/en/api-methods.md @@ -211,7 +211,7 @@ Response: ```json { "Version": "string value", - "APIVersion": 3840, + "APIVersion": 3584, "BlockDelay": 42 } ``` diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index b23b3d46fc7..eecfd1b55e4 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -72,8 +72,8 @@ type Manager struct { storage.Prover - workLk sync.Mutex - work *statestore.StateStore + workLk sync.Mutex + work *statestore.StateStore callToWork map[storiface.CallID]WorkID // used when we get an early return and there's no callToWork mapping diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index f0dafda38e6..8e3e20c6e30 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -23,6 +23,7 @@ func (w WorkID) String() string { var _ fmt.Stringer = &WorkID{} type WorkStatus string + const ( wsStarted WorkStatus = "started" // task started, not scheduled/running on a worker yet wsRunning WorkStatus = "running" // task running on a worker, waiting for worker return @@ -35,7 +36,7 @@ type WorkState struct { Status WorkStatus WorkerCall storiface.CallID // Set when entering wsRunning - WorkError string // Status = wsDone, set when failed to start work + WorkError string // Status = wsDone, set when failed to start work } func newWorkID(method string, params ...interface{}) (WorkID, error) { @@ -198,7 +199,7 @@ func (m *Manager) waitWork(ctx context.Context, wid WorkID) (interface{}, error) log.Errorf("marking work as done: %+v", err) } - res := <- cr + res := <-cr delete(m.callRes, ws.WorkerCall) m.workLk.Unlock() diff --git a/extern/sector-storage/testworker_test.go b/extern/sector-storage/testworker_test.go index 3decf928862..94a87cdd204 100644 --- a/extern/sector-storage/testworker_test.go +++ b/extern/sector-storage/testworker_test.go @@ -24,8 +24,8 @@ type testWorker struct { mockSeal *mock.SectorMgr - pc1s int - pc1lk sync.Mutex + pc1s int + pc1lk sync.Mutex pc1wait *sync.WaitGroup } From 03c3d8bdb32d4a28211f1b30cc09d3894b27ffbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 22 Sep 2020 00:52:33 +0200 Subject: [PATCH 13/61] workers: Return unfinished tasks on restart --- extern/sector-storage/cbor_gen.go | 64 +++++++++++++++- extern/sector-storage/manager_test.go | 69 ++++++++++++++++++ extern/sector-storage/teststorage_test.go | 81 +++++++++++++++++++++ extern/sector-storage/worker_calltracker.go | 12 ++- extern/sector-storage/worker_local.go | 75 ++++++++++++++----- node/modules/storageminer.go | 6 +- 6 files changed, 283 insertions(+), 24 
deletions(-) create mode 100644 extern/sector-storage/teststorage_test.go diff --git a/extern/sector-storage/cbor_gen.go b/extern/sector-storage/cbor_gen.go index 7ec29c7950c..a291487f0a5 100644 --- a/extern/sector-storage/cbor_gen.go +++ b/extern/sector-storage/cbor_gen.go @@ -17,12 +17,51 @@ func (t *Call) MarshalCBOR(w io.Writer) error { _, err := w.Write(cbg.CborNull) return err } - if _, err := w.Write([]byte{162}); err != nil { + if _, err := w.Write([]byte{164}); err != nil { return err } scratch := make([]byte, 9) + // t.ID (storiface.CallID) (struct) + if len("ID") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"ID\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("ID"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("ID")); err != nil { + return err + } + + if err := t.ID.MarshalCBOR(w); err != nil { + return err + } + + // t.RetType (sectorstorage.ReturnType) (string) + if len("RetType") > cbg.MaxLength { + return xerrors.Errorf("Value in field \"RetType\" was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len("RetType"))); err != nil { + return err + } + if _, err := io.WriteString(w, string("RetType")); err != nil { + return err + } + + if len(t.RetType) > cbg.MaxLength { + return xerrors.Errorf("Value in field t.RetType was too long") + } + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, uint64(len(t.RetType))); err != nil { + return err + } + if _, err := io.WriteString(w, string(t.RetType)); err != nil { + return err + } + // t.State (sectorstorage.CallState) (uint64) if len("State") > cbg.MaxLength { return xerrors.Errorf("Value in field \"State\" was too long") @@ -98,7 +137,28 @@ func (t *Call) UnmarshalCBOR(r io.Reader) error { } switch name { - // t.State (sectorstorage.CallState) (uint64) + // t.ID (storiface.CallID) (struct) + case "ID": + + { + + if err := t.ID.UnmarshalCBOR(br); err != nil { + return xerrors.Errorf("unmarshaling t.ID: %w", err) + } + + } + // t.RetType (sectorstorage.ReturnType) (string) + case "RetType": + + { + sval, err := cbg.ReadStringBuf(br, scratch) + if err != nil { + return err + } + + t.RetType = ReturnType(sval) + } + // t.State (sectorstorage.CallState) (uint64) case "State": { diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index 8ddfd822e38..9a47c3b5538 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -11,6 +11,7 @@ import ( "strings" "sync" "testing" + "time" "github.com/google/uuid" "github.com/ipfs/go-datastore" @@ -203,6 +204,7 @@ func TestRedoPC1(t *testing.T) { require.Equal(t, 2, tw.pc1s) } +// Manager restarts in the middle of a task, restarts it, it completes func TestRestartManager(t *testing.T) { logging.SetAllLoggers(logging.LevelDebug) @@ -262,6 +264,8 @@ func TestRestartManager(t *testing.T) { m, lstor, _, _ = newTestMgr(ctx, t, ds) tw.ret = m // simulate jsonrpc auto-reconnect + err = m.AddWorker(ctx, tw) + require.NoError(t, err) tw.pc1lk.Unlock() @@ -270,3 +274,68 @@ func TestRestartManager(t *testing.T) { require.Equal(t, 1, tw.pc1s) } + +// Worker restarts in the middle of a task, task fails after restart +func TestRestartWorker(t *testing.T) { + logging.SetAllLoggers(logging.LevelDebug) + + ctx, done := context.WithCancel(context.Background()) + defer done() + + ds := datastore.NewMapDatastore() + + m, lstor, stor, idx := newTestMgr(ctx, t, ds) + + localTasks := 
[]sealtasks.TaskType{ + sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch, + } + + wds := datastore.NewMapDatastore() + + arch := make(chan chan apres) + w := newLocalWorker(func() (ffiwrapper.Storage, error) { + return &testExec{apch: arch}, nil + }, WorkerConfig{ + SealProof: 0, + TaskTypes: localTasks, + }, stor, lstor, idx, m, statestore.New(wds)) + + err := m.AddWorker(ctx, w) + require.NoError(t, err) + + sid := abi.SectorID{Miner: 1000, Number: 1} + + apDone := make(chan struct{}) + + go func() { + defer close(apDone) + + _, err := m.AddPiece(ctx, sid, nil, 1016, strings.NewReader(strings.Repeat("testthis", 127))) + require.Error(t, err) + }() + + // kill the worker + <-arch + require.NoError(t, w.Close()) + + for { + if len(m.WorkerStats()) == 0 { + break + } + + time.Sleep(time.Millisecond * 3) + } + + // restart the worker + w = newLocalWorker(func() (ffiwrapper.Storage, error) { + return &testExec{apch: arch}, nil + }, WorkerConfig{ + SealProof: 0, + TaskTypes: localTasks, + }, stor, lstor, idx, m, statestore.New(wds)) + + err = m.AddWorker(ctx, w) + require.NoError(t, err) + + <-apDone +} diff --git a/extern/sector-storage/teststorage_test.go b/extern/sector-storage/teststorage_test.go new file mode 100644 index 00000000000..da575a49154 --- /dev/null +++ b/extern/sector-storage/teststorage_test.go @@ -0,0 +1,81 @@ +package sectorstorage + +import ( + "context" + "io" + + "github.com/ipfs/go-cid" + + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/specs-actors/actors/runtime/proof" + "github.com/filecoin-project/specs-storage/storage" + + "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" +) + +type apres struct { + pi abi.PieceInfo + err error +} + +type testExec struct { + apch chan chan apres +} + +func (t *testExec) GenerateWinningPoSt(ctx context.Context, minerID abi.ActorID, sectorInfo []proof.SectorInfo, randomness abi.PoStRandomness) ([]proof.PoStProof, error) { + panic("implement me") +} + +func (t *testExec) GenerateWindowPoSt(ctx context.Context, minerID abi.ActorID, sectorInfo []proof.SectorInfo, randomness abi.PoStRandomness) (proof []proof.PoStProof, skipped []abi.SectorID, err error) { + panic("implement me") +} + +func (t *testExec) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storage.PreCommit1Out, error) { + panic("implement me") +} + +func (t *testExec) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storage.SectorCids, error) { + panic("implement me") +} + +func (t *testExec) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storage.Commit1Out, error) { + panic("implement me") +} + +func (t *testExec) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storage.Proof, error) { + panic("implement me") +} + +func (t *testExec) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) error { + panic("implement me") +} + +func (t *testExec) ReleaseUnsealed(ctx context.Context, sector abi.SectorID, safeToFree []storage.Range) error { + panic("implement me") +} + +func (t *testExec) Remove(ctx context.Context, sector abi.SectorID) error { + panic("implement me") +} + +func (t *testExec) NewSector(ctx context.Context, 
sector abi.SectorID) error { + panic("implement me") +} + +func (t *testExec) AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (abi.PieceInfo, error) { + resp := make(chan apres) + t.apch <- resp + ar := <-resp + return ar.pi, ar.err +} + +func (t *testExec) UnsealPiece(ctx context.Context, sector abi.SectorID, offset storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, commd cid.Cid) error { + panic("implement me") +} + +func (t *testExec) ReadPiece(ctx context.Context, writer io.Writer, sector abi.SectorID, offset storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (bool, error) { + panic("implement me") +} + +var _ ffiwrapper.Storage = &testExec{} \ No newline at end of file diff --git a/extern/sector-storage/worker_calltracker.go b/extern/sector-storage/worker_calltracker.go index a16ee33be0c..38fb39ee146 100644 --- a/extern/sector-storage/worker_calltracker.go +++ b/extern/sector-storage/worker_calltracker.go @@ -19,13 +19,18 @@ const ( ) type Call struct { + ID storiface.CallID + RetType ReturnType + State CallState Result []byte // json bytes } -func (wt *workerCallTracker) onStart(ci storiface.CallID) error { +func (wt *workerCallTracker) onStart(ci storiface.CallID, rt ReturnType) error { return wt.st.Begin(ci, &Call{ + ID: ci, + RetType:rt, State: CallStarted, }) } @@ -43,3 +48,8 @@ func (wt *workerCallTracker) onReturned(ci storiface.CallID) error { st := wt.st.Get(ci) return st.End() } + +func (wt *workerCallTracker) unfinished() ([]Call, error) { + var out []Call + return out, wt.st.List(&out) +} diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 210ea340cbd..009e11921ab 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -38,18 +38,21 @@ type LocalWorker struct { localStore *stores.Local sindex stores.SectorIndex ret storiface.WorkerReturn + executor func() (ffiwrapper.Storage, error) ct *workerCallTracker acceptTasks map[sealtasks.TaskType]struct{} + + closing chan struct{} } -func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn, cst *statestore.StateStore) *LocalWorker { +func newLocalWorker(executor func() (ffiwrapper.Storage, error), wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn, cst *statestore.StateStore) *LocalWorker { acceptTasks := map[sealtasks.TaskType]struct{}{} for _, taskType := range wcfg.TaskTypes { acceptTasks[taskType] = struct{}{} } - return &LocalWorker{ + w := &LocalWorker{ scfg: &ffiwrapper.Config{ SealProofType: wcfg.SealProof, }, @@ -62,7 +65,37 @@ func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, st: cst, }, acceptTasks: acceptTasks, + executor: executor, + + closing: make(chan struct{}), } + + if w.executor == nil { + w.executor = w.ffiExec + } + + unfinished, err := w.ct.unfinished() + if err != nil { + log.Errorf("reading unfinished tasks: %+v", err) + return w + } + + go func() { + for _, call := range unfinished { + err := xerrors.Errorf("worker restarted") + + if err := returnFunc[call.RetType](context.TODO(), call.ID, ret, nil, err); err != nil { + log.Errorf("return error: %s: %+v", call.RetType, err) + } + } + }() + + + return w +} + +func NewLocalWorker(wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, 
ret storiface.WorkerReturn, cst *statestore.StateStore) *LocalWorker { + return newLocalWorker(nil, wcfg, store, local, sindex, ret, cst) } type localWorkerPathProvider struct { @@ -101,11 +134,11 @@ func (l *localWorkerPathProvider) AcquireSector(ctx context.Context, sector abi. }, nil } -func (l *LocalWorker) sb() (ffiwrapper.Storage, error) { +func (l *LocalWorker) ffiExec() (ffiwrapper.Storage, error) { return ffiwrapper.New(&localWorkerPathProvider{w: l}, l.scfg) } -type returnType string +type ReturnType string // in: func(WorkerReturn, context.Context, CallID, err string) // in: func(WorkerReturn, context.Context, CallID, ret T, err string) @@ -123,7 +156,12 @@ func rfunc(in interface{}) func(context.Context, storiface.CallID, storiface.Wor var ro []reflect.Value if withRet { - ro = rf.Call([]reflect.Value{rwr, rctx, rci, reflect.ValueOf(i), rerr}) + ret := reflect.ValueOf(i) + if i == nil { + ret = reflect.Zero(rf.Type().In(3)) + } + + ro = rf.Call([]reflect.Value{rwr, rctx, rci, ret, rerr}) } else { ro = rf.Call([]reflect.Value{rwr, rctx, rci, rerr}) } @@ -136,7 +174,7 @@ func rfunc(in interface{}) func(context.Context, storiface.CallID, storiface.Wor } } -var returnFunc = map[returnType]func(context.Context, storiface.CallID, storiface.WorkerReturn, interface{}, error) error{ +var returnFunc = map[ReturnType]func(context.Context, storiface.CallID, storiface.WorkerReturn, interface{}, error) error{ "AddPiece": rfunc(storiface.WorkerReturn.ReturnAddPiece), "SealPreCommit1": rfunc(storiface.WorkerReturn.ReturnSealPreCommit1), "SealPreCommit2": rfunc(storiface.WorkerReturn.ReturnSealPreCommit2), @@ -150,13 +188,13 @@ var returnFunc = map[returnType]func(context.Context, storiface.CallID, storifac "Fetch": rfunc(storiface.WorkerReturn.ReturnFetch), } -func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt returnType, work func(ci storiface.CallID) (interface{}, error)) (storiface.CallID, error) { +func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt ReturnType, work func(ci storiface.CallID) (interface{}, error)) (storiface.CallID, error) { ci := storiface.CallID{ Sector: sector, ID: uuid.New(), } - if err := l.ct.onStart(ci); err != nil { + if err := l.ct.onStart(ci, rt); err != nil { log.Errorf("tracking call (start): %+v", err) } @@ -196,7 +234,7 @@ func errstr(err error) string { } func (l *LocalWorker) NewSector(ctx context.Context, sector abi.SectorID) error { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return err } @@ -205,7 +243,7 @@ func (l *LocalWorker) NewSector(ctx context.Context, sector abi.SectorID) error } func (l *LocalWorker) AddPiece(ctx context.Context, sector abi.SectorID, epcs []abi.UnpaddedPieceSize, sz abi.UnpaddedPieceSize, r io.Reader) (storiface.CallID, error) { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return storiface.UndefCall, err } @@ -240,7 +278,7 @@ func (l *LocalWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, t } } - sb, err := l.sb() + sb, err := l.executor() if err != nil { return nil, err } @@ -250,7 +288,7 @@ func (l *LocalWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, t } func (l *LocalWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage2.PreCommit1Out) (storiface.CallID, error) { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return storiface.UndefCall, err } @@ -261,7 +299,7 @@ func (l *LocalWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, p } func (l *LocalWorker) 
SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage2.SectorCids) (storiface.CallID, error) { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return storiface.UndefCall, err } @@ -272,7 +310,7 @@ func (l *LocalWorker) SealCommit1(ctx context.Context, sector abi.SectorID, tick } func (l *LocalWorker) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage2.Commit1Out) (storiface.CallID, error) { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return storiface.UndefCall, err } @@ -283,7 +321,7 @@ func (l *LocalWorker) SealCommit2(ctx context.Context, sector abi.SectorID, phas } func (l *LocalWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage2.Range) (storiface.CallID, error) { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return storiface.UndefCall, err } @@ -330,7 +368,7 @@ func (l *LocalWorker) MoveStorage(ctx context.Context, sector abi.SectorID, type } func (l *LocalWorker) UnsealPiece(ctx context.Context, sector abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) (storiface.CallID, error) { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return storiface.UndefCall, err } @@ -353,7 +391,7 @@ func (l *LocalWorker) UnsealPiece(ctx context.Context, sector abi.SectorID, inde } func (l *LocalWorker) ReadPiece(ctx context.Context, writer io.Writer, sector abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { - sb, err := l.sb() + sb, err := l.executor() if err != nil { return storiface.UndefCall, err } @@ -405,10 +443,11 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) { } func (l *LocalWorker) Closing(ctx context.Context) (<-chan struct{}, error) { - return make(chan struct{}), nil + return l.closing, nil } func (l *LocalWorker) Close() error { + close(l.closing) return nil } diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go index af76861c108..80bab786815 100644 --- a/node/modules/storageminer.go +++ b/node/modules/storageminer.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "github.com/filecoin-project/go-statestore" "net/http" "time" @@ -43,6 +42,7 @@ import ( "github.com/filecoin-project/go-multistore" paramfetch "github.com/filecoin-project/go-paramfetch" "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-statestore" "github.com/filecoin-project/go-storedcounter" sectorstorage "github.com/filecoin-project/lotus/extern/sector-storage" @@ -50,15 +50,15 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/stores" sealing "github.com/filecoin-project/lotus/extern/storage-sealing" "github.com/filecoin-project/lotus/extern/storage-sealing/sealiface" - "github.com/filecoin-project/lotus/journal" - "github.com/filecoin-project/lotus/markets" lapi "github.com/filecoin-project/lotus/api" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/gen" "github.com/filecoin-project/lotus/chain/gen/slashfilter" "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/journal" "github.com/filecoin-project/lotus/lib/blockstore" + "github.com/filecoin-project/lotus/markets" marketevents "github.com/filecoin-project/lotus/markets/loggers" "github.com/filecoin-project/lotus/markets/retrievaladapter" 
"github.com/filecoin-project/lotus/miner" From b8865fb182f700b04589d76e2324fdf90c131c43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 22 Sep 2020 01:00:17 +0200 Subject: [PATCH 14/61] workers: Mark on-restart-failed returned tasks as returned --- extern/sector-storage/manager_test.go | 5 +++++ extern/sector-storage/teststorage_test.go | 4 ++-- extern/sector-storage/worker_calltracker.go | 6 +++--- extern/sector-storage/worker_local.go | 8 ++++++-- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index 9a47c3b5538..d87ec082757 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -338,4 +338,9 @@ func TestRestartWorker(t *testing.T) { require.NoError(t, err) <-apDone + + time.Sleep(12 * time.Millisecond) + uf, err := w.ct.unfinished() + require.NoError(t, err) + require.Empty(t, uf) } diff --git a/extern/sector-storage/teststorage_test.go b/extern/sector-storage/teststorage_test.go index da575a49154..0c8a240a322 100644 --- a/extern/sector-storage/teststorage_test.go +++ b/extern/sector-storage/teststorage_test.go @@ -15,7 +15,7 @@ import ( ) type apres struct { - pi abi.PieceInfo + pi abi.PieceInfo err error } @@ -78,4 +78,4 @@ func (t *testExec) ReadPiece(ctx context.Context, writer io.Writer, sector abi.S panic("implement me") } -var _ ffiwrapper.Storage = &testExec{} \ No newline at end of file +var _ ffiwrapper.Storage = &testExec{} diff --git a/extern/sector-storage/worker_calltracker.go b/extern/sector-storage/worker_calltracker.go index 38fb39ee146..1033822a5b9 100644 --- a/extern/sector-storage/worker_calltracker.go +++ b/extern/sector-storage/worker_calltracker.go @@ -29,9 +29,9 @@ type Call struct { func (wt *workerCallTracker) onStart(ci storiface.CallID, rt ReturnType) error { return wt.st.Begin(ci, &Call{ - ID: ci, - RetType:rt, - State: CallStarted, + ID: ci, + RetType: rt, + State: CallStarted, }) } diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 009e11921ab..38b41ceb453 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -65,7 +65,7 @@ func newLocalWorker(executor func() (ffiwrapper.Storage, error), wcfg WorkerConf st: cst, }, acceptTasks: acceptTasks, - executor: executor, + executor: executor, closing: make(chan struct{}), } @@ -86,11 +86,15 @@ func newLocalWorker(executor func() (ffiwrapper.Storage, error), wcfg WorkerConf if err := returnFunc[call.RetType](context.TODO(), call.ID, ret, nil, err); err != nil { log.Errorf("return error: %s: %+v", call.RetType, err) + continue + } + + if err := w.ct.onReturned(call.ID); err != nil { + log.Errorf("marking call as returned failed: %s: %+v", call.RetType, err) } } }() - return w } From 706f4f2ef505cbbafda1f60212227f46682647d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 22 Sep 2020 18:36:44 +0200 Subject: [PATCH 15/61] worker: Don't die with the connection --- cmd/lotus-seal-worker/main.go | 82 +++++++++------------------ extern/sector-storage/worker_local.go | 11 ++++ 2 files changed, 38 insertions(+), 55 deletions(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index 9739acb689b..292590c2e94 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -10,7 +10,6 @@ import ( "os" "path/filepath" "strings" - "syscall" "time" "github.com/google/uuid" @@ -198,8 +197,6 @@ var runCmd = 
&cli.Command{ } log.Infof("Remote version %s", v) - watchMinerConn(ctx, cctx, nodeApi) - // Check params act, err := nodeApi.ActorAddress(ctx) @@ -422,66 +419,41 @@ var runCmd = &cli.Command{ } } - log.Info("Waiting for tasks") - go func() { - if err := nodeApi.WorkerConnect(ctx, "ws://"+address+"/rpc/v0"); err != nil { - log.Errorf("Registering worker failed: %+v", err) - cancel() - return - } - }() + for { + log.Info("Making sure no local tasks are running") - return srv.Serve(nl) - }, -} + // TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly + workerApi.LocalWorker.WaitQuiet() -func watchMinerConn(ctx context.Context, cctx *cli.Context, nodeApi api.StorageMiner) { - go func() { - closing, err := nodeApi.Closing(ctx) - if err != nil { - log.Errorf("failed to get remote closing channel: %+v", err) - } + if err := nodeApi.WorkerConnect(ctx, "ws://"+address+"/rpc/v0"); err != nil { + log.Errorf("Registering worker failed: %+v", err) + cancel() + return + } - select { - case <-closing: - case <-ctx.Done(): - } + log.Info("Worker registered successfully, waiting for tasks") - if ctx.Err() != nil { - return // graceful shutdown - } + closing, err := nodeApi.Closing(ctx) + if err != nil { + log.Errorf("failed to get remote closing channel: %+v", err) + } - log.Warnf("Connection with miner node lost, restarting") + select { + case <-closing: + case <-ctx.Done(): + } - exe, err := os.Executable() - if err != nil { - log.Errorf("getting executable for auto-restart: %+v", err) - } + if ctx.Err() != nil { + return // graceful shutdown + } - _ = log.Sync() - - // TODO: there are probably cleaner/more graceful ways to restart, - // but this is good enough for now (FSM can recover from the mess this creates) - //nolint:gosec - if err := syscall.Exec(exe, []string{exe, - fmt.Sprintf("--worker-repo=%s", cctx.String("worker-repo")), - fmt.Sprintf("--miner-repo=%s", cctx.String("miner-repo")), - fmt.Sprintf("--enable-gpu-proving=%t", cctx.Bool("enable-gpu-proving")), - "run", - fmt.Sprintf("--listen=%s", cctx.String("listen")), - fmt.Sprintf("--no-local-storage=%t", cctx.Bool("no-local-storage")), - fmt.Sprintf("--addpiece=%t", cctx.Bool("addpiece")), - fmt.Sprintf("--precommit1=%t", cctx.Bool("precommit1")), - fmt.Sprintf("--unseal=%t", cctx.Bool("unseal")), - fmt.Sprintf("--precommit2=%t", cctx.Bool("precommit2")), - fmt.Sprintf("--commit=%t", cctx.Bool("commit")), - fmt.Sprintf("--parallel-fetch-limit=%d", cctx.Int("parallel-fetch-limit")), - fmt.Sprintf("--timeout=%s", cctx.String("timeout")), - }, os.Environ()); err != nil { - fmt.Println(err) - } - }() + log.Errorf("LOTUS-MINER CONNECTION LOST") + } + }() + + return srv.Serve(nl) + }, } func extractRoutableIP(timeout time.Duration) (string, error) { diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 38b41ceb453..46f0d65e2cd 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -7,6 +7,7 @@ import ( "os" "reflect" "runtime" + "sync" "github.com/elastic/go-sysinfo" "github.com/google/uuid" @@ -42,6 +43,7 @@ type LocalWorker struct { ct *workerCallTracker acceptTasks map[sealtasks.TaskType]struct{} + running sync.WaitGroup closing chan struct{} } @@ -202,7 +204,11 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt Ret log.Errorf("tracking call (start): %+v", err) } + l.running.Add(1) + go func() { + defer l.running.Done() + res, err := work(ci) { @@ -455,4 +461,9 @@ func (l *LocalWorker) 
Close() error { return nil } +// WaitQuiet blocks as long as there are tasks running +func (l *LocalWorker) WaitQuiet() { + l.running.Wait() +} + var _ Worker = &LocalWorker{} From bb5cc066771cf64b50fbd65a007199689f25e167 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 22 Sep 2020 23:33:13 +0200 Subject: [PATCH 16/61] Fix workid param hash --- extern/sector-storage/manager_calltracker.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 8e3e20c6e30..13296a8434e 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -3,6 +3,7 @@ package sectorstorage import ( "context" "crypto/sha256" + "encoding/hex" "encoding/json" "errors" "fmt" @@ -47,7 +48,7 @@ func newWorkID(method string, params ...interface{}) (WorkID, error) { if len(pb) > 256 { s := sha256.Sum256(pb) - pb = s[:] + pb = []byte(hex.EncodeToString(s[:])) } return WorkID{ From 04ad1791b08da9008643caafd6760dfc714e1524 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 23 Sep 2020 00:10:36 +0200 Subject: [PATCH 17/61] localworker: Fix contexts --- extern/sector-storage/worker_local.go | 56 +++++++++++++++++++++------ 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 46f0d65e2cd..495c9630d01 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -8,6 +8,7 @@ import ( "reflect" "runtime" "sync" + "time" "github.com/elastic/go-sysinfo" "github.com/google/uuid" @@ -194,7 +195,7 @@ var returnFunc = map[ReturnType]func(context.Context, storiface.CallID, storifac "Fetch": rfunc(storiface.WorkerReturn.ReturnFetch), } -func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt ReturnType, work func(ci storiface.CallID) (interface{}, error)) (storiface.CallID, error) { +func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt ReturnType, work func(ctx context.Context, ci storiface.CallID) (interface{}, error)) (storiface.CallID, error) { ci := storiface.CallID{ Sector: sector, ID: uuid.New(), @@ -209,7 +210,10 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt Ret go func() { defer l.running.Done() - res, err := work(ci) + res, err := work(&wctx{ + vals: ctx, + closing: l.closing, + }, ci) { rb, err := json.Marshal(res) @@ -258,13 +262,13 @@ func (l *LocalWorker) AddPiece(ctx context.Context, sector abi.SectorID, epcs [] return storiface.UndefCall, err } - return l.asyncCall(ctx, sector, "AddPiece", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "AddPiece", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { return sb.AddPiece(ctx, sector, epcs, sz, r) }) } func (l *LocalWorker) Fetch(ctx context.Context, sector abi.SectorID, fileType storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { - return l.asyncCall(ctx, sector, "Fetch", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "Fetch", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { _, done, err := (&localWorkerPathProvider{w: l, op: am}).AcquireSector(ctx, sector, fileType, storiface.FTNone, ptype) if err == nil { done() @@ -275,7 +279,7 @@ func (l *LocalWorker) Fetch(ctx context.Context, sector abi.SectorID, 
fileType s } func (l *LocalWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { - return l.asyncCall(ctx, sector, "SealPreCommit1", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "SealPreCommit1", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { { // cleanup previous failed attempts if they exist @@ -303,7 +307,7 @@ func (l *LocalWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, p return storiface.UndefCall, err } - return l.asyncCall(ctx, sector, "SealPreCommit2", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "SealPreCommit2", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { return sb.SealPreCommit2(ctx, sector, phase1Out) }) } @@ -314,7 +318,7 @@ func (l *LocalWorker) SealCommit1(ctx context.Context, sector abi.SectorID, tick return storiface.UndefCall, err } - return l.asyncCall(ctx, sector, "SealCommit1", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "SealCommit1", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { return sb.SealCommit1(ctx, sector, ticket, seed, pieces, cids) }) } @@ -325,7 +329,7 @@ func (l *LocalWorker) SealCommit2(ctx context.Context, sector abi.SectorID, phas return storiface.UndefCall, err } - return l.asyncCall(ctx, sector, "SealCommit2", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "SealCommit2", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { return sb.SealCommit2(ctx, sector, phase1Out) }) } @@ -336,7 +340,7 @@ func (l *LocalWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, k return storiface.UndefCall, err } - return l.asyncCall(ctx, sector, "FinalizeSector", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "FinalizeSector", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { if err := sb.FinalizeSector(ctx, sector, keepUnsealed); err != nil { return nil, xerrors.Errorf("finalizing sector: %w", err) } @@ -372,7 +376,7 @@ func (l *LocalWorker) Remove(ctx context.Context, sector abi.SectorID) error { } func (l *LocalWorker) MoveStorage(ctx context.Context, sector abi.SectorID, types storiface.SectorFileType) (storiface.CallID, error) { - return l.asyncCall(ctx, sector, "MoveStorage", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "MoveStorage", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { return nil, l.storage.MoveStorage(ctx, sector, l.scfg.SealProofType, types) }) } @@ -383,7 +387,7 @@ func (l *LocalWorker) UnsealPiece(ctx context.Context, sector abi.SectorID, inde return storiface.UndefCall, err } - return l.asyncCall(ctx, sector, "UnsealPiece", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "UnsealPiece", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { if err = sb.UnsealPiece(ctx, sector, index, size, randomness, cid); err != nil { return nil, xerrors.Errorf("unsealing sector: %w", err) } @@ -406,7 +410,7 @@ func (l *LocalWorker) ReadPiece(ctx context.Context, writer io.Writer, sector ab return storiface.UndefCall, err } - return l.asyncCall(ctx, sector, "ReadPiece", func(ci storiface.CallID) (interface{}, error) { + return l.asyncCall(ctx, sector, "ReadPiece", func(ctx context.Context, ci storiface.CallID) (interface{}, error) { return 
sb.ReadPiece(ctx, writer, sector, index, size) }) } @@ -466,4 +470,32 @@ func (l *LocalWorker) WaitQuiet() { l.running.Wait() } +type wctx struct { + vals context.Context + closing chan struct{} +} + +func (w *wctx) Deadline() (time.Time, bool) { + return time.Time{}, false +} + +func (w *wctx) Done() <-chan struct{} { + return w.closing +} + +func (w *wctx) Err() error { + select { + case <-w.closing: + return context.Canceled + default: + return nil + } +} + +func (w *wctx) Value(key interface{}) interface{} { + return w.vals.Value(key) +} + +var _ context.Context = &wctx{} + var _ Worker = &LocalWorker{} From 6185e157e91a265022ca2b56af6987965e29b3ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 23 Sep 2020 00:26:07 +0200 Subject: [PATCH 18/61] sectorstorage: calltracker: work around cbor-gen bytearray len limit --- extern/sector-storage/cbor_gen.go | 44 ++++++-------- extern/sector-storage/worker_calltracker.go | 66 ++++++++++++++++++++- 2 files changed, 81 insertions(+), 29 deletions(-) diff --git a/extern/sector-storage/cbor_gen.go b/extern/sector-storage/cbor_gen.go index a291487f0a5..51b82ef1363 100644 --- a/extern/sector-storage/cbor_gen.go +++ b/extern/sector-storage/cbor_gen.go @@ -78,7 +78,7 @@ func (t *Call) MarshalCBOR(w io.Writer) error { return err } - // t.Result ([]uint8) (slice) + // t.Result (sectorstorage.ManyBytes) (struct) if len("Result") > cbg.MaxLength { return xerrors.Errorf("Value in field \"Result\" was too long") } @@ -90,15 +90,7 @@ func (t *Call) MarshalCBOR(w io.Writer) error { return err } - if len(t.Result) > cbg.ByteArrayMaxLen { - return xerrors.Errorf("Byte array in field t.Result was too long") - } - - if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajByteString, uint64(len(t.Result))); err != nil { - return err - } - - if _, err := w.Write(t.Result[:]); err != nil { + if err := t.Result.MarshalCBOR(w); err != nil { return err } return nil @@ -173,27 +165,25 @@ func (t *Call) UnmarshalCBOR(r io.Reader) error { t.State = CallState(extra) } - // t.Result ([]uint8) (slice) + // t.Result (sectorstorage.ManyBytes) (struct) case "Result": - maj, extra, err = cbg.CborReadHeaderBuf(br, scratch) - if err != nil { - return err - } - - if extra > cbg.ByteArrayMaxLen { - return fmt.Errorf("t.Result: byte array too large (%d)", extra) - } - if maj != cbg.MajByteString { - return fmt.Errorf("expected byte array") - } + { - if extra > 0 { - t.Result = make([]uint8, extra) - } + b, err := br.ReadByte() + if err != nil { + return err + } + if b != cbg.CborNull[0] { + if err := br.UnreadByte(); err != nil { + return err + } + t.Result = new(ManyBytes) + if err := t.Result.UnmarshalCBOR(br); err != nil { + return xerrors.Errorf("unmarshaling t.Result pointer: %w", err) + } + } - if _, err := io.ReadFull(br, t.Result[:]); err != nil { - return err } default: diff --git a/extern/sector-storage/worker_calltracker.go b/extern/sector-storage/worker_calltracker.go index 1033822a5b9..6f03c72cc0b 100644 --- a/extern/sector-storage/worker_calltracker.go +++ b/extern/sector-storage/worker_calltracker.go @@ -1,7 +1,12 @@ package sectorstorage import ( + "fmt" + "io" + "github.com/filecoin-project/go-statestore" + cbg "github.com/whyrusleeping/cbor-gen" + "golang.org/x/xerrors" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) @@ -24,7 +29,7 @@ type Call struct { State CallState - Result []byte // json bytes + Result *ManyBytes // json bytes } func (wt *workerCallTracker) onStart(ci storiface.CallID, rt ReturnType) error { @@ 
-39,7 +44,7 @@ func (wt *workerCallTracker) onDone(ci storiface.CallID, ret []byte) error { st := wt.st.Get(ci) return st.Mutate(func(cs *Call) error { cs.State = CallDone - cs.Result = ret + cs.Result = &ManyBytes{ret} return nil }) } @@ -53,3 +58,60 @@ func (wt *workerCallTracker) unfinished() ([]Call, error) { var out []Call return out, wt.st.List(&out) } + +// Ideally this would be a tag on the struct field telling cbor-gen to enforce higher max-len +type ManyBytes struct { + b []byte +} + +const many = 100 << 20 + +func (t *ManyBytes) MarshalCBOR(w io.Writer) error { + if t == nil { + t = &ManyBytes{} + } + + if len(t.b) > many { + return xerrors.Errorf("byte array in field t.Result was too long") + } + + scratch := make([]byte, 9) + + if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajByteString, uint64(len(t.b))); err != nil { + return err + } + + if _, err := w.Write(t.b[:]); err != nil { + return err + } + return nil +} + +func (t *ManyBytes) UnmarshalCBOR(r io.Reader) error { + *t = ManyBytes{} + + br := cbg.GetPeeker(r) + scratch := make([]byte, 9) + + maj, extra, err := cbg.CborReadHeaderBuf(br, scratch) + if err != nil { + return err + } + + if extra > many { + return fmt.Errorf("byte array too large (%d)", extra) + } + if maj != cbg.MajByteString { + return fmt.Errorf("expected byte array") + } + + if extra > 0 { + t.b = make([]uint8, extra) + } + + if _, err := io.ReadFull(br, t.b[:]); err != nil { + return err + } + + return nil +} From 86c222ab58bc875400a3071ddf794274ff0eb322 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 23 Sep 2020 14:56:37 +0200 Subject: [PATCH 19/61] sectorstorage: fix work tracking --- extern/sector-storage/manager.go | 5 +- extern/sector-storage/manager_calltracker.go | 2 + extern/sector-storage/manager_test.go | 5 ++ extern/sector-storage/sched.go | 14 +++-- extern/sector-storage/sched_test.go | 5 +- extern/sector-storage/stats.go | 6 +- extern/sector-storage/worker_tracked.go | 61 ++++++++++---------- 7 files changed, 53 insertions(+), 45 deletions(-) diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index eecfd1b55e4..d4db6080645 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -197,10 +197,7 @@ func (m *Manager) AddWorker(ctx context.Context, w Worker) error { m.sched.newWorkers <- &workerHandle{ w: w, - wt: &workTracker{ - done: map[storiface.CallID]struct{}{}, - running: map[storiface.CallID]storiface.WorkerJob{}, - }, + info: info, preparing: &activeResources{}, active: &activeResources{}, diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 13296a8434e..e62f964b30f 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -289,6 +289,8 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri err: err, } + m.sched.wt.onDone(callID) + m.workLk.Lock() defer m.workLk.Unlock() diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index d87ec082757..0fada08bc65 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -115,6 +115,11 @@ func newTestMgr(ctx context.Context, t *testing.T, ds datastore.Datastore) (*Man Prover: prover, + wt: &workTracker{ + done: map[storiface.CallID]struct{}{}, + running: map[storiface.CallID]trackedWork{}, + }, + work: statestore.New(ds), callToWork: map[storiface.CallID]WorkID{}, callRes: 
map[storiface.CallID]chan result{}, diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index d757140b939..760fe9cba52 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -69,6 +69,8 @@ type scheduler struct { schedQueue *requestQueue openWindows []*schedWindowRequest + wt *workTracker + info chan func(interface{}) closing chan struct{} @@ -89,9 +91,6 @@ type workerHandle struct { wndLk sync.Mutex activeWindows []*schedWindow - // stats / tracking - wt *workTracker - // for sync manager goroutine closing cleanupStarted bool closedMgr chan struct{} @@ -157,6 +156,11 @@ func newScheduler(spt abi.RegisteredSealProof) *scheduler { schedQueue: &requestQueue{}, + wt: &workTracker{ + done: map[storiface.CallID]struct{}{}, + running: map[storiface.CallID]trackedWork{}, + }, + info: make(chan func(interface{})), closing: make(chan struct{}), @@ -680,7 +684,7 @@ func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *worke w.lk.Unlock() go func() { - err := req.prepare(req.ctx, w.wt.worker(w.w)) + err := req.prepare(req.ctx, sh.wt.worker(wid, w.w)) sh.workersLk.Lock() if err != nil { @@ -717,7 +721,7 @@ func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *worke case <-sh.closing: } - err = req.work(req.ctx, w.wt.worker(w.w)) + err = req.work(req.ctx, sh.wt.worker(wid, w.w)) select { case req.ret <- workerResponse{err: err}: diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index 55ef9bf02a9..f23be20c050 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -165,10 +165,7 @@ func addTestWorker(t *testing.T, sched *scheduler, index *stores.Index, name str sched.newWorkers <- &workerHandle{ w: w, - wt: &workTracker{ - done: map[storiface.CallID]struct{}{}, - running: map[storiface.CallID]storiface.WorkerJob{}, - }, + info: info, preparing: &activeResources{}, active: &activeResources{}, diff --git a/extern/sector-storage/stats.go b/extern/sector-storage/stats.go index f9063cbec68..1ce415fd213 100644 --- a/extern/sector-storage/stats.go +++ b/extern/sector-storage/stats.go @@ -29,9 +29,11 @@ func (m *Manager) WorkerJobs() map[uint64][]storiface.WorkerJob { out := map[uint64][]storiface.WorkerJob{} - for id, handle := range m.sched.workers { - out[uint64(id)] = handle.wt.Running() + for _, t := range m.sched.wt.Running() { + out[uint64(t.worker)] = append(out[uint64(t.worker)], t.job) + } + for id, handle := range m.sched.workers { handle.wndLk.Lock() for wi, window := range handle.activeWindows { for _, request := range window.todo { diff --git a/extern/sector-storage/worker_tracked.go b/extern/sector-storage/worker_tracked.go index f5ad153600c..4a22fcca714 100644 --- a/extern/sector-storage/worker_tracked.go +++ b/extern/sector-storage/worker_tracked.go @@ -15,25 +15,20 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) +type trackedWork struct { + job storiface.WorkerJob + worker WorkerID +} + type workTracker struct { lk sync.Mutex done map[storiface.CallID]struct{} - running map[storiface.CallID]storiface.WorkerJob + running map[storiface.CallID]trackedWork // TODO: done, aggregate stats, queue stats, scheduler feedback } -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! -// TODO: CALL THIS! 
func (wt *workTracker) onDone(callID storiface.CallID) { wt.lk.Lock() defer wt.lk.Unlock() @@ -47,7 +42,7 @@ func (wt *workTracker) onDone(callID storiface.CallID) { delete(wt.running, callID) } -func (wt *workTracker) track(sid abi.SectorID, task sealtasks.TaskType) func(storiface.CallID, error) (storiface.CallID, error) { +func (wt *workTracker) track(wid WorkerID, sid abi.SectorID, task sealtasks.TaskType) func(storiface.CallID, error) (storiface.CallID, error) { return func(callID storiface.CallID, err error) (storiface.CallID, error) { if err != nil { return callID, err @@ -62,29 +57,34 @@ func (wt *workTracker) track(sid abi.SectorID, task sealtasks.TaskType) func(sto return callID, err } - wt.running[callID] = storiface.WorkerJob{ - ID: callID, - Sector: sid, - Task: task, - Start: time.Now(), + wt.running[callID] = trackedWork{ + job: storiface.WorkerJob{ + ID: callID, + Sector: sid, + Task: task, + Start: time.Now(), + }, + worker: wid, } return callID, err } } -func (wt *workTracker) worker(w Worker) Worker { +func (wt *workTracker) worker(wid WorkerID, w Worker) Worker { return &trackedWorker{ - Worker: w, + Worker: w, + wid: wid, + tracker: wt, } } -func (wt *workTracker) Running() []storiface.WorkerJob { +func (wt *workTracker) Running() []trackedWork { wt.lk.Lock() defer wt.lk.Unlock() - out := make([]storiface.WorkerJob, 0, len(wt.running)) + out := make([]trackedWork, 0, len(wt.running)) for _, job := range wt.running { out = append(out, job) } @@ -94,44 +94,45 @@ func (wt *workTracker) Running() []storiface.WorkerJob { type trackedWorker struct { Worker + wid WorkerID tracker *workTracker } func (t *trackedWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { - return t.tracker.track(sector, sealtasks.TTPreCommit1)(t.Worker.SealPreCommit1(ctx, sector, ticket, pieces)) + return t.tracker.track(t.wid, sector, sealtasks.TTPreCommit1)(t.Worker.SealPreCommit1(ctx, sector, ticket, pieces)) } func (t *trackedWorker) SealPreCommit2(ctx context.Context, sector abi.SectorID, pc1o storage.PreCommit1Out) (storiface.CallID, error) { - return t.tracker.track(sector, sealtasks.TTPreCommit2)(t.Worker.SealPreCommit2(ctx, sector, pc1o)) + return t.tracker.track(t.wid, sector, sealtasks.TTPreCommit2)(t.Worker.SealPreCommit2(ctx, sector, pc1o)) } func (t *trackedWorker) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (storiface.CallID, error) { - return t.tracker.track(sector, sealtasks.TTCommit1)(t.Worker.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) + return t.tracker.track(t.wid, sector, sealtasks.TTCommit1)(t.Worker.SealCommit1(ctx, sector, ticket, seed, pieces, cids)) } func (t *trackedWorker) SealCommit2(ctx context.Context, sector abi.SectorID, c1o storage.Commit1Out) (storiface.CallID, error) { - return t.tracker.track(sector, sealtasks.TTCommit2)(t.Worker.SealCommit2(ctx, sector, c1o)) + return t.tracker.track(t.wid, sector, sealtasks.TTCommit2)(t.Worker.SealCommit2(ctx, sector, c1o)) } func (t *trackedWorker) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) (storiface.CallID, error) { - return t.tracker.track(sector, sealtasks.TTFinalize)(t.Worker.FinalizeSector(ctx, sector, keepUnsealed)) + return t.tracker.track(t.wid, sector, sealtasks.TTFinalize)(t.Worker.FinalizeSector(ctx, sector, keepUnsealed)) } func (t *trackedWorker) 
AddPiece(ctx context.Context, sector abi.SectorID, pieceSizes []abi.UnpaddedPieceSize, newPieceSize abi.UnpaddedPieceSize, pieceData storage.Data) (storiface.CallID, error) { - return t.tracker.track(sector, sealtasks.TTAddPiece)(t.Worker.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData)) + return t.tracker.track(t.wid, sector, sealtasks.TTAddPiece)(t.Worker.AddPiece(ctx, sector, pieceSizes, newPieceSize, pieceData)) } func (t *trackedWorker) Fetch(ctx context.Context, s abi.SectorID, ft storiface.SectorFileType, ptype storiface.PathType, am storiface.AcquireMode) (storiface.CallID, error) { - return t.tracker.track(s, sealtasks.TTFetch)(t.Worker.Fetch(ctx, s, ft, ptype, am)) + return t.tracker.track(t.wid, s, sealtasks.TTFetch)(t.Worker.Fetch(ctx, s, ft, ptype, am)) } func (t *trackedWorker) UnsealPiece(ctx context.Context, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize, randomness abi.SealRandomness, cid cid.Cid) (storiface.CallID, error) { - return t.tracker.track(id, sealtasks.TTUnseal)(t.Worker.UnsealPiece(ctx, id, index, size, randomness, cid)) + return t.tracker.track(t.wid, id, sealtasks.TTUnseal)(t.Worker.UnsealPiece(ctx, id, index, size, randomness, cid)) } func (t *trackedWorker) ReadPiece(ctx context.Context, writer io.Writer, id abi.SectorID, index storiface.UnpaddedByteIndex, size abi.UnpaddedPieceSize) (storiface.CallID, error) { - return t.tracker.track(id, sealtasks.TTReadUnsealed)(t.Worker.ReadPiece(ctx, writer, id, index, size)) + return t.tracker.track(t.wid, id, sealtasks.TTReadUnsealed)(t.Worker.ReadPiece(ctx, writer, id, index, size)) } var _ Worker = &trackedWorker{} From 3003789288c25e428ef139e4765c0ca82af13b33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 23 Sep 2020 16:12:15 +0200 Subject: [PATCH 20/61] worker: Use a real datastore for keeping track of calls --- cmd/lotus-seal-worker/main.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index 292590c2e94..dc7ad402888 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -14,7 +14,6 @@ import ( "github.com/google/uuid" "github.com/gorilla/mux" - "github.com/ipfs/go-datastore" "github.com/ipfs/go-datastore/namespace" logging "github.com/ipfs/go-log/v2" manet "github.com/multiformats/go-multiaddr/net" @@ -304,6 +303,15 @@ var runCmd = &cli.Command{ if err != nil { return err } + defer func() { + if err := lr.Close(); err != nil { + log.Error("closing repo", err) + } + }() + ds, err := lr.Datastore("/metadata") + if err != nil { + return err + } log.Info("Opening local storage; connecting to master") const unspecifiedAddress = "0.0.0.0" @@ -343,7 +351,7 @@ var runCmd = &cli.Command{ // Create / expose the worker - wsts := statestore.New(namespace.Wrap(datastore.NewMapDatastore(), modules.WorkerCallsPrefix)) // TODO: USE A REAL DATASTORE + wsts := statestore.New(namespace.Wrap(ds, modules.WorkerCallsPrefix)) workerApi := &worker{ LocalWorker: sectorstorage.NewLocalWorker(sectorstorage.WorkerConfig{ From c17f0d7e61fdbc4db05a8d1003ddbbb03f0e9dae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 23 Sep 2020 17:37:05 +0200 Subject: [PATCH 21/61] sectorstorage: Fix panic in returnResult --- extern/sector-storage/manager_calltracker.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 
e62f964b30f..01bc7c38d5a 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -320,8 +320,11 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri m.results[wid] = res - close(m.waitRes[wid]) - delete(m.waitRes, wid) + _, found := m.waitRes[wid] + if found { + close(m.waitRes[wid]) + delete(m.waitRes, wid) + } return nil } From d817dceb05797c2d3706f9fa04c7f927932ba4eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 23 Sep 2020 19:26:26 +0200 Subject: [PATCH 22/61] Show lost calls in sealing jobs cli --- api/api_storage.go | 2 +- api/apistruct/struct.go | 4 +-- cmd/lotus-storage-miner/sealing.go | 22 +++++++++----- extern/sector-storage/manager.go | 2 +- extern/sector-storage/stats.go | 36 ++++++++++++++++++----- extern/sector-storage/storiface/worker.go | 2 +- node/impl/storminer.go | 2 +- 7 files changed, 50 insertions(+), 20 deletions(-) diff --git a/api/api_storage.go b/api/api_storage.go index aab4e364a6b..aee5b5b5ba0 100644 --- a/api/api_storage.go +++ b/api/api_storage.go @@ -63,7 +63,7 @@ type StorageMiner interface { // WorkerConnect tells the node to connect to workers RPC WorkerConnect(context.Context, string) error WorkerStats(context.Context) (map[uint64]storiface.WorkerStats, error) - WorkerJobs(context.Context) (map[uint64][]storiface.WorkerJob, error) + WorkerJobs(context.Context) (map[int64][]storiface.WorkerJob, error) storiface.WorkerReturn // SealingSchedDiag dumps internal sealing scheduler state diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index 60e03f565ba..d4b48d66d13 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -280,7 +280,7 @@ type StorageMinerStruct struct { WorkerConnect func(context.Context, string) error `perm:"admin"` // TODO: worker perm WorkerStats func(context.Context) (map[uint64]storiface.WorkerStats, error) `perm:"admin"` - WorkerJobs func(context.Context) (map[uint64][]storiface.WorkerJob, error) `perm:"admin"` + WorkerJobs func(context.Context) (map[int64][]storiface.WorkerJob, error) `perm:"admin"` ReturnAddPiece func(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error `perm:"admin"` ReturnSealPreCommit1 func(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error `perm:"admin"` @@ -1093,7 +1093,7 @@ func (c *StorageMinerStruct) WorkerStats(ctx context.Context) (map[uint64]storif return c.Internal.WorkerStats(ctx) } -func (c *StorageMinerStruct) WorkerJobs(ctx context.Context) (map[uint64][]storiface.WorkerJob, error) { +func (c *StorageMinerStruct) WorkerJobs(ctx context.Context) (map[int64][]storiface.WorkerJob, error) { return c.Internal.WorkerJobs(ctx) } diff --git a/cmd/lotus-storage-miner/sealing.go b/cmd/lotus-storage-miner/sealing.go index 5cc5c419af0..7d612a03ab4 100644 --- a/cmd/lotus-storage-miner/sealing.go +++ b/cmd/lotus-storage-miner/sealing.go @@ -1,6 +1,7 @@ package main import ( + "encoding/hex" "encoding/json" "fmt" "os" @@ -9,10 +10,9 @@ import ( "text/tabwriter" "time" - "golang.org/x/xerrors" - "github.com/fatih/color" "github.com/urfave/cli/v2" + "golang.org/x/xerrors" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" @@ -139,7 +139,7 @@ var sealingJobsCmd = &cli.Command{ type line struct { storiface.WorkerJob - wid uint64 + wid int64 } lines := make([]line, 0) @@ -161,7 +161,7 @@ var sealingJobsCmd = &cli.Command{ return lines[i].Start.Before(lines[j].Start) }) - workerHostnames := 
map[uint64]string{} + workerHostnames := map[int64]string{} wst, err := nodeApi.WorkerStats(ctx) if err != nil { @@ -169,7 +169,7 @@ var sealingJobsCmd = &cli.Command{ } for wid, st := range wst { - workerHostnames[wid] = st.Info.Hostname + workerHostnames[int64(wid)] = st.Info.Hostname } tw := tabwriter.NewWriter(os.Stdout, 2, 4, 2, ' ', 0) @@ -177,10 +177,18 @@ var sealingJobsCmd = &cli.Command{ for _, l := range lines { state := "running" - if l.RunWait != 0 { + if l.RunWait > 0 { state = fmt.Sprintf("assigned(%d)", l.RunWait-1) } - _, _ = fmt.Fprintf(tw, "%d\t%d\t%d\t%s\t%s\t%s\t%s\n", l.ID, l.Sector.Number, l.wid, workerHostnames[l.wid], l.Task.Short(), state, time.Now().Sub(l.Start).Truncate(time.Millisecond*100)) + if l.RunWait == -1 { + state = "ret-wait" + } + dur := "n/a" + if !l.Start.IsZero() { + dur = time.Now().Sub(l.Start).Truncate(time.Millisecond * 100).String() + } + + _, _ = fmt.Fprintf(tw, "%s\t%d\t%d\t%s\t%s\t%s\t%s\n", hex.EncodeToString(l.ID.ID[10:]), l.Sector.Number, l.wid, workerHostnames[l.wid], l.Task.Short(), state, dur) } return tw.Flush() diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index d4db6080645..c8553a4e9f1 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -57,7 +57,7 @@ type SectorManager interface { FaultTracker } -type WorkerID uint64 +type WorkerID int64 type Manager struct { scfg *ffiwrapper.Config diff --git a/extern/sector-storage/stats.go b/extern/sector-storage/stats.go index 1ce415fd213..9b8cbc24e6c 100644 --- a/extern/sector-storage/stats.go +++ b/extern/sector-storage/stats.go @@ -2,6 +2,7 @@ package sectorstorage import ( "github.com/filecoin-project/lotus/extern/sector-storage/storiface" + "time" ) func (m *Manager) WorkerStats() map[uint64]storiface.WorkerStats { @@ -23,21 +24,22 @@ func (m *Manager) WorkerStats() map[uint64]storiface.WorkerStats { return out } -func (m *Manager) WorkerJobs() map[uint64][]storiface.WorkerJob { - m.sched.workersLk.RLock() - defer m.sched.workersLk.RUnlock() - - out := map[uint64][]storiface.WorkerJob{} +func (m *Manager) WorkerJobs() map[int64][]storiface.WorkerJob { + out := map[int64][]storiface.WorkerJob{} + calls := map[storiface.CallID]struct{}{} for _, t := range m.sched.wt.Running() { - out[uint64(t.worker)] = append(out[uint64(t.worker)], t.job) + out[int64(t.worker)] = append(out[int64(t.worker)], t.job) + calls[t.job.ID] = struct{}{} } + m.sched.workersLk.RLock() + for id, handle := range m.sched.workers { handle.wndLk.Lock() for wi, window := range handle.activeWindows { for _, request := range window.todo { - out[uint64(id)] = append(out[uint64(id)], storiface.WorkerJob{ + out[int64(id)] = append(out[int64(id)], storiface.WorkerJob{ ID: storiface.UndefCall, Sector: request.sector, Task: request.taskType, @@ -49,5 +51,25 @@ func (m *Manager) WorkerJobs() map[uint64][]storiface.WorkerJob { handle.wndLk.Unlock() } + m.sched.workersLk.RUnlock() + + m.workLk.Lock() + defer m.workLk.Unlock() + + for id := range m.callToWork { + _, found := calls[id] + if found { + continue + } + + out[-1] = append(out[-1], storiface.WorkerJob{ + ID: id, + Sector: id.Sector, + Task: "???", + RunWait: -1, + Start: time.Time{}, + }) + } + return out } diff --git a/extern/sector-storage/storiface/worker.go b/extern/sector-storage/storiface/worker.go index ead7705241a..e6ab2246fdd 100644 --- a/extern/sector-storage/storiface/worker.go +++ b/extern/sector-storage/storiface/worker.go @@ -45,7 +45,7 @@ type WorkerJob struct { Sector abi.SectorID Task 
sealtasks.TaskType - RunWait int // 0 - running, 1+ - assigned + RunWait int // -1 - ret-wait, 0 - running, 1+ - assigned Start time.Time } diff --git a/node/impl/storminer.go b/node/impl/storminer.go index 33764002775..ba0719078ba 100644 --- a/node/impl/storminer.go +++ b/node/impl/storminer.go @@ -87,7 +87,7 @@ func (sm *StorageMinerAPI) WorkerStats(context.Context) (map[uint64]storiface.Wo return sm.StorageMgr.WorkerStats(), nil } -func (sm *StorageMinerAPI) WorkerJobs(ctx context.Context) (map[uint64][]storiface.WorkerJob, error) { +func (sm *StorageMinerAPI) WorkerJobs(ctx context.Context) (map[int64][]storiface.WorkerJob, error) { return sm.StorageMgr.WorkerJobs(), nil } From 04ee53e0612a04dde59a2bf9dc509d3addef0a88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Thu, 24 Sep 2020 11:55:11 +0200 Subject: [PATCH 23/61] sectorstorage: Show task type of ret-wait jobs --- cmd/lotus-storage-miner/sealing.go | 3 +++ extern/sector-storage/cbor_gen.go | 7 ++++--- extern/sector-storage/manager.go | 8 ++++---- extern/sector-storage/manager_calltracker.go | 12 +++++++++--- extern/sector-storage/stats.go | 4 ++-- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/cmd/lotus-storage-miner/sealing.go b/cmd/lotus-storage-miner/sealing.go index 7d612a03ab4..62276a0e36d 100644 --- a/cmd/lotus-storage-miner/sealing.go +++ b/cmd/lotus-storage-miner/sealing.go @@ -158,6 +158,9 @@ var sealingJobsCmd = &cli.Command{ if lines[i].RunWait != lines[j].RunWait { return lines[i].RunWait < lines[j].RunWait } + if lines[i].Start.Equal(lines[j].Start) { + return lines[i].ID.ID.String() < lines[j].ID.ID.String() + } return lines[i].Start.Before(lines[j].Start) }) diff --git a/extern/sector-storage/cbor_gen.go b/extern/sector-storage/cbor_gen.go index 51b82ef1363..0db97f2c9f4 100644 --- a/extern/sector-storage/cbor_gen.go +++ b/extern/sector-storage/cbor_gen.go @@ -6,6 +6,7 @@ import ( "fmt" "io" + sealtasks "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" cbg "github.com/whyrusleeping/cbor-gen" xerrors "golang.org/x/xerrors" ) @@ -378,7 +379,7 @@ func (t *WorkID) MarshalCBOR(w io.Writer) error { scratch := make([]byte, 9) - // t.Method (string) (string) + // t.Method (sealtasks.TaskType) (string) if len("Method") > cbg.MaxLength { return xerrors.Errorf("Value in field \"Method\" was too long") } @@ -459,7 +460,7 @@ func (t *WorkID) UnmarshalCBOR(r io.Reader) error { } switch name { - // t.Method (string) (string) + // t.Method (sealtasks.TaskType) (string) case "Method": { @@ -468,7 +469,7 @@ func (t *WorkID) UnmarshalCBOR(r io.Reader) error { return err } - t.Method = string(sval) + t.Method = sealtasks.TaskType(sval) } // t.Params (string) (string) case "Params": diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index c8553a4e9f1..7d49cc958f2 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -364,7 +364,7 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke ctx, cancel := context.WithCancel(ctx) defer cancel() - wk, wait, err := m.getWork(ctx, "PreCommit1", sector, ticket, pieces) + wk, wait, err := m.getWork(ctx, sealtasks.TTPreCommit1, sector, ticket, pieces) if err != nil { return nil, xerrors.Errorf("getWork: %w", err) } @@ -408,7 +408,7 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase ctx, cancel := context.WithCancel(ctx) defer cancel() - wk, wait, err := m.getWork(ctx, "PreCommit2", sector, phase1Out) + wk, wait, err := 
m.getWork(ctx, sealtasks.TTPreCommit2, sector, phase1Out) if err != nil { return storage.SectorCids{}, xerrors.Errorf("getWork: %w", err) } @@ -449,7 +449,7 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a ctx, cancel := context.WithCancel(ctx) defer cancel() - wk, wait, err := m.getWork(ctx, "Commit1", sector, ticket, seed, pieces, cids) + wk, wait, err := m.getWork(ctx, sealtasks.TTCommit1, sector, ticket, seed, pieces, cids) if err != nil { return storage.Commit1Out{}, xerrors.Errorf("getWork: %w", err) } @@ -490,7 +490,7 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a } func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage.Commit1Out) (out storage.Proof, err error) { - wk, wait, err := m.getWork(ctx, "Commit2", sector, phase1Out) + wk, wait, err := m.getWork(ctx, sealtasks.TTCommit2, sector, phase1Out) if err != nil { return storage.Proof{}, xerrors.Errorf("getWork: %w", err) } diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 01bc7c38d5a..86509046775 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -8,12 +8,14 @@ import ( "errors" "fmt" "golang.org/x/xerrors" + "os" + "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type WorkID struct { - Method string + Method sealtasks.TaskType Params string // json [...params] } @@ -40,7 +42,7 @@ type WorkState struct { WorkError string // Status = wsDone, set when failed to start work } -func newWorkID(method string, params ...interface{}) (WorkID, error) { +func newWorkID(method sealtasks.TaskType, params ...interface{}) (WorkID, error) { pb, err := json.Marshal(params) if err != nil { return WorkID{}, xerrors.Errorf("marshaling work params: %w", err) @@ -74,6 +76,10 @@ func (m *Manager) setupWorkTracker() { continue } + if os.Getenv("LOTUS_MINER_ABORT_UNFINISHED_WORK") == "1" { + st.Status = wsDone + } + switch st.Status { case wsStarted: log.Warnf("dropping non-running work %s", wid) @@ -96,7 +102,7 @@ func (m *Manager) setupWorkTracker() { } // returns wait=true when the task is already tracked/running -func (m *Manager) getWork(ctx context.Context, method string, params ...interface{}) (wid WorkID, wait bool, err error) { +func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params ...interface{}) (wid WorkID, wait bool, err error) { wid, err = newWorkID(method, params) if err != nil { return WorkID{}, false, xerrors.Errorf("creating WorkID: %w", err) diff --git a/extern/sector-storage/stats.go b/extern/sector-storage/stats.go index 9b8cbc24e6c..849322be00a 100644 --- a/extern/sector-storage/stats.go +++ b/extern/sector-storage/stats.go @@ -56,7 +56,7 @@ func (m *Manager) WorkerJobs() map[int64][]storiface.WorkerJob { m.workLk.Lock() defer m.workLk.Unlock() - for id := range m.callToWork { + for id, work := range m.callToWork { _, found := calls[id] if found { continue @@ -65,7 +65,7 @@ func (m *Manager) WorkerJobs() map[int64][]storiface.WorkerJob { out[-1] = append(out[-1], storiface.WorkerJob{ ID: id, Sector: id.Sector, - Task: "???", + Task: work.Method, RunWait: -1, Start: time.Time{}, }) From a8fcb86c10986839e700444e5f9705be8561bc61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Thu, 24 Sep 2020 22:17:20 +0200 Subject: [PATCH 24/61] miner allinfo: Don't fail if sector status 
fails --- cmd/lotus-storage-miner/info_all.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/lotus-storage-miner/info_all.go b/cmd/lotus-storage-miner/info_all.go index 265ba78a4d8..74f27fec55a 100644 --- a/cmd/lotus-storage-miner/info_all.go +++ b/cmd/lotus-storage-miner/info_all.go @@ -135,7 +135,7 @@ var infoAllCmd = &cli.Command{ } if err := sectorsStatusCmd.Action(cli.NewContext(cctx.App, fs, cctx)); err != nil { - return err + fmt.Println("ERROR: ", err) } fmt.Printf("\n##: Sector %d Storage Location\n", s) @@ -146,7 +146,7 @@ var infoAllCmd = &cli.Command{ } if err := storageFindCmd.Action(cli.NewContext(cctx.App, fs, cctx)); err != nil { - return err + fmt.Println("ERROR: ", err) } } From a9d1ca4d83362e15b8fea113ec2733e52b5f2409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 12:08:09 +0200 Subject: [PATCH 25/61] Change order in miner sectors list --- cmd/lotus-storage-miner/sectors.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/lotus-storage-miner/sectors.go b/cmd/lotus-storage-miner/sectors.go index 370962bdcd8..67b14b38213 100644 --- a/cmd/lotus-storage-miner/sectors.go +++ b/cmd/lotus-storage-miner/sectors.go @@ -205,15 +205,15 @@ var sectorsListCmd = &cli.Command{ _, inSSet := commitedIDs[s] _, inASet := activeIDs[s] - _, _ = fmt.Fprintf(w, "%d: %s\tsSet: %s\tactive: %s\ttktH: %d\tseedH: %d\tdeals: %v\t toUpgrade:%t\n", + _, _ = fmt.Fprintf(w, "%d: %s\tsSet: %s\tactive: %s\ttktH: %d\tseedH: %d\ttoUpgrade: %t\tdeals: %v\n", s, st.State, yesno(inSSet), yesno(inASet), st.Ticket.Epoch, st.Seed.Epoch, - st.Deals, st.ToUpgrade, + st.Deals, ) } } From 9e7d6823b1950f684242a899143597c31279d720 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 13:34:45 +0200 Subject: [PATCH 26/61] sectorstorage: Cleanup callToWork mapping after work is done --- extern/sector-storage/manager_calltracker.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 86509046775..147e11b9164 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -232,6 +232,11 @@ func (m *Manager) waitWork(ctx context.Context, wid WorkID) (interface{}, error) res := m.results[wid] delete(m.results, wid) + _, ok := m.callToWork[ws.WorkerCall] + if ok { + delete(m.callToWork, ws.WorkerCall) + } + err := m.work.Get(wk).End() if err != nil { // Not great, but not worth discarding potentially multi-hour computation over this From 86cf3c835ea36ac4e9b14679bd661b45bf3e394a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 13:37:50 +0200 Subject: [PATCH 27/61] worker: Reconnect correctly --- cmd/lotus-seal-worker/main.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index dc7ad402888..de2be716b3f 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -172,7 +172,6 @@ var runCmd = &cli.Command{ var err error for { nodeApi, closer, err = lcli.GetStorageMinerAPI(cctx, - jsonrpc.WithNoReconnect(), jsonrpc.WithTimeout(30*time.Second)) if err == nil { break From 4ba7af606192a71ec0f35f754a0e1f9afb460986 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 20:46:44 +0200 Subject: [PATCH 28/61] worker: Mark return methods as retry-safe --- api/apistruct/struct.go | 24 ++++++++++++------------ 
cmd/lotus-seal-worker/main.go | 3 +-- extern/sector-storage/stores/local.go | 2 +- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index 1122dd76bef..07035221e30 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -279,21 +279,21 @@ type StorageMinerStruct struct { SectorRemove func(context.Context, abi.SectorNumber) error `perm:"admin"` SectorMarkForUpgrade func(ctx context.Context, id abi.SectorNumber) error `perm:"admin"` - WorkerConnect func(context.Context, string) error `perm:"admin"` // TODO: worker perm + WorkerConnect func(context.Context, string) error `perm:"admin" retry:"true"` // TODO: worker perm WorkerStats func(context.Context) (map[uint64]storiface.WorkerStats, error) `perm:"admin"` WorkerJobs func(context.Context) (map[int64][]storiface.WorkerJob, error) `perm:"admin"` - ReturnAddPiece func(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error `perm:"admin"` - ReturnSealPreCommit1 func(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error `perm:"admin"` - ReturnSealPreCommit2 func(ctx context.Context, callID storiface.CallID, sealed storage.SectorCids, err string) error `perm:"admin"` - ReturnSealCommit1 func(ctx context.Context, callID storiface.CallID, out storage.Commit1Out, err string) error `perm:"admin"` - ReturnSealCommit2 func(ctx context.Context, callID storiface.CallID, proof storage.Proof, err string) error `perm:"admin"` - ReturnFinalizeSector func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` - ReturnReleaseUnsealed func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` - ReturnMoveStorage func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` - ReturnUnsealPiece func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` - ReturnReadPiece func(ctx context.Context, callID storiface.CallID, ok bool, err string) error `perm:"admin"` - ReturnFetch func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin"` + ReturnAddPiece func(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error `perm:"admin" retry:"true"` + ReturnSealPreCommit1 func(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error `perm:"admin" retry:"true"` + ReturnSealPreCommit2 func(ctx context.Context, callID storiface.CallID, sealed storage.SectorCids, err string) error `perm:"admin" retry:"true"` + ReturnSealCommit1 func(ctx context.Context, callID storiface.CallID, out storage.Commit1Out, err string) error `perm:"admin" retry:"true"` + ReturnSealCommit2 func(ctx context.Context, callID storiface.CallID, proof storage.Proof, err string) error `perm:"admin" retry:"true"` + ReturnFinalizeSector func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin" retry:"true"` + ReturnReleaseUnsealed func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin" retry:"true"` + ReturnMoveStorage func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin" retry:"true"` + ReturnUnsealPiece func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin" retry:"true"` + ReturnReadPiece func(ctx context.Context, callID storiface.CallID, ok bool, err string) error `perm:"admin" retry:"true"` + ReturnFetch func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin" 
retry:"true"` SealingSchedDiag func(context.Context) (interface{}, error) `perm:"admin"` diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index de2be716b3f..703f23055fe 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -171,8 +171,7 @@ var runCmd = &cli.Command{ var closer func() var err error for { - nodeApi, closer, err = lcli.GetStorageMinerAPI(cctx, - jsonrpc.WithTimeout(30*time.Second)) + nodeApi, closer, err = lcli.GetStorageMinerAPI(cctx, jsonrpc.WithTimeout(30*time.Second)) if err == nil { break } diff --git a/extern/sector-storage/stores/local.go b/extern/sector-storage/stores/local.go index ee8af32726c..5ac92b337db 100644 --- a/extern/sector-storage/stores/local.go +++ b/extern/sector-storage/stores/local.go @@ -257,7 +257,7 @@ func (st *Local) reportHealth(ctx context.Context) { for id, report := range toReport { if err := st.index.StorageReportHealth(ctx, id, report); err != nil { - log.Warnf("error reporting storage health for %s: %+v", id, report) + log.Warnf("error reporting storage health for %s (%+v): %+v", id, report, err) } } } From 810c76720034e7189eabc412555364df94128bf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 21:06:49 +0200 Subject: [PATCH 29/61] worker: Redeclare storage on reconnect --- api/apistruct/struct.go | 2 +- cmd/lotus-seal-worker/main.go | 11 +++ extern/sector-storage/stores/local.go | 96 +++++++++++++++++++++------ 3 files changed, 86 insertions(+), 23 deletions(-) diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index 07035221e30..3abc5a18b37 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -300,7 +300,7 @@ type StorageMinerStruct struct { StorageList func(context.Context) (map[stores.ID][]stores.Decl, error) `perm:"admin"` StorageLocal func(context.Context) (map[stores.ID]string, error) `perm:"admin"` StorageStat func(context.Context, stores.ID) (fsutil.FsStat, error) `perm:"admin"` - StorageAttach func(context.Context, stores.StorageInfo, fsutil.FsStat) error `perm:"admin"` + StorageAttach func(context.Context, stores.StorageInfo, fsutil.FsStat) error `perm:"admin" retry:"true"` StorageDeclareSector func(context.Context, stores.ID, abi.SectorID, storiface.SectorFileType, bool) error `perm:"admin"` StorageDropSector func(context.Context, stores.ID, abi.SectorID, storiface.SectorFileType) error `perm:"admin"` StorageFindSector func(context.Context, abi.SectorID, storiface.SectorFileType, abi.RegisteredSealProof, bool) ([]stores.SectorStorageInfo, error) `perm:"admin"` diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index 703f23055fe..b34e999948f 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -426,12 +426,21 @@ var runCmd = &cli.Command{ } go func() { + var reconnect bool for { log.Info("Making sure no local tasks are running") // TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly workerApi.LocalWorker.WaitQuiet() + if reconnect { + if err := localStore.Redeclare(ctx); err != nil { + log.Errorf("Redeclaring local storage failed: %+v", err) + cancel() + return + } + } + if err := nodeApi.WorkerConnect(ctx, "ws://"+address+"/rpc/v0"); err != nil { log.Errorf("Registering worker failed: %+v", err) cancel() @@ -455,6 +464,8 @@ var runCmd = &cli.Command{ } log.Errorf("LOTUS-MINER CONNECTION LOST") + + reconnect = true } }() diff --git a/extern/sector-storage/stores/local.go b/extern/sector-storage/stores/local.go 
index 5ac92b337db..216e88cbabf 100644 --- a/extern/sector-storage/stores/local.go +++ b/extern/sector-storage/stores/local.go @@ -178,6 +178,78 @@ func (st *Local) OpenPath(ctx context.Context, p string) error { return xerrors.Errorf("declaring storage in index: %w", err) } + if err := st.declareSectors(ctx, p, meta.ID, meta.CanStore); err != nil { + return err + } + + st.paths[meta.ID] = out + + return nil +} + +func (st *Local) open(ctx context.Context) error { + cfg, err := st.localStorage.GetStorage() + if err != nil { + return xerrors.Errorf("getting local storage config: %w", err) + } + + for _, path := range cfg.StoragePaths { + err := st.OpenPath(ctx, path.Path) + if err != nil { + return xerrors.Errorf("opening path %s: %w", path.Path, err) + } + } + + go st.reportHealth(ctx) + + return nil +} + +func (st *Local) Redeclare(ctx context.Context) error { + st.localLk.Lock() + defer st.localLk.Unlock() + + for id, p := range st.paths { + mb, err := ioutil.ReadFile(filepath.Join(p.local, MetaFile)) + if err != nil { + return xerrors.Errorf("reading storage metadata for %s: %w", p, err) + } + + var meta LocalStorageMeta + if err := json.Unmarshal(mb, &meta); err != nil { + return xerrors.Errorf("unmarshalling storage metadata for %s: %w", p, err) + } + + fst, err := p.stat(st.localStorage) + if err != nil { + return err + } + + if id != meta.ID { + log.Errorf("storage path ID changed: %s; %s -> %s", p.local, id, meta.ID) + continue + } + + err = st.index.StorageAttach(ctx, StorageInfo{ + ID: id, + URLs: st.urls, + Weight: meta.Weight, + CanSeal: meta.CanSeal, + CanStore: meta.CanStore, + }, fst) + if err != nil { + return xerrors.Errorf("redeclaring storage in index: %w", err) + } + + if err := st.declareSectors(ctx, p.local, meta.ID, meta.CanStore); err != nil { + return xerrors.Errorf("redeclaring sectors: %w", err) + } + } + + return nil +} + +func (st *Local) declareSectors(ctx context.Context, p string, id ID, primary bool) error { for _, t := range storiface.PathTypes { ents, err := ioutil.ReadDir(filepath.Join(p, t.String())) if err != nil { @@ -201,32 +273,12 @@ func (st *Local) OpenPath(ctx context.Context, p string) error { return xerrors.Errorf("parse sector id %s: %w", ent.Name(), err) } - if err := st.index.StorageDeclareSector(ctx, meta.ID, sid, t, meta.CanStore); err != nil { - return xerrors.Errorf("declare sector %d(t:%d) -> %s: %w", sid, t, meta.ID, err) + if err := st.index.StorageDeclareSector(ctx, id, sid, t, primary); err != nil { + return xerrors.Errorf("declare sector %d(t:%d) -> %s: %w", sid, t, id, err) } } } - st.paths[meta.ID] = out - - return nil -} - -func (st *Local) open(ctx context.Context) error { - cfg, err := st.localStorage.GetStorage() - if err != nil { - return xerrors.Errorf("getting local storage config: %w", err) - } - - for _, path := range cfg.StoragePaths { - err := st.OpenPath(ctx, path.Path) - if err != nil { - return xerrors.Errorf("opening path %s: %w", path.Path, err) - } - } - - go st.reportHealth(ctx) - return nil } From bf554d0e43bdeb34b9d62314b4f732929c0c92c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 21:11:25 +0200 Subject: [PATCH 30/61] worker: Redeclare storage early on reconnect --- cmd/lotus-seal-worker/main.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index b34e999948f..fb576f8741e 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -428,12 +428,9 @@ var 
runCmd = &cli.Command{ go func() { var reconnect bool for { - log.Info("Making sure no local tasks are running") - - // TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly - workerApi.LocalWorker.WaitQuiet() - if reconnect { + log.Info("Redeclaring local storage") + if err := localStore.Redeclare(ctx); err != nil { log.Errorf("Redeclaring local storage failed: %+v", err) cancel() @@ -441,6 +438,12 @@ var runCmd = &cli.Command{ } } + log.Info("Making sure no local tasks are running") + + // TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly + workerApi.LocalWorker.WaitQuiet() + + if err := nodeApi.WorkerConnect(ctx, "ws://"+address+"/rpc/v0"); err != nil { log.Errorf("Registering worker failed: %+v", err) cancel() From 9bd25379711ce925c9e71d34c1313e2b933870e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 22:05:51 +0200 Subject: [PATCH 31/61] stores: Fix error printing in http handler --- cmd/lotus-seal-worker/main.go | 1 - extern/sector-storage/stores/http_handler.go | 18 +++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index fb576f8741e..0a88eacb480 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -443,7 +443,6 @@ var runCmd = &cli.Command{ // TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly workerApi.LocalWorker.WaitQuiet() - if err := nodeApi.WorkerConnect(ctx, "ws://"+address+"/rpc/v0"); err != nil { log.Errorf("Registering worker failed: %+v", err) cancel() diff --git a/extern/sector-storage/stores/http_handler.go b/extern/sector-storage/stores/http_handler.go index a5a2cd9137a..8891132ce71 100644 --- a/extern/sector-storage/stores/http_handler.go +++ b/extern/sector-storage/stores/http_handler.go @@ -58,14 +58,14 @@ func (handler *FetchHandler) remoteGetSector(w http.ResponseWriter, r *http.Requ id, err := storiface.ParseSectorID(vars["id"]) if err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) w.WriteHeader(500) return } ft, err := ftFromString(vars["type"]) if err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) w.WriteHeader(500) return } @@ -75,7 +75,7 @@ func (handler *FetchHandler) remoteGetSector(w http.ResponseWriter, r *http.Requ // passing 0 spt because we don't allocate anything paths, _, err := handler.Local.AcquireSector(r.Context(), id, 0, ft, storiface.FTNone, storiface.PathStorage, storiface.AcquireMove) if err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) w.WriteHeader(500) return } @@ -91,7 +91,7 @@ func (handler *FetchHandler) remoteGetSector(w http.ResponseWriter, r *http.Requ stat, err := os.Stat(path) if err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) w.WriteHeader(500) return } @@ -105,14 +105,14 @@ func (handler *FetchHandler) remoteGetSector(w http.ResponseWriter, r *http.Requ w.Header().Set("Content-Type", "application/octet-stream") } if err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) w.WriteHeader(500) return } w.WriteHeader(200) if _, err := io.Copy(w, rd); err != nil { // TODO: default 32k buf may be too small - log.Error("%+v", err) + log.Errorf("%+v", err) return } } @@ -123,20 +123,20 @@ func (handler *FetchHandler) remoteDeleteSector(w http.ResponseWriter, r *http.R id, err := storiface.ParseSectorID(vars["id"]) if err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) 
w.WriteHeader(500) return } ft, err := ftFromString(vars["type"]) if err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) w.WriteHeader(500) return } if err := handler.Remove(r.Context(), id, ft, false); err != nil { - log.Error("%+v", err) + log.Errorf("%+v", err) w.WriteHeader(500) return } From 1e6a69f8aa76846f234534eec6003b00b9e733b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 28 Sep 2020 22:10:02 +0200 Subject: [PATCH 32/61] localworker: Don't mark calls as returned when returning fails --- extern/sector-storage/worker_local.go | 1 + 1 file changed, 1 insertion(+) diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 495c9630d01..68be481d9eb 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -229,6 +229,7 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt Ret if err := returnFunc[rt](ctx, ci, l.ret, res, err); err != nil { log.Errorf("return error: %s: %+v", rt, err) + return } if err := l.ct.onReturned(ci); err != nil { From 0f2dcf28b1f4da306ebd99e9792add208713716e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 29 Sep 2020 09:57:36 +0200 Subject: [PATCH 33/61] fsm: Reuse tickets in PC1 on retry --- extern/sector-storage/manager_test.go | 5 ----- extern/sector-storage/sched_test.go | 2 +- extern/storage-sealing/fsm.go | 9 +++++++- extern/storage-sealing/fsm_events.go | 18 ++++++++++++---- extern/storage-sealing/sector_state.go | 1 + extern/storage-sealing/states_sealing.go | 27 ++++++++++++++++++------ 6 files changed, 45 insertions(+), 17 deletions(-) diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index 0fada08bc65..d87ec082757 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -115,11 +115,6 @@ func newTestMgr(ctx context.Context, t *testing.T, ds datastore.Datastore) (*Man Prover: prover, - wt: &workTracker{ - done: map[storiface.CallID]struct{}{}, - running: map[storiface.CallID]trackedWork{}, - }, - work: statestore.New(ds), callToWork: map[storiface.CallID]WorkID{}, callRes: map[storiface.CallID]chan result{}, diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index f23be20c050..a77576048ca 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -439,7 +439,7 @@ func TestSched(t *testing.T) { for _, job := range jobs { lines = append(lines, line{ WorkerJob: job, - wid: wid, + wid: uint64(wid), }) } } diff --git a/extern/storage-sealing/fsm.go b/extern/storage-sealing/fsm.go index 0d2e766fd42..12af83f56bf 100644 --- a/extern/storage-sealing/fsm.go +++ b/extern/storage-sealing/fsm.go @@ -45,12 +45,14 @@ var fsmPlanners = map[SectorState]func(events []statemachine.Event, state *Secto on(SectorAddPiece{}, WaitDeals), on(SectorStartPacking{}, Packing), ), - Packing: planOne(on(SectorPacked{}, PreCommit1)), + Packing: planOne(on(SectorPacked{}, GetTicket)), + GetTicket: planOne(on(SectorTicket{}, PreCommit1)), PreCommit1: planOne( on(SectorPreCommit1{}, PreCommit2), on(SectorSealPreCommit1Failed{}, SealPreCommit1Failed), on(SectorDealsExpired{}, DealsExpired), on(SectorInvalidDealIDs{}, RecoverDealIDs), + on(SectorOldTicket{}, GetTicket), ), PreCommit2: planOne( on(SectorPreCommit2{}, PreCommitting), @@ -219,6 +221,9 @@ func (m *Sealing) plan(events []statemachine.Event, state *SectorInfo) (func(sta *<- Packing <- incoming committed 
capacity | | | v + | GetTicket + | | ^ + | v | *<- PreCommit1 <--> SealPreCommit1Failed | | ^ ^^ | | *----------++----\ @@ -267,6 +272,8 @@ func (m *Sealing) plan(events []statemachine.Event, state *SectorInfo) (func(sta log.Infof("Waiting for deals %d", state.SectorNumber) case Packing: return m.handlePacking, processed, nil + case GetTicket: + return m.handleGetTicket, processed, nil case PreCommit1: return m.handlePreCommit1, processed, nil case PreCommit2: diff --git a/extern/storage-sealing/fsm_events.go b/extern/storage-sealing/fsm_events.go index 3e597d7613e..aec2beb0a12 100644 --- a/extern/storage-sealing/fsm_events.go +++ b/extern/storage-sealing/fsm_events.go @@ -101,16 +101,26 @@ func (evt SectorPacked) apply(state *SectorInfo) { } } +type SectorTicket struct { + TicketValue abi.SealRandomness + TicketEpoch abi.ChainEpoch +} + +func (evt SectorTicket) apply(state *SectorInfo) { + state.TicketEpoch = evt.TicketEpoch + state.TicketValue = evt.TicketValue +} + +type SectorOldTicket struct{} + +func (evt SectorOldTicket) apply(*SectorInfo) {} + type SectorPreCommit1 struct { PreCommit1Out storage.PreCommit1Out - TicketValue abi.SealRandomness - TicketEpoch abi.ChainEpoch } func (evt SectorPreCommit1) apply(state *SectorInfo) { state.PreCommit1Out = evt.PreCommit1Out - state.TicketEpoch = evt.TicketEpoch - state.TicketValue = evt.TicketValue state.PreCommit2Fails = 0 } diff --git a/extern/storage-sealing/sector_state.go b/extern/storage-sealing/sector_state.go index 10b96e50437..8b0bff24a70 100644 --- a/extern/storage-sealing/sector_state.go +++ b/extern/storage-sealing/sector_state.go @@ -41,6 +41,7 @@ const ( Empty SectorState = "Empty" WaitDeals SectorState = "WaitDeals" // waiting for more pieces (deals) to be added to the sector Packing SectorState = "Packing" // sector not in sealStore, and not on chain + GetTicket SectorState = "GetTicket" // generate ticket PreCommit1 SectorState = "PreCommit1" // do PreCommit1 PreCommit2 SectorState = "PreCommit2" // do PreCommit2 PreCommitting SectorState = "PreCommitting" // on chain pre-commit diff --git a/extern/storage-sealing/states_sealing.go b/extern/storage-sealing/states_sealing.go index a4e85245422..55a3f27e81b 100644 --- a/extern/storage-sealing/states_sealing.go +++ b/extern/storage-sealing/states_sealing.go @@ -21,6 +21,7 @@ import ( ) var DealSectorPriority = 1024 +var MaxTicketAge = abi.ChainEpoch(builtin.EpochsInDay * 2) func (m *Sealing) handlePacking(ctx statemachine.Context, sector SectorInfo) error { log.Infow("performing filling up rest of the sector...", "sector", sector.SectorNumber) @@ -83,6 +84,18 @@ func (m *Sealing) getTicket(ctx statemachine.Context, sector SectorInfo) (abi.Se return abi.SealRandomness(rand), ticketEpoch, nil } +func (m *Sealing) handleGetTicket(ctx statemachine.Context, sector SectorInfo) error { + ticketValue, ticketEpoch, err := m.getTicket(ctx, sector) + if err != nil { + return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("getting ticket failed: %w", err)}) + } + + return ctx.Send(SectorTicket{ + TicketValue: ticketValue, + TicketEpoch: ticketEpoch, + }) +} + func (m *Sealing) handlePreCommit1(ctx statemachine.Context, sector SectorInfo) error { if err := checkPieces(ctx.Context(), m.maddr, sector, m.api); err != nil { // Sanity check state switch err.(type) { @@ -99,21 +112,23 @@ func (m *Sealing) handlePreCommit1(ctx statemachine.Context, sector SectorInfo) } } - log.Infow("performing sector replication...", "sector", sector.SectorNumber) - ticketValue, ticketEpoch, err := 
m.getTicket(ctx, sector) + _, height, err := m.api.ChainHead(ctx.Context()) if err != nil { - return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("getting ticket failed: %w", err)}) + log.Errorf("handlePreCommit1: api error, not proceeding: %+v", err) + return nil + } + + if height-sector.TicketEpoch > MaxTicketAge { + return ctx.Send(SectorOldTicket{}) } - pc1o, err := m.sealer.SealPreCommit1(sector.sealingCtx(ctx.Context()), m.minerSector(sector.SectorNumber), ticketValue, sector.pieceInfos()) + pc1o, err := m.sealer.SealPreCommit1(sector.sealingCtx(ctx.Context()), m.minerSector(sector.SectorNumber), sector.TicketValue, sector.pieceInfos()) if err != nil { return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("seal pre commit(1) failed: %w", err)}) } return ctx.Send(SectorPreCommit1{ PreCommit1Out: pc1o, - TicketValue: ticketValue, - TicketEpoch: ticketEpoch, }) } From 46a5beafe45fa975758cbd9581cb6390eece6855 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 15:17:29 +0200 Subject: [PATCH 34/61] shed: Datastore utils --- cmd/lotus-shed/datastore.go | 193 ++++++++++++++++++++++++++++++++++++ cmd/lotus-shed/main.go | 1 + go.mod | 1 + 3 files changed, 195 insertions(+) create mode 100644 cmd/lotus-shed/datastore.go diff --git a/cmd/lotus-shed/datastore.go b/cmd/lotus-shed/datastore.go new file mode 100644 index 00000000000..dcd774a90a9 --- /dev/null +++ b/cmd/lotus-shed/datastore.go @@ -0,0 +1,193 @@ +package main + +import ( + "encoding/json" + "fmt" + "strings" + + "github.com/ipfs/go-datastore" + dsq "github.com/ipfs/go-datastore/query" + logging "github.com/ipfs/go-log" + "github.com/polydawn/refmt/cbor" + "github.com/urfave/cli/v2" + "golang.org/x/xerrors" + + "github.com/filecoin-project/lotus/node/repo" +) + +var datastoreCmd = &cli.Command{ + Name: "datastore", + Description: "access node datastores directly", + Subcommands: []*cli.Command{ + datastoreListCmd, + datastoreGetCmd, + }, +} + +var datastoreListCmd = &cli.Command{ + Name: "list", + Description: "list datastore keys", + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "repo-type", + Value: 1, + }, + &cli.BoolFlag{ + Name: "top-level", + Usage: "only print top-level keys", + }, + &cli.StringFlag{ + Name: "get-enc", + Usage: "print values [esc/hex/cbor]", + }, + }, + ArgsUsage: "[namespace prefix]", + Action: func(cctx *cli.Context) error { + logging.SetLogLevel("badger", "ERROR") // nolint:errchec + + r, err := repo.NewFS(cctx.String("repo")) + if err != nil { + return xerrors.Errorf("opening fs repo: %w", err) + } + + exists, err := r.Exists() + if err != nil { + return err + } + if !exists { + return xerrors.Errorf("lotus repo doesn't exist") + } + + lr, err := r.Lock(repo.RepoType(cctx.Int("repo-type"))) + if err != nil { + return err + } + defer lr.Close() //nolint:errcheck + + ds, err := lr.Datastore(datastore.NewKey(cctx.Args().First()).String()) + if err != nil { + return err + } + + genc := cctx.String("get-enc") + + q, err := ds.Query(dsq.Query{ + Prefix: datastore.NewKey(cctx.Args().Get(1)).String(), + KeysOnly: genc == "", + }) + if err != nil { + return xerrors.Errorf("datastore query: %w", err) + } + defer q.Close() //nolint:errcheck + + seen := map[string]struct{}{} + for res := range q.Next() { + s := res.Key + if cctx.Bool("top-level") { + k := datastore.NewKey(datastore.NewKey(s).List()[0]) + if k.Type() != "" { + s = k.Type() + } else { + s = k.String() + } + + _, has := seen[s] + if has { + continue + } + seen[s] = struct{}{} + } + + + s = fmt.Sprintf("%q", s) + 
s = strings.Trim(s, "\"") + fmt.Println(s) + + if genc != "" { + fmt.Print("\t") + if err := printVal(genc, res.Value); err != nil { + return err + } + } + } + + return nil + }, +} + +var datastoreGetCmd = &cli.Command{ + Name: "get", + Description: "list datastore keys", + Flags: []cli.Flag{ + &cli.IntFlag{ + Name: "repo-type", + Value: 1, + }, + &cli.StringFlag{ + Name: "enc", + Usage: "encoding (esc/hex/cbor)", + Value: "esc", + }, + }, + ArgsUsage: "[namespace key]", + Action: func(cctx *cli.Context) error { + logging.SetLogLevel("badger", "ERROR") // nolint:errchec + + r, err := repo.NewFS(cctx.String("repo")) + if err != nil { + return xerrors.Errorf("opening fs repo: %w", err) + } + + exists, err := r.Exists() + if err != nil { + return err + } + if !exists { + return xerrors.Errorf("lotus repo doesn't exist") + } + + lr, err := r.Lock(repo.RepoType(cctx.Int("repo-type"))) + if err != nil { + return err + } + defer lr.Close() //nolint:errcheck + + ds, err := lr.Datastore(datastore.NewKey(cctx.Args().First()).String()) + if err != nil { + return err + } + + val, err := ds.Get(datastore.NewKey(cctx.Args().Get(1))) + if err != nil { + return xerrors.Errorf("get: %w", err) + } + + return printVal(cctx.String("enc"), val) + }, +} + +func printVal(enc string, val []byte) error { + switch enc { + case "esc": + s := fmt.Sprintf("%q", string(val)) + s = strings.Trim(s, "\"") + fmt.Println(s) + case "hex": + fmt.Printf("%x\n", val) + case "cbor": + var out interface{} + if err := cbor.Unmarshal(cbor.DecodeOptions{}, val, &out); err != nil { + return xerrors.Errorf("unmarshaling cbor: %w", err) + } + s, err := json.Marshal(&out) + if err != nil { + return xerrors.Errorf("remarshaling as json: %w", err) + } + + fmt.Println(string(s)) + default: + return xerrors.New("unknown encoding") + } + + return nil +} diff --git a/cmd/lotus-shed/main.go b/cmd/lotus-shed/main.go index 118b4ea7274..61e6967d646 100644 --- a/cmd/lotus-shed/main.go +++ b/cmd/lotus-shed/main.go @@ -37,6 +37,7 @@ func main() { exportChainCmd, consensusCmd, serveDealStatsCmd, + datastoreCmd, } app := &cli.App{ diff --git a/go.mod b/go.mod index 67d87347f5d..99f689f3d47 100644 --- a/go.mod +++ b/go.mod @@ -106,6 +106,7 @@ require ( github.com/multiformats/go-multibase v0.0.3 github.com/multiformats/go-multihash v0.0.14 github.com/opentracing/opentracing-go v1.2.0 + github.com/polydawn/refmt v0.0.0-20190809202753-05966cbd336a github.com/raulk/clock v1.1.0 github.com/stretchr/testify v1.6.1 github.com/supranational/blst v0.1.1 From 6855284d88c8e19beed4c5bc8a7f374d1babd0da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 17:26:09 +0200 Subject: [PATCH 35/61] sectorstorage: Cancel non-running work in case of abort in sched --- extern/sector-storage/manager.go | 12 +++-- extern/sector-storage/manager_calltracker.go | 49 +++++++++++++++++--- 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 7d49cc958f2..afcc28ffd0c 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -364,10 +364,11 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke ctx, cancel := context.WithCancel(ctx) defer cancel() - wk, wait, err := m.getWork(ctx, sealtasks.TTPreCommit1, sector, ticket, pieces) + wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTPreCommit1, sector, ticket, pieces) if err != nil { return nil, xerrors.Errorf("getWork: %w", err) } + defer cancel() waitRes := 
func() { p, werr := m.waitWork(ctx, wk) @@ -408,10 +409,11 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase ctx, cancel := context.WithCancel(ctx) defer cancel() - wk, wait, err := m.getWork(ctx, sealtasks.TTPreCommit2, sector, phase1Out) + wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTPreCommit2, sector, phase1Out) if err != nil { return storage.SectorCids{}, xerrors.Errorf("getWork: %w", err) } + defer cancel() waitRes := func() { p, werr := m.waitWork(ctx, wk) @@ -449,10 +451,11 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a ctx, cancel := context.WithCancel(ctx) defer cancel() - wk, wait, err := m.getWork(ctx, sealtasks.TTCommit1, sector, ticket, seed, pieces, cids) + wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTCommit1, sector, ticket, seed, pieces, cids) if err != nil { return storage.Commit1Out{}, xerrors.Errorf("getWork: %w", err) } + defer cancel() waitRes := func() { p, werr := m.waitWork(ctx, wk) @@ -490,10 +493,11 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a } func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage.Commit1Out) (out storage.Proof, err error) { - wk, wait, err := m.getWork(ctx, sealtasks.TTCommit2, sector, phase1Out) + wk, wait, cancel, err := m.getWork(ctx, sealtasks.TTCommit2, sector, phase1Out) if err != nil { return storage.Proof{}, xerrors.Errorf("getWork: %w", err) } + defer cancel() waitRes := func() { p, werr := m.waitWork(ctx, wk) diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 147e11b9164..1135af4af41 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -102,10 +102,10 @@ func (m *Manager) setupWorkTracker() { } // returns wait=true when the task is already tracked/running -func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params ...interface{}) (wid WorkID, wait bool, err error) { +func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params ...interface{}) (wid WorkID, wait bool, cancel func(), err error) { wid, err = newWorkID(method, params) if err != nil { - return WorkID{}, false, xerrors.Errorf("creating WorkID: %w", err) + return WorkID{}, false, nil, xerrors.Errorf("creating WorkID: %w", err) } m.workLk.Lock() @@ -113,7 +113,7 @@ func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params have, err := m.work.Has(wid) if err != nil { - return WorkID{}, false, xerrors.Errorf("failed to check if the task is already tracked: %w", err) + return WorkID{}, false, nil, xerrors.Errorf("failed to check if the task is already tracked: %w", err) } if !have { @@ -122,15 +122,52 @@ func (m *Manager) getWork(ctx context.Context, method sealtasks.TaskType, params Status: wsStarted, }) if err != nil { - return WorkID{}, false, xerrors.Errorf("failed to track task start: %w", err) + return WorkID{}, false, nil, xerrors.Errorf("failed to track task start: %w", err) } - return wid, false, nil + return wid, false, func() { + m.workLk.Lock() + defer m.workLk.Unlock() + + have, err := m.work.Has(wid) + if err != nil { + log.Errorf("cancel: work has error: %+v", err) + return + } + + if !have { + return // expected / happy path + } + + var ws WorkState + if err := m.work.Get(wid).Get(&ws); err != nil { + log.Errorf("cancel: get work %s: %+v", wid, err) + return + } + + switch ws.Status { + case wsStarted: + log.Warn("canceling 
started (not running) work %s", wid) + + if err := m.work.Get(wid).End(); err != nil { + log.Errorf("cancel: failed to cancel started work %s: %+v", wid, err) + return + } + case wsDone: + // TODO: still remove? + log.Warn("cancel called on work %s in 'done' state", wid) + case wsRunning: + log.Warn("cancel called on work %s in 'running' state (manager shutting down?)", wid) + } + + }, nil } // already started - return wid, true, nil + return wid, true, func() { + // TODO + }, nil } func (m *Manager) startWork(ctx context.Context, wk WorkID) func(callID storiface.CallID, err error) error { From 6ddea62d09992679b0fd8e9c0bcc91ccd0c06f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 17:26:24 +0200 Subject: [PATCH 36/61] shed: gofmt --- cmd/lotus-shed/datastore.go | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cmd/lotus-shed/datastore.go b/cmd/lotus-shed/datastore.go index dcd774a90a9..5c668e0819a 100644 --- a/cmd/lotus-shed/datastore.go +++ b/cmd/lotus-shed/datastore.go @@ -16,7 +16,7 @@ import ( ) var datastoreCmd = &cli.Command{ - Name: "datastore", + Name: "datastore", Description: "access node datastores directly", Subcommands: []*cli.Command{ datastoreListCmd, @@ -25,19 +25,19 @@ var datastoreCmd = &cli.Command{ } var datastoreListCmd = &cli.Command{ - Name: "list", + Name: "list", Description: "list datastore keys", Flags: []cli.Flag{ &cli.IntFlag{ - Name: "repo-type", + Name: "repo-type", Value: 1, }, &cli.BoolFlag{ - Name: "top-level", + Name: "top-level", Usage: "only print top-level keys", }, &cli.StringFlag{ - Name: "get-enc", + Name: "get-enc", Usage: "print values [esc/hex/cbor]", }, }, @@ -72,8 +72,8 @@ var datastoreListCmd = &cli.Command{ genc := cctx.String("get-enc") q, err := ds.Query(dsq.Query{ - Prefix: datastore.NewKey(cctx.Args().Get(1)).String(), - KeysOnly: genc == "", + Prefix: datastore.NewKey(cctx.Args().Get(1)).String(), + KeysOnly: genc == "", }) if err != nil { return xerrors.Errorf("datastore query: %w", err) @@ -98,7 +98,6 @@ var datastoreListCmd = &cli.Command{ seen[s] = struct{}{} } - s = fmt.Sprintf("%q", s) s = strings.Trim(s, "\"") fmt.Println(s) @@ -116,15 +115,15 @@ var datastoreListCmd = &cli.Command{ } var datastoreGetCmd = &cli.Command{ - Name: "get", + Name: "get", Description: "list datastore keys", Flags: []cli.Flag{ &cli.IntFlag{ - Name: "repo-type", + Name: "repo-type", Value: 1, }, &cli.StringFlag{ - Name: "enc", + Name: "enc", Usage: "encoding (esc/hex/cbor)", Value: "esc", }, From 54fdd6ba5a24a2178bb9de410a74ab3fd71a0e49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 17:48:48 +0200 Subject: [PATCH 37/61] sectorstorage: Variable scopes are hard --- extern/sector-storage/manager.go | 35 ++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index afcc28ffd0c..2eee50b1b99 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -370,10 +370,11 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke } defer cancel() + var waitErr error waitRes := func() { p, werr := m.waitWork(ctx, wk) if werr != nil { - err = werr + waitErr = werr return } out = p.(storage.PreCommit1Out) @@ -401,8 +402,11 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke waitRes() return nil }) + if err != nil { + return nil, err + } - return out, err + 
return out, waitErr } func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage.PreCommit1Out) (out storage.SectorCids, err error) { @@ -415,10 +419,11 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase } defer cancel() + var waitErr error waitRes := func() { p, werr := m.waitWork(ctx, wk) if werr != nil { - err = werr + waitErr = werr return } out = p.(storage.SectorCids) @@ -444,7 +449,11 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase waitRes() return nil }) - return out, err + if err != nil { + return storage.SectorCids{}, err + } + + return out, waitErr } func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, seed abi.InteractiveSealRandomness, pieces []abi.PieceInfo, cids storage.SectorCids) (out storage.Commit1Out, err error) { @@ -457,10 +466,11 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a } defer cancel() + var waitErr error waitRes := func() { p, werr := m.waitWork(ctx, wk) if werr != nil { - err = werr + waitErr = werr return } out = p.(storage.Commit1Out) @@ -489,7 +499,11 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a waitRes() return nil }) - return out, err + if err != nil { + return nil, err + } + + return out, waitErr } func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Out storage.Commit1Out) (out storage.Proof, err error) { @@ -499,10 +513,11 @@ func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Ou } defer cancel() + var waitErr error waitRes := func() { p, werr := m.waitWork(ctx, wk) if werr != nil { - err = werr + waitErr = werr return } out = p.(storage.Proof) @@ -525,7 +540,11 @@ func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Ou return nil }) - return out, err + if err != nil { + return nil, err + } + + return out, waitErr } func (m *Manager) FinalizeSector(ctx context.Context, sector abi.SectorID, keepUnsealed []storage.Range) error { From a783bf9b8b1db350238212e351266eb8e684b772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 17:53:15 +0200 Subject: [PATCH 38/61] storagefsm: Handle PC2 with missing replica --- extern/storage-sealing/fsm.go | 1 + extern/storage-sealing/states_sealing.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/extern/storage-sealing/fsm.go b/extern/storage-sealing/fsm.go index 12af83f56bf..a936e14b875 100644 --- a/extern/storage-sealing/fsm.go +++ b/extern/storage-sealing/fsm.go @@ -57,6 +57,7 @@ var fsmPlanners = map[SectorState]func(events []statemachine.Event, state *Secto PreCommit2: planOne( on(SectorPreCommit2{}, PreCommitting), on(SectorSealPreCommit2Failed{}, SealPreCommit2Failed), + on(SectorSealPreCommit1Failed{}, SealPreCommit1Failed), ), PreCommitting: planOne( on(SectorSealPreCommit1Failed{}, SealPreCommit1Failed), diff --git a/extern/storage-sealing/states_sealing.go b/extern/storage-sealing/states_sealing.go index 55a3f27e81b..08371f7faf1 100644 --- a/extern/storage-sealing/states_sealing.go +++ b/extern/storage-sealing/states_sealing.go @@ -3,6 +3,7 @@ package sealing import ( "bytes" "context" + "github.com/ipfs/go-cid" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/actors/builtin/miner" @@ -138,6 +139,10 @@ func (m *Sealing) handlePreCommit2(ctx statemachine.Context, sector SectorInfo) return 
ctx.Send(SectorSealPreCommit2Failed{xerrors.Errorf("seal pre commit(2) failed: %w", err)}) } + if cids.Unsealed == cid.Undef { + return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("seal pre commit(2) returned undefined CommD")}) + } + return ctx.Send(SectorPreCommit2{ Unsealed: cids.Unsealed, Sealed: cids.Sealed, From c22859809897c7096d4738b827d4a22901803e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 18:16:07 +0200 Subject: [PATCH 39/61] sectorstorage: Variable scopes are really hard --- extern/sector-storage/manager.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 2eee50b1b99..6ed7d998999 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -382,7 +382,7 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke if wait { // already in progress waitRes() - return + return out, waitErr } if err := m.index.StorageLock(ctx, sector, storiface.FTUnsealed, storiface.FTSealed|storiface.FTCache); err != nil { @@ -431,7 +431,7 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase if wait { // already in progress waitRes() - return + return out, waitErr } if err := m.index.StorageLock(ctx, sector, storiface.FTSealed, storiface.FTCache); err != nil { @@ -478,7 +478,7 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a if wait { // already in progress waitRes() - return + return out, waitErr } if err := m.index.StorageLock(ctx, sector, storiface.FTSealed, storiface.FTCache); err != nil { @@ -525,7 +525,7 @@ func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Ou if wait { // already in progress waitRes() - return + return out, waitErr } selector := newTaskSelector() From 4f97d9637ea667f35fcd78e2ab32e2f7ea2cb2b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 19:11:58 +0200 Subject: [PATCH 40/61] Fix storage-fsm tests --- extern/storage-sealing/fsm_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/extern/storage-sealing/fsm_test.go b/extern/storage-sealing/fsm_test.go index 51fd2a37bf9..5b4541f7516 100644 --- a/extern/storage-sealing/fsm_test.go +++ b/extern/storage-sealing/fsm_test.go @@ -44,6 +44,9 @@ func TestHappyPath(t *testing.T) { } m.planSingle(SectorPacked{}) + require.Equal(m.t, m.state.State, GetTicket) + + m.planSingle(SectorTicket{}) require.Equal(m.t, m.state.State, PreCommit1) m.planSingle(SectorPreCommit1{}) @@ -73,7 +76,7 @@ func TestHappyPath(t *testing.T) { m.planSingle(SectorFinalized{}) require.Equal(m.t, m.state.State, Proving) - expected := []SectorState{Packing, PreCommit1, PreCommit2, PreCommitting, PreCommitWait, WaitSeed, Committing, SubmitCommit, CommitWait, FinalizeSector, Proving} + expected := []SectorState{Packing, GetTicket, PreCommit1, PreCommit2, PreCommitting, PreCommitWait, WaitSeed, Committing, SubmitCommit, CommitWait, FinalizeSector, Proving} for i, n := range notif { if n.before.State != expected[i] { t.Fatalf("expected before state: %s, got: %s", expected[i], n.before.State) @@ -98,6 +101,9 @@ func TestSeedRevert(t *testing.T) { } m.planSingle(SectorPacked{}) + require.Equal(m.t, m.state.State, GetTicket) + + m.planSingle(SectorTicket{}) require.Equal(m.t, m.state.State, PreCommit1) m.planSingle(SectorPreCommit1{}) From 2d16af6ee60c75cf762c407152bcc62f8ccddf7d Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 19:18:38 +0200 Subject: [PATCH 41/61] sectorstorage: Fix TestRedoPC1 --- extern/sector-storage/manager_test.go | 3 +++ extern/sector-storage/mock/mock.go | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index d87ec082757..fe82c0ca031 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -198,6 +198,9 @@ func TestRedoPC1(t *testing.T) { _, err = m.SealPreCommit1(ctx, sid, ticket, pieces) require.NoError(t, err) + // tell mock ffi that we expect PC1 again + require.NoError(t, tw.mockSeal.ForceState(sid, 0)) // sectorPacking + _, err = m.SealPreCommit1(ctx, sid, ticket, pieces) require.NoError(t, err) diff --git a/extern/sector-storage/mock/mock.go b/extern/sector-storage/mock/mock.go index a292c2bbb99..8eb65482d7c 100644 --- a/extern/sector-storage/mock/mock.go +++ b/extern/sector-storage/mock/mock.go @@ -127,6 +127,19 @@ func (mgr *SectorMgr) AcquireSectorNumber() (abi.SectorNumber, error) { return id, nil } +func (mgr *SectorMgr) ForceState(sid abi.SectorID, st int) error { + mgr.lk.Lock() + ss, ok := mgr.sectors[sid] + mgr.lk.Unlock() + if !ok { + return xerrors.Errorf("no sector with id %d in storage", sid) + } + + ss.state = st + + return nil +} + func (mgr *SectorMgr) SealPreCommit1(ctx context.Context, sid abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (out storage.PreCommit1Out, err error) { mgr.lk.Lock() ss, ok := mgr.sectors[sid] From e3ee4e4718b03e315e2dfda46010ceb0ba841bd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 19:32:19 +0200 Subject: [PATCH 42/61] Fix lint errors --- chain/messagepool/selection.go | 1 + cmd/lotus-seed/seed/seed.go | 2 +- cmd/lotus-storage-miner/storage.go | 6 +++--- extern/sector-storage/ffiwrapper/verifier_cgo.go | 10 ++++------ extern/sector-storage/manager.go | 2 +- extern/sector-storage/manager_calltracker.go | 9 +++------ extern/sector-storage/manager_test.go | 2 +- extern/sector-storage/roprov.go | 2 +- extern/sector-storage/selector_alloc.go | 2 +- extern/sector-storage/selector_existing.go | 2 +- extern/sector-storage/stats.go | 3 ++- extern/sector-storage/stores/http_handler.go | 2 +- extern/sector-storage/stores/index.go | 6 +++--- extern/sector-storage/stores/index_locks.go | 3 ++- extern/sector-storage/stores/index_locks_test.go | 3 ++- extern/sector-storage/stores/local.go | 4 ++-- extern/storage-sealing/states_sealing.go | 12 ++++++------ 17 files changed, 35 insertions(+), 36 deletions(-) diff --git a/chain/messagepool/selection.go b/chain/messagepool/selection.go index 2ddbed0ad18..4ade92a799c 100644 --- a/chain/messagepool/selection.go +++ b/chain/messagepool/selection.go @@ -12,6 +12,7 @@ import ( "github.com/filecoin-project/go-address" "github.com/filecoin-project/go-state-types/abi" tbig "github.com/filecoin-project/go-state-types/big" + "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/messagepool/gasguess" "github.com/filecoin-project/lotus/chain/types" diff --git a/cmd/lotus-seed/seed/seed.go b/cmd/lotus-seed/seed/seed.go index 32a7a68e101..92837cec58f 100644 --- a/cmd/lotus-seed/seed/seed.go +++ b/cmd/lotus-seed/seed/seed.go @@ -6,7 +6,6 @@ import ( "encoding/hex" "encoding/json" "fmt" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io/ioutil" "os" "path/filepath" @@ -31,6 +30,7 @@ import ( 
"github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper/basicfs" "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "github.com/filecoin-project/lotus/genesis" ) diff --git a/cmd/lotus-storage-miner/storage.go b/cmd/lotus-storage-miner/storage.go index ebc5d2fbe9e..cc91c9d6b26 100644 --- a/cmd/lotus-storage-miner/storage.go +++ b/cmd/lotus-storage-miner/storage.go @@ -3,7 +3,6 @@ package main import ( "encoding/json" "fmt" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io/ioutil" "os" "path/filepath" @@ -20,11 +19,12 @@ import ( "github.com/filecoin-project/go-address" "github.com/filecoin-project/go-state-types/abi" - "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/chain/types" lcli "github.com/filecoin-project/lotus/cli" + "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" + "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) const metaFile = "sectorstore.json" diff --git a/extern/sector-storage/ffiwrapper/verifier_cgo.go b/extern/sector-storage/ffiwrapper/verifier_cgo.go index c560d4328fb..8a9ac16d817 100644 --- a/extern/sector-storage/ffiwrapper/verifier_cgo.go +++ b/extern/sector-storage/ffiwrapper/verifier_cgo.go @@ -4,17 +4,15 @@ package ffiwrapper import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" - - "github.com/filecoin-project/specs-actors/actors/runtime/proof" + "go.opencensus.io/trace" "golang.org/x/xerrors" - "github.com/filecoin-project/go-state-types/abi" - ffi "github.com/filecoin-project/filecoin-ffi" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/specs-actors/actors/runtime/proof" - "go.opencensus.io/trace" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) func (sb *Sealer) GenerateWinningPoSt(ctx context.Context, minerID abi.ActorID, sectorInfo []proof.SectorInfo, randomness abi.PoStRandomness) ([]proof.PoStProof, error) { diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 6ed7d998999..9445bdd2a3f 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -3,7 +3,6 @@ package sectorstorage import ( "context" "errors" - "github.com/filecoin-project/go-statestore" "io" "net/http" "sync" @@ -15,6 +14,7 @@ import ( "golang.org/x/xerrors" "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-statestore" "github.com/filecoin-project/specs-storage/storage" "github.com/filecoin-project/lotus/extern/sector-storage/ffiwrapper" diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 1135af4af41..8315c6fe6f2 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -7,9 +7,10 @@ import ( "encoding/json" "errors" "fmt" - "golang.org/x/xerrors" "os" + "golang.org/x/xerrors" + "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) @@ -71,10 +72,6 @@ func (m *Manager) setupWorkTracker() { for _, st := range ids { wid := st.ID - if err := m.work.Get(wid).Get(&st); err != nil { - log.Errorf("getting work state for 
%s", wid) - continue - } if os.Getenv("LOTUS_MINER_ABORT_UNFINISHED_WORK") == "1" { st.Status = wsDone @@ -363,7 +360,7 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri _, ok = m.results[wid] if ok { - return xerrors.Errorf("result for call %v already reported") + return xerrors.Errorf("result for call %v already reported", wid) } m.results[wid] = res diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index fe82c0ca031..da89a04526a 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -265,7 +265,7 @@ func TestRestartManager(t *testing.T) { cwg.Wait() require.Error(t, perr) - m, lstor, _, _ = newTestMgr(ctx, t, ds) + m, _, _, _ = newTestMgr(ctx, t, ds) tw.ret = m // simulate jsonrpc auto-reconnect err = m.AddWorker(ctx, tw) require.NoError(t, err) diff --git a/extern/sector-storage/roprov.go b/extern/sector-storage/roprov.go index b808bfd9532..996d707218c 100644 --- a/extern/sector-storage/roprov.go +++ b/extern/sector-storage/roprov.go @@ -2,13 +2,13 @@ package sectorstorage import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "golang.org/x/xerrors" "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type readonlyProvider struct { diff --git a/extern/sector-storage/selector_alloc.go b/extern/sector-storage/selector_alloc.go index 800b8706b13..77ed447082e 100644 --- a/extern/sector-storage/selector_alloc.go +++ b/extern/sector-storage/selector_alloc.go @@ -2,7 +2,6 @@ package sectorstorage import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "golang.org/x/xerrors" @@ -10,6 +9,7 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type allocSelector struct { diff --git a/extern/sector-storage/selector_existing.go b/extern/sector-storage/selector_existing.go index 298c3d3bdf5..100f6dc4db8 100644 --- a/extern/sector-storage/selector_existing.go +++ b/extern/sector-storage/selector_existing.go @@ -2,7 +2,6 @@ package sectorstorage import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "golang.org/x/xerrors" @@ -10,6 +9,7 @@ import ( "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type existingSelector struct { diff --git a/extern/sector-storage/stats.go b/extern/sector-storage/stats.go index 849322be00a..bba47d169cc 100644 --- a/extern/sector-storage/stats.go +++ b/extern/sector-storage/stats.go @@ -1,8 +1,9 @@ package sectorstorage import ( - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "time" + + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) func (m *Manager) WorkerStats() map[uint64]storiface.WorkerStats { diff --git a/extern/sector-storage/stores/http_handler.go b/extern/sector-storage/stores/http_handler.go index 8891132ce71..2237bd4071b 100644 --- a/extern/sector-storage/stores/http_handler.go +++ b/extern/sector-storage/stores/http_handler.go @@ -2,7 +2,6 @@ package stores import ( "encoding/json" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "io" 
"net/http" "os" @@ -11,6 +10,7 @@ import ( logging "github.com/ipfs/go-log/v2" "golang.org/x/xerrors" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "github.com/filecoin-project/lotus/extern/sector-storage/tarutil" ) diff --git a/extern/sector-storage/stores/index.go b/extern/sector-storage/stores/index.go index e65e52ab1a3..9b3798ae1ac 100644 --- a/extern/sector-storage/stores/index.go +++ b/extern/sector-storage/stores/index.go @@ -2,7 +2,6 @@ package stores import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "net/url" gopath "path" "sort" @@ -11,10 +10,11 @@ import ( "golang.org/x/xerrors" - "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" - "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/go-state-types/big" + + "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) var HeartbeatInterval = 10 * time.Second diff --git a/extern/sector-storage/stores/index_locks.go b/extern/sector-storage/stores/index_locks.go index 59385e77cfd..3a5ff940ef5 100644 --- a/extern/sector-storage/stores/index_locks.go +++ b/extern/sector-storage/stores/index_locks.go @@ -2,12 +2,13 @@ package stores import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "sync" "golang.org/x/xerrors" "github.com/filecoin-project/go-state-types/abi" + + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) type sectorLock struct { diff --git a/extern/sector-storage/stores/index_locks_test.go b/extern/sector-storage/stores/index_locks_test.go index 3b1ccbf44b2..ec7f3492760 100644 --- a/extern/sector-storage/stores/index_locks_test.go +++ b/extern/sector-storage/stores/index_locks_test.go @@ -2,13 +2,14 @@ package stores import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/storiface" "testing" "time" "github.com/stretchr/testify/require" "github.com/filecoin-project/go-state-types/abi" + + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) var aSector = abi.SectorID{ diff --git a/extern/sector-storage/stores/local.go b/extern/sector-storage/stores/local.go index 216e88cbabf..e2dd9f1742e 100644 --- a/extern/sector-storage/stores/local.go +++ b/extern/sector-storage/stores/local.go @@ -212,12 +212,12 @@ func (st *Local) Redeclare(ctx context.Context) error { for id, p := range st.paths { mb, err := ioutil.ReadFile(filepath.Join(p.local, MetaFile)) if err != nil { - return xerrors.Errorf("reading storage metadata for %s: %w", p, err) + return xerrors.Errorf("reading storage metadata for %s: %w", p.local, err) } var meta LocalStorageMeta if err := json.Unmarshal(mb, &meta); err != nil { - return xerrors.Errorf("unmarshalling storage metadata for %s: %w", p, err) + return xerrors.Errorf("unmarshalling storage metadata for %s: %w", p.local, err) } fst, err := p.stat(st.localStorage) diff --git a/extern/storage-sealing/states_sealing.go b/extern/storage-sealing/states_sealing.go index 08371f7faf1..545f023f7a8 100644 --- a/extern/storage-sealing/states_sealing.go +++ b/extern/storage-sealing/states_sealing.go @@ -3,13 +3,8 @@ package sealing import ( "bytes" "context" - "github.com/ipfs/go-cid" - - "github.com/filecoin-project/lotus/build" - "github.com/filecoin-project/lotus/chain/actors/builtin/miner" - "github.com/filecoin-project/lotus/chain/actors/policy" - miner0 "github.com/filecoin-project/specs-actors/actors/builtin/miner" + "github.com/ipfs/go-cid" 
"golang.org/x/xerrors" "github.com/filecoin-project/go-state-types/abi" @@ -18,7 +13,12 @@ import ( "github.com/filecoin-project/go-state-types/exitcode" "github.com/filecoin-project/go-statemachine" "github.com/filecoin-project/specs-actors/actors/builtin" + miner0 "github.com/filecoin-project/specs-actors/actors/builtin/miner" "github.com/filecoin-project/specs-storage/storage" + + "github.com/filecoin-project/lotus/build" + "github.com/filecoin-project/lotus/chain/actors/builtin/miner" + "github.com/filecoin-project/lotus/chain/actors/policy" ) var DealSectorPriority = 1024 From 79d2ddf24ff452dc9c561c4f91d16e36a17bfc8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 30 Sep 2020 21:18:12 +0200 Subject: [PATCH 43/61] Review --- extern/sector-storage/sched.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index 760fe9cba52..e91c9252532 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -803,13 +803,7 @@ func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) { } sh.openWindows = newWindows - log.Debugf("dropWorker %d", wid) - - /*go func() { // TODO: just remove? - if err := w.w.Close(); err != nil { - log.Warnf("closing worker %d: %+v", err) - } - }()*/ + log.Debugf("worker %d dropped", wid) } } From b74a3229f8d9d5d4b009d287d5a8df501f00b8ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Tue, 13 Oct 2020 21:35:29 +0200 Subject: [PATCH 44/61] fsm: process expired-ticket sectors --- api/api_full.go | 2 ++ api/apistruct/struct.go | 5 +++++ extern/storage-sealing/fsm.go | 8 ++++++-- extern/storage-sealing/fsm_events.go | 5 +++++ extern/storage-sealing/sealing.go | 1 + extern/storage-sealing/states_failed.go | 2 +- extern/storage-sealing/states_sealing.go | 15 +++++++++++++++ node/impl/full/state.go | 19 +++++++++++++++++++ storage/adapter_storage_miner.go | 9 +++++++++ storage/miner.go | 1 + 10 files changed, 64 insertions(+), 3 deletions(-) diff --git a/api/api_full.go b/api/api_full.go index 657e945caf5..957529eaa31 100644 --- a/api/api_full.go +++ b/api/api_full.go @@ -355,6 +355,8 @@ type FullNode interface { StateMinerInitialPledgeCollateral(context.Context, address.Address, miner.SectorPreCommitInfo, types.TipSetKey) (types.BigInt, error) // StateMinerAvailableBalance returns the portion of a miner's balance that can be withdrawn or spent StateMinerAvailableBalance(context.Context, address.Address, types.TipSetKey) (types.BigInt, error) + // StateMinerSectorAllocated checks if a sector is allocated + StateMinerSectorAllocated(context.Context, address.Address, abi.SectorNumber, types.TipSetKey) (bool, error) // StateSectorPreCommitInfo returns the PreCommit info for the specified miner's sector StateSectorPreCommitInfo(context.Context, address.Address, abi.SectorNumber, types.TipSetKey) (miner.SectorPreCommitOnChainInfo, error) // StateSectorGetInfo returns the on-chain info for the specified miner's sector. 
Returns null in case the sector info isn't found diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index 61863044e69..edf11911451 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -181,6 +181,7 @@ type FullNodeStruct struct { StateMinerPreCommitDepositForPower func(context.Context, address.Address, miner.SectorPreCommitInfo, types.TipSetKey) (types.BigInt, error) `perm:"read"` StateMinerInitialPledgeCollateral func(context.Context, address.Address, miner.SectorPreCommitInfo, types.TipSetKey) (types.BigInt, error) `perm:"read"` StateMinerAvailableBalance func(context.Context, address.Address, types.TipSetKey) (types.BigInt, error) `perm:"read"` + StateMinerSectorAllocated func(context.Context, address.Address, abi.SectorNumber, types.TipSetKey) (bool, error) `perm:"read"` StateSectorPreCommitInfo func(context.Context, address.Address, abi.SectorNumber, types.TipSetKey) (miner.SectorPreCommitOnChainInfo, error) `perm:"read"` StateSectorGetInfo func(context.Context, address.Address, abi.SectorNumber, types.TipSetKey) (*miner.SectorOnChainInfo, error) `perm:"read"` StateSectorExpiration func(context.Context, address.Address, abi.SectorNumber, types.TipSetKey) (*miner.SectorExpiration, error) `perm:"read"` @@ -863,6 +864,10 @@ func (c *FullNodeStruct) StateMinerAvailableBalance(ctx context.Context, maddr a return c.Internal.StateMinerAvailableBalance(ctx, maddr, tsk) } +func (c *FullNodeStruct) StateMinerSectorAllocated(ctx context.Context, maddr address.Address, s abi.SectorNumber, tsk types.TipSetKey) (bool, error) { + return c.Internal.StateMinerSectorAllocated(ctx, maddr, s, tsk) +} + func (c *FullNodeStruct) StateSectorPreCommitInfo(ctx context.Context, maddr address.Address, n abi.SectorNumber, tsk types.TipSetKey) (miner.SectorPreCommitOnChainInfo, error) { return c.Internal.StateSectorPreCommitInfo(ctx, maddr, n, tsk) } diff --git a/extern/storage-sealing/fsm.go b/extern/storage-sealing/fsm.go index a936e14b875..3a5931c8bf8 100644 --- a/extern/storage-sealing/fsm.go +++ b/extern/storage-sealing/fsm.go @@ -45,8 +45,11 @@ var fsmPlanners = map[SectorState]func(events []statemachine.Event, state *Secto on(SectorAddPiece{}, WaitDeals), on(SectorStartPacking{}, Packing), ), - Packing: planOne(on(SectorPacked{}, GetTicket)), - GetTicket: planOne(on(SectorTicket{}, PreCommit1)), + Packing: planOne(on(SectorPacked{}, GetTicket)), + GetTicket: planOne( + on(SectorTicket{}, PreCommit1), + on(SectorCommitFailed{}, CommitFailed), + ), PreCommit1: planOne( on(SectorPreCommit1{}, PreCommit2), on(SectorSealPreCommit1Failed{}, SealPreCommit1Failed), @@ -124,6 +127,7 @@ var fsmPlanners = map[SectorState]func(events []statemachine.Event, state *Secto on(SectorRetryCommitWait{}, CommitWait), on(SectorDealsExpired{}, DealsExpired), on(SectorInvalidDealIDs{}, RecoverDealIDs), + on(SectorTicketExpired{}, Removing), ), FinalizeFailed: planOne( on(SectorRetryFinalize{}, FinalizeSector), diff --git a/extern/storage-sealing/fsm_events.go b/extern/storage-sealing/fsm_events.go index aec2beb0a12..59f5e77e68c 100644 --- a/extern/storage-sealing/fsm_events.go +++ b/extern/storage-sealing/fsm_events.go @@ -206,6 +206,11 @@ type SectorDealsExpired struct{ error } func (evt SectorDealsExpired) FormatError(xerrors.Printer) (next error) { return evt.error } func (evt SectorDealsExpired) apply(*SectorInfo) {} +type SectorTicketExpired struct{ error } + +func (evt SectorTicketExpired) FormatError(xerrors.Printer) (next error) { return evt.error } +func (evt SectorTicketExpired) 
apply(*SectorInfo) {} + type SectorCommitted struct { Proof []byte } diff --git a/extern/storage-sealing/sealing.go b/extern/storage-sealing/sealing.go index 1ba53661a6e..d9953eee009 100644 --- a/extern/storage-sealing/sealing.go +++ b/extern/storage-sealing/sealing.go @@ -53,6 +53,7 @@ type SealingAPI interface { StateMinerWorkerAddress(ctx context.Context, maddr address.Address, tok TipSetToken) (address.Address, error) StateMinerPreCommitDepositForPower(context.Context, address.Address, miner.SectorPreCommitInfo, TipSetToken) (big.Int, error) StateMinerInitialPledgeCollateral(context.Context, address.Address, miner.SectorPreCommitInfo, TipSetToken) (big.Int, error) + StateMinerSectorAllocated(context.Context, address.Address, abi.SectorNumber, TipSetToken) (bool, error) StateMarketStorageDeal(context.Context, abi.DealID, TipSetToken) (market.DealProposal, error) StateNetworkVersion(ctx context.Context, tok TipSetToken) (network.Version, error) SendMsg(ctx context.Context, from, to address.Address, method abi.MethodNum, value, maxFee abi.TokenAmount, params []byte) (cid.Cid, error) diff --git a/extern/storage-sealing/states_failed.go b/extern/storage-sealing/states_failed.go index d2283025378..b583701aea9 100644 --- a/extern/storage-sealing/states_failed.go +++ b/extern/storage-sealing/states_failed.go @@ -170,7 +170,7 @@ func (m *Sealing) handleCommitFailed(ctx statemachine.Context, sector SectorInfo case *ErrExpiredTicket: return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("ticket expired error: %w", err)}) case *ErrBadTicket: - return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("bad ticket: %w", err)}) + return ctx.Send(SectorTicketExpired{xerrors.Errorf("expired ticket: %w", err)}) case *ErrInvalidDeals: log.Warnf("invalid deals in sector %d: %v", sector.SectorNumber, err) return ctx.Send(SectorInvalidDealIDs{Return: RetCommitFailed}) diff --git a/extern/storage-sealing/states_sealing.go b/extern/storage-sealing/states_sealing.go index 7915660fa10..415335f6853 100644 --- a/extern/storage-sealing/states_sealing.go +++ b/extern/storage-sealing/states_sealing.go @@ -87,6 +87,21 @@ func (m *Sealing) getTicket(ctx statemachine.Context, sector SectorInfo) (abi.Se func (m *Sealing) handleGetTicket(ctx statemachine.Context, sector SectorInfo) error { ticketValue, ticketEpoch, err := m.getTicket(ctx, sector) if err != nil { + allocated, aerr := m.api.StateMinerSectorAllocated(ctx.Context(), m.maddr, sector.SectorNumber, nil) + if aerr == nil { + log.Errorf("error checking if sector is allocated: %+v", err) + } + + if allocated { + if sector.CommitMessage != nil { + // Some recovery paths with unfortunate timing lead here + return ctx.Send(SectorCommitFailed{xerrors.Errorf("sector %s is committed but got into the GetTicket state", sector.SectorNumber)}) + } + + log.Errorf("Sector %s precommitted but expired", sector.SectorNumber) + return ctx.Send(SectorRemove{}) + } + return ctx.Send(SectorSealPreCommit1Failed{xerrors.Errorf("getting ticket failed: %w", err)}) } diff --git a/node/impl/full/state.go b/node/impl/full/state.go index 1c39b59da3e..2cdbf00e4b7 100644 --- a/node/impl/full/state.go +++ b/node/impl/full/state.go @@ -1096,6 +1096,25 @@ func (a *StateAPI) StateMinerAvailableBalance(ctx context.Context, maddr address return types.BigAdd(abal, vested), nil } +func (a *StateAPI) StateMinerSectorAllocated(ctx context.Context, maddr address.Address, s abi.SectorNumber, tsk types.TipSetKey) (bool, error) { + ts, err := a.Chain.GetTipSetFromKey(tsk) + if err != nil { + return 
false, xerrors.Errorf("loading tipset %s: %w", tsk, err) + } + + act, err := a.StateManager.LoadActor(ctx, maddr, ts) + if err != nil { + return false, xerrors.Errorf("failed to load miner actor: %w", err) + } + + mas, err := miner.Load(a.StateManager.ChainStore().Store(ctx), act) + if err != nil { + return false, xerrors.Errorf("failed to load miner actor state: %w", err) + } + + return mas.IsAllocated(s) +} + // StateVerifiedClientStatus returns the data cap for the given address. // Returns zero if there is no entry in the data cap table for the // address. diff --git a/storage/adapter_storage_miner.go b/storage/adapter_storage_miner.go index 380fb44715c..8d74a897a8d 100644 --- a/storage/adapter_storage_miner.go +++ b/storage/adapter_storage_miner.go @@ -94,6 +94,15 @@ func (s SealingAPIAdapter) StateMinerDeadlines(ctx context.Context, maddr addres return s.delegate.StateMinerDeadlines(ctx, maddr, tsk) } +func (s SealingAPIAdapter) StateMinerSectorAllocated(ctx context.Context, maddr address.Address, sid abi.SectorNumber, tok sealing.TipSetToken) (bool, error) { + tsk, err := types.TipSetKeyFromBytes(tok) + if err != nil { + return false, xerrors.Errorf("failed to unmarshal TipSetToken to TipSetKey: %w", err) + } + + return s.delegate.StateMinerSectorAllocated(ctx, maddr, sid, tsk) +} + func (s SealingAPIAdapter) StateWaitMsg(ctx context.Context, mcid cid.Cid) (sealing.MsgLookup, error) { wmsg, err := s.delegate.StateWaitMsg(ctx, mcid, build.MessageConfidence) if err != nil { diff --git a/storage/miner.go b/storage/miner.go index 74a048c8e1b..b8985c1a566 100644 --- a/storage/miner.go +++ b/storage/miner.go @@ -83,6 +83,7 @@ type storageMinerApi interface { StateMinerProvingDeadline(context.Context, address.Address, types.TipSetKey) (*dline.Info, error) StateMinerPreCommitDepositForPower(context.Context, address.Address, miner.SectorPreCommitInfo, types.TipSetKey) (types.BigInt, error) StateMinerInitialPledgeCollateral(context.Context, address.Address, miner.SectorPreCommitInfo, types.TipSetKey) (types.BigInt, error) + StateMinerSectorAllocated(context.Context, address.Address, abi.SectorNumber, types.TipSetKey) (bool, error) StateSearchMsg(context.Context, cid.Cid) (*api.MsgLookup, error) StateWaitMsg(ctx context.Context, cid cid.Cid, confidence uint64) (*api.MsgLookup, error) // TODO: removeme eventually StateGetActor(ctx context.Context, actor address.Address, ts types.TipSetKey) (*types.Actor, error) From 68be28ca6dfce8d77120f90450c25b98d675de08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sat, 17 Oct 2020 12:53:42 +0200 Subject: [PATCH 45/61] Add Session API --- api/api_common.go | 5 +++++ api/apistruct/struct.go | 9 +++++++-- node/impl/common/common.go | 10 ++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/api/api_common.go b/api/api_common.go index f8fcbe8c5fb..5b036d1f6d9 100644 --- a/api/api_common.go +++ b/api/api_common.go @@ -4,6 +4,8 @@ import ( "context" "fmt" + "github.com/google/uuid" + "github.com/filecoin-project/go-jsonrpc/auth" metrics "github.com/libp2p/go-libp2p-core/metrics" "github.com/libp2p/go-libp2p-core/network" @@ -58,6 +60,9 @@ type Common interface { // trigger graceful shutdown Shutdown(context.Context) error + // Session returns a random UUID of api provider session + Session(context.Context) (uuid.UUID, error) + Closing(context.Context) (<-chan struct{}, error) } diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index edf11911451..b664f594f66 100644 --- a/api/apistruct/struct.go +++ 
b/api/apistruct/struct.go @@ -5,8 +5,7 @@ import ( "io" "time" - stnetwork "github.com/filecoin-project/go-state-types/network" - + "github.com/google/uuid" "github.com/ipfs/go-cid" metrics "github.com/libp2p/go-libp2p-core/metrics" "github.com/libp2p/go-libp2p-core/network" @@ -24,6 +23,7 @@ import ( "github.com/filecoin-project/go-state-types/big" "github.com/filecoin-project/go-state-types/crypto" "github.com/filecoin-project/go-state-types/dline" + stnetwork "github.com/filecoin-project/go-state-types/network" "github.com/filecoin-project/lotus/extern/sector-storage/fsutil" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" @@ -67,6 +67,7 @@ type CommonStruct struct { LogSetLevel func(context.Context, string, string) error `perm:"write"` Shutdown func(context.Context) error `perm:"admin"` + Session func(context.Context) (uuid.UUID, error) `perm:"read"` Closing func(context.Context) (<-chan struct{}, error) `perm:"read"` } } @@ -487,6 +488,10 @@ func (c *CommonStruct) Shutdown(ctx context.Context) error { return c.Internal.Shutdown(ctx) } +func (c *CommonStruct) Session(ctx context.Context) (uuid.UUID, error) { + return c.Internal.Session(ctx) +} + func (c *CommonStruct) Closing(ctx context.Context) (<-chan struct{}, error) { return c.Internal.Closing(ctx) } diff --git a/node/impl/common/common.go b/node/impl/common/common.go index da7cfff25aa..79478e489f9 100644 --- a/node/impl/common/common.go +++ b/node/impl/common/common.go @@ -5,9 +5,9 @@ import ( "sort" "strings" - logging "github.com/ipfs/go-log/v2" - "github.com/gbrlsnchs/jwt/v3" + "github.com/google/uuid" + logging "github.com/ipfs/go-log/v2" "github.com/libp2p/go-libp2p-core/host" metrics "github.com/libp2p/go-libp2p-core/metrics" "github.com/libp2p/go-libp2p-core/network" @@ -27,6 +27,8 @@ import ( "github.com/filecoin-project/lotus/node/modules/lp2p" ) +var session = uuid.New() + type CommonAPI struct { fx.In @@ -202,6 +204,10 @@ func (a *CommonAPI) Shutdown(ctx context.Context) error { return nil } +func (a *CommonAPI) Session(ctx context.Context) (uuid.UUID, error) { + return session, nil +} + func (a *CommonAPI) Closing(ctx context.Context) (<-chan struct{}, error) { return make(chan struct{}), nil // relies on jsonrpc closing } From 8d06cca073a49911a7018296b11951aceb32b484 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 12:35:44 +0200 Subject: [PATCH 46/61] sched: Handle workers using sessions instead of connections --- api/api_storage.go | 5 +- api/api_worker.go | 4 +- api/apistruct/struct.go | 16 +- cmd/lotus-seal-worker/main.go | 2 +- cmd/lotus-storage-miner/sealing.go | 15 +- extern/sector-storage/manager.go | 21 +- extern/sector-storage/sched.go | 315 +++++++++++++++------- extern/sector-storage/sched_test.go | 32 +-- extern/sector-storage/sched_watch.go | 100 ------- extern/sector-storage/stats.go | 22 +- extern/sector-storage/storiface/worker.go | 3 +- extern/sector-storage/testworker_test.go | 8 +- extern/sector-storage/worker_local.go | 11 +- node/impl/storminer.go | 5 +- storage/wdpost_run_test.go | 6 +- 15 files changed, 293 insertions(+), 272 deletions(-) delete mode 100644 extern/sector-storage/sched_watch.go diff --git a/api/api_storage.go b/api/api_storage.go index 79d538fe5b4..5520ad11457 100644 --- a/api/api_storage.go +++ b/api/api_storage.go @@ -5,6 +5,7 @@ import ( "context" "time" + "github.com/google/uuid" "github.com/ipfs/go-cid" "github.com/filecoin-project/go-address" @@ -62,8 
+63,8 @@ type StorageMiner interface { // WorkerConnect tells the node to connect to workers RPC WorkerConnect(context.Context, string) error - WorkerStats(context.Context) (map[uint64]storiface.WorkerStats, error) - WorkerJobs(context.Context) (map[int64][]storiface.WorkerJob, error) + WorkerStats(context.Context) (map[uuid.UUID]storiface.WorkerStats, error) + WorkerJobs(context.Context) (map[uuid.UUID][]storiface.WorkerJob, error) storiface.WorkerReturn // SealingSchedDiag dumps internal sealing scheduler state diff --git a/api/api_worker.go b/api/api_worker.go index 42eea9289f9..036748ec6f9 100644 --- a/api/api_worker.go +++ b/api/api_worker.go @@ -3,6 +3,8 @@ package api import ( "context" + "github.com/google/uuid" + "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" "github.com/filecoin-project/lotus/extern/sector-storage/stores" @@ -26,5 +28,5 @@ type WorkerAPI interface { StorageAddLocal(ctx context.Context, path string) error - Closing(context.Context) (<-chan struct{}, error) + Session(context.Context) (uuid.UUID, error) } diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index 180570742e7..70eb518e4b5 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -296,9 +296,9 @@ type StorageMinerStruct struct { SectorRemove func(context.Context, abi.SectorNumber) error `perm:"admin"` SectorMarkForUpgrade func(ctx context.Context, id abi.SectorNumber) error `perm:"admin"` - WorkerConnect func(context.Context, string) error `perm:"admin" retry:"true"` // TODO: worker perm - WorkerStats func(context.Context) (map[uint64]storiface.WorkerStats, error) `perm:"admin"` - WorkerJobs func(context.Context) (map[int64][]storiface.WorkerJob, error) `perm:"admin"` + WorkerConnect func(context.Context, string) error `perm:"admin" retry:"true"` // TODO: worker perm + WorkerStats func(context.Context) (map[uuid.UUID]storiface.WorkerStats, error) `perm:"admin"` + WorkerJobs func(context.Context) (map[uuid.UUID][]storiface.WorkerJob, error) `perm:"admin"` ReturnAddPiece func(ctx context.Context, callID storiface.CallID, pi abi.PieceInfo, err string) error `perm:"admin" retry:"true"` ReturnSealPreCommit1 func(ctx context.Context, callID storiface.CallID, p1o storage.PreCommit1Out, err string) error `perm:"admin" retry:"true"` @@ -376,7 +376,7 @@ type WorkerStruct struct { Remove func(ctx context.Context, sector abi.SectorID) error `perm:"admin"` StorageAddLocal func(ctx context.Context, path string) error `perm:"admin"` - Closing func(context.Context) (<-chan struct{}, error) `perm:"admin"` + Session func(context.Context) (uuid.UUID, error) `perm:"admin"` } } @@ -1200,11 +1200,11 @@ func (c *StorageMinerStruct) WorkerConnect(ctx context.Context, url string) erro return c.Internal.WorkerConnect(ctx, url) } -func (c *StorageMinerStruct) WorkerStats(ctx context.Context) (map[uint64]storiface.WorkerStats, error) { +func (c *StorageMinerStruct) WorkerStats(ctx context.Context) (map[uuid.UUID]storiface.WorkerStats, error) { return c.Internal.WorkerStats(ctx) } -func (c *StorageMinerStruct) WorkerJobs(ctx context.Context) (map[int64][]storiface.WorkerJob, error) { +func (c *StorageMinerStruct) WorkerJobs(ctx context.Context) (map[uuid.UUID][]storiface.WorkerJob, error) { return c.Internal.WorkerJobs(ctx) } @@ -1490,8 +1490,8 @@ func (w *WorkerStruct) StorageAddLocal(ctx context.Context, path string) error { return w.Internal.StorageAddLocal(ctx, path) } -func (w *WorkerStruct) Closing(ctx context.Context) (<-chan 
struct{}, error) { - return w.Internal.Closing(ctx) +func (w *WorkerStruct) Session(ctx context.Context) (uuid.UUID, error) { + return w.Internal.Session(ctx) } func (g GatewayStruct) ChainHasObj(ctx context.Context, c cid.Cid) (bool, error) { diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index bf7ce1e52f9..3472192e8e5 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -449,7 +449,7 @@ var runCmd = &cli.Command{ // TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly workerApi.LocalWorker.WaitQuiet() - if err := nodeApi.WorkerConnect(ctx, "ws://"+address+"/rpc/v0"); err != nil { + if err := nodeApi.WorkerConnect(ctx, "http://"+address+"/rpc/v0"); err != nil { log.Errorf("Registering worker failed: %+v", err) cancel() return diff --git a/cmd/lotus-storage-miner/sealing.go b/cmd/lotus-storage-miner/sealing.go index 3e33f218515..8649ad7d45a 100644 --- a/cmd/lotus-storage-miner/sealing.go +++ b/cmd/lotus-storage-miner/sealing.go @@ -11,6 +11,7 @@ import ( "time" "github.com/fatih/color" + "github.com/google/uuid" "github.com/urfave/cli/v2" "golang.org/x/xerrors" @@ -53,7 +54,7 @@ var sealingWorkersCmd = &cli.Command{ } type sortableStat struct { - id uint64 + id uuid.UUID storiface.WorkerStats } @@ -63,7 +64,7 @@ var sealingWorkersCmd = &cli.Command{ } sort.Slice(st, func(i, j int) bool { - return st[i].id < st[j].id + return st[i].id.String() < st[j].id.String() }) for _, stat := range st { @@ -74,7 +75,7 @@ var sealingWorkersCmd = &cli.Command{ gpuUse = "" } - fmt.Printf("Worker %d, host %s\n", stat.id, color.MagentaString(stat.Info.Hostname)) + fmt.Printf("Worker %s, host %s\n", stat.id, color.MagentaString(stat.Info.Hostname)) var barCols = uint64(64) cpuBars := int(stat.CpuUse * barCols / stat.Info.Resources.CPUs) @@ -140,7 +141,7 @@ var sealingJobsCmd = &cli.Command{ type line struct { storiface.WorkerJob - wid int64 + wid uuid.UUID } lines := make([]line, 0) @@ -165,7 +166,7 @@ var sealingJobsCmd = &cli.Command{ return lines[i].Start.Before(lines[j].Start) }) - workerHostnames := map[int64]string{} + workerHostnames := map[uuid.UUID]string{} wst, err := nodeApi.WorkerStats(ctx) if err != nil { @@ -173,7 +174,7 @@ var sealingJobsCmd = &cli.Command{ } for wid, st := range wst { - workerHostnames[int64(wid)] = st.Info.Hostname + workerHostnames[wid] = st.Info.Hostname } tw := tabwriter.NewWriter(os.Stdout, 2, 4, 2, ' ', 0) @@ -192,7 +193,7 @@ var sealingJobsCmd = &cli.Command{ dur = time.Now().Sub(l.Start).Truncate(time.Millisecond * 100).String() } - _, _ = fmt.Fprintf(tw, "%s\t%d\t%d\t%s\t%s\t%s\t%s\n", hex.EncodeToString(l.ID.ID[10:]), l.Sector.Number, l.wid, workerHostnames[l.wid], l.Task.Short(), state, dur) + _, _ = fmt.Fprintf(tw, "%s\t%d\t%s\t%s\t%s\t%s\t%s\n", hex.EncodeToString(l.ID.ID[10:]), l.Sector.Number, l.wid, workerHostnames[l.wid], l.Task.Short(), state, dur) } return tw.Flush() diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 9445bdd2a3f..0a8ff433999 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -7,6 +7,7 @@ import ( "net/http" "sync" + "github.com/google/uuid" "github.com/hashicorp/go-multierror" "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log/v2" @@ -40,8 +41,7 @@ type Worker interface { Info(context.Context) (storiface.WorkerInfo, error) - // returns channel signalling worker shutdown - Closing(context.Context) (<-chan struct{}, error) + Session(context.Context) (uuid.UUID, 
error) Close() error // TODO: do we need this? } @@ -57,7 +57,8 @@ type SectorManager interface { FaultTracker } -type WorkerID int64 +type WorkerID uuid.UUID // worker session UUID +var ClosedWorkerID = uuid.UUID{} type Manager struct { scfg *ffiwrapper.Config @@ -190,19 +191,7 @@ func (m *Manager) AddLocalStorage(ctx context.Context, path string) error { } func (m *Manager) AddWorker(ctx context.Context, w Worker) error { - info, err := w.Info(ctx) - if err != nil { - return xerrors.Errorf("getting worker info: %w", err) - } - - m.sched.newWorkers <- &workerHandle{ - w: w, - - info: info, - preparing: &activeResources{}, - active: &activeResources{}, - } - return nil + return m.sched.runWorker(ctx, w) } func (m *Manager) ServeHTTP(w http.ResponseWriter, r *http.Request) { diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index e91c9252532..a4e6a6239ad 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -2,7 +2,6 @@ package sectorstorage import ( "context" - "fmt" "math/rand" "sort" "sync" @@ -13,6 +12,7 @@ import ( "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" + "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) @@ -53,17 +53,13 @@ type WorkerSelector interface { type scheduler struct { spt abi.RegisteredSealProof - workersLk sync.RWMutex - nextWorker WorkerID - workers map[WorkerID]*workerHandle - - newWorkers chan *workerHandle - - watchClosing chan WorkerID - workerClosing chan WorkerID + workersLk sync.RWMutex + workers map[WorkerID]*workerHandle schedule chan *workerRequest windowRequests chan *schedWindowRequest + workerChange chan struct{} // worker added / changed/freed resources + workerDisable chan workerDisableReq // owned by the sh.runSched goroutine schedQueue *requestQueue @@ -91,6 +87,8 @@ type workerHandle struct { wndLk sync.Mutex activeWindows []*schedWindow + enabled bool + // for sync manager goroutine closing cleanupStarted bool closedMgr chan struct{} @@ -108,6 +106,12 @@ type schedWindow struct { todo []*workerRequest } +type workerDisableReq struct { + activeWindows []*schedWindow + wid WorkerID + done func() +} + type activeResources struct { memUsedMin uint64 memUsedMax uint64 @@ -143,16 +147,12 @@ func newScheduler(spt abi.RegisteredSealProof) *scheduler { return &scheduler{ spt: spt, - nextWorker: 0, - workers: map[WorkerID]*workerHandle{}, - - newWorkers: make(chan *workerHandle), - - watchClosing: make(chan WorkerID), - workerClosing: make(chan WorkerID), + workers: map[WorkerID]*workerHandle{}, schedule: make(chan *workerRequest), windowRequests: make(chan *schedWindowRequest, 20), + workerChange: make(chan struct{}, 20), + workerDisable: make(chan workerDisableReq), schedQueue: &requestQueue{}, @@ -224,21 +224,19 @@ type SchedDiagInfo struct { func (sh *scheduler) runSched() { defer close(sh.closed) - go sh.runWorkerWatcher() - iw := time.After(InitWait) var initialised bool for { var doSched bool + var toDisable []workerDisableReq select { - case w := <-sh.newWorkers: - sh.newWorker(w) - - case wid := <-sh.workerClosing: - sh.dropWorker(wid) - + case <-sh.workerChange: + doSched = true + case dreq := <-sh.workerDisable: + toDisable = append(toDisable, dreq) + doSched = true case req := <-sh.schedule: sh.schedQueue.Push(req) doSched = true @@ -267,6 +265,9 @@ func (sh *scheduler) runSched() { loop: for { select { + case <-sh.workerChange: + case 
dreq := <-sh.workerDisable: + toDisable = append(toDisable, dreq) case req := <-sh.schedule: sh.schedQueue.Push(req) if sh.testSync != nil { @@ -279,6 +280,28 @@ func (sh *scheduler) runSched() { } } + for _, req := range toDisable { + for _, window := range req.activeWindows { + for _, request := range window.todo { + sh.schedQueue.Push(request) + } + } + + openWindows := make([]*schedWindowRequest, 0, len(sh.openWindows)) + for _, window := range sh.openWindows { + if window.worker != req.wid { + openWindows = append(openWindows, window) + } + } + sh.openWindows = openWindows + + sh.workersLk.Lock() + sh.workers[req.wid].enabled = false + sh.workersLk.Unlock() + + req.done() + } + sh.trySched() } @@ -298,6 +321,9 @@ func (sh *scheduler) diag() SchedDiagInfo { }) } + sh.workersLk.RLock() + defer sh.workersLk.RUnlock() + for _, window := range sh.openWindows { out.OpenWindows = append(out.OpenWindows, window.worker) } @@ -322,13 +348,14 @@ func (sh *scheduler) trySched() { */ + sh.workersLk.RLock() + defer sh.workersLk.RUnlock() + windows := make([]schedWindow, len(sh.openWindows)) acceptableWindows := make([][]int, sh.schedQueue.Len()) log.Debugf("SCHED %d queued; %d open windows", sh.schedQueue.Len(), len(windows)) - sh.workersLk.RLock() - defer sh.workersLk.RUnlock() if len(sh.openWindows) == 0 { // nothing to schedule on return @@ -357,11 +384,16 @@ func (sh *scheduler) trySched() { for wnd, windowRequest := range sh.openWindows { worker, ok := sh.workers[windowRequest.worker] if !ok { - log.Errorf("worker referenced by windowRequest not found (worker: %d)", windowRequest.worker) + log.Errorf("worker referenced by windowRequest not found (worker: %s)", windowRequest.worker) // TODO: How to move forward here? continue } + if !worker.enabled { + log.Debugw("skipping disabled worker", "worker", windowRequest.worker) + continue + } + // TODO: allow bigger windows if !windows[wnd].allocated.canHandleRequest(needRes, windowRequest.worker, "schedAcceptable", worker.info.Resources) { continue @@ -499,21 +531,48 @@ func (sh *scheduler) trySched() { sh.openWindows = newOpenWindows } -func (sh *scheduler) runWorker(wid WorkerID) { - var ready sync.WaitGroup - ready.Add(1) - defer ready.Wait() +// context only used for startup +func (sh *scheduler) runWorker(ctx context.Context, w Worker) error { + info, err := w.Info(ctx) + if err != nil { + return xerrors.Errorf("getting worker info: %w", err) + } - go func() { - sh.workersLk.RLock() - worker, found := sh.workers[wid] - sh.workersLk.RUnlock() + sessID, err := w.Session(ctx) + if err != nil { + return xerrors.Errorf("getting worker session: %w", err) + } + if sessID == ClosedWorkerID { + return xerrors.Errorf("worker already closed") + } - ready.Done() + worker := &workerHandle{ + w: w, + info: info, - if !found { - panic(fmt.Sprintf("worker %d not found", wid)) - } + preparing: &activeResources{}, + active: &activeResources{}, + enabled: true, + + closingMgr: make(chan struct{}), + closedMgr: make(chan struct{}), + } + + wid := WorkerID(sessID) + + sh.workersLk.Lock() + _, exist := sh.workers[wid] + if exist { + // this is ok, we're already handling this worker in a different goroutine + return nil + } + + sh.workers[wid] = worker + sh.workersLk.Unlock() + + go func() { + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() defer close(worker.closedMgr) @@ -521,23 +580,60 @@ func (sh *scheduler) runWorker(wid WorkerID) { taskDone := make(chan struct{}, 1) windowsRequested := 0 - ctx, cancel := context.WithCancel(context.TODO()) - 
defer cancel() + disable := func(ctx context.Context) error { + done := make(chan struct{}) - workerClosing, err := worker.w.Closing(ctx) - if err != nil { - return + // request cleanup in the main scheduler goroutine + select { + case sh.workerDisable <- workerDisableReq{ + activeWindows: worker.activeWindows, + wid: wid, + done: func() { + close(done) + }, + }: + case <-ctx.Done(): + return ctx.Err() + case <-sh.closing: + return nil + } + + // wait for cleanup to complete + select { + case <-done: + case <-ctx.Done(): + return ctx.Err() + case <-sh.closing: + return nil + } + + worker.activeWindows = worker.activeWindows[:0] + windowsRequested = 0 + return nil } defer func() { - log.Warnw("Worker closing", "workerid", wid) + log.Warnw("Worker closing", "workerid", sessID) - // TODO: close / return all queued tasks + if err := disable(ctx); err != nil { + log.Warnw("failed to disable worker", "worker", wid, "error", err) + } + + sh.workersLk.Lock() + delete(sh.workers, wid) + sh.workersLk.Unlock() }() + heartbeatTimer := time.NewTicker(stores.HeartbeatInterval) + defer heartbeatTimer.Stop() + for { - // ask for more windows if we need them - for ; windowsRequested < SchedWindows; windowsRequested++ { + sh.workersLk.Lock() + enabled := worker.enabled + sh.workersLk.Unlock() + + // ask for more windows if we need them (non-blocking) + for ; enabled && windowsRequested < SchedWindows; windowsRequested++ { select { case sh.windowRequests <- &schedWindowRequest{ worker: wid, @@ -545,33 +641,90 @@ func (sh *scheduler) runWorker(wid WorkerID) { }: case <-sh.closing: return - case <-workerClosing: - return case <-worker.closingMgr: return } } - select { - case w := <-scheduledWindows: - worker.wndLk.Lock() - worker.activeWindows = append(worker.activeWindows, w) - worker.wndLk.Unlock() - case <-taskDone: - log.Debugw("task done", "workerid", wid) - case <-sh.closing: - return - case <-workerClosing: - return - case <-worker.closingMgr: - return + // wait for more windows to come in, or for tasks to get finished (blocking) + for { + + // first ping the worker and check session + { + sctx, scancel := context.WithTimeout(ctx, stores.HeartbeatInterval/2) + curSes, err := worker.w.Session(sctx) + scancel() + if err != nil { + // Likely temporary error + + log.Warnw("failed to check worker session", "error", err) + + if err := disable(ctx); err != nil { + log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) + } + + select { + case <-heartbeatTimer.C: + continue + case w := <-scheduledWindows: + // was in flight when initially disabled, return + worker.wndLk.Lock() + worker.activeWindows = append(worker.activeWindows, w) + worker.wndLk.Unlock() + + if err := disable(ctx); err != nil { + log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) + } + case <-sh.closing: + return + case <-worker.closingMgr: + return + } + continue + } + + if curSes != sessID { + if curSes != ClosedWorkerID { + // worker restarted + log.Warnw("worker session changed (worker restarted?)", "initial", sessID, "current", curSes) + } + + return + } + + // session looks good + if !enabled { + sh.workersLk.Lock() + worker.enabled = true + sh.workersLk.Unlock() + + // we'll send window requests on the next loop + } + } + + select { + case <-heartbeatTimer.C: + continue + case w := <-scheduledWindows: + worker.wndLk.Lock() + worker.activeWindows = append(worker.activeWindows, w) + worker.wndLk.Unlock() + case <-taskDone: + log.Debugw("task done", "workerid", wid) + case 
<-sh.closing: + return + case <-worker.closingMgr: + return + } + + break } + // process assigned windows (non-blocking) sh.workersLk.RLock() worker.wndLk.Lock() windowsRequested -= sh.workerCompactWindows(worker, wid) - assignLoop: // process windows in order for len(worker.activeWindows) > 0 { @@ -622,6 +775,8 @@ func (sh *scheduler) runWorker(wid WorkerID) { sh.workersLk.RUnlock() } }() + + return nil } func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) int { @@ -745,38 +900,6 @@ func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *worke return nil } -func (sh *scheduler) newWorker(w *workerHandle) { - w.closedMgr = make(chan struct{}) - w.closingMgr = make(chan struct{}) - - sh.workersLk.Lock() - - id := sh.nextWorker - sh.workers[id] = w - sh.nextWorker++ - - sh.workersLk.Unlock() - - sh.runWorker(id) - - select { - case sh.watchClosing <- id: - case <-sh.closing: - return - } -} - -func (sh *scheduler) dropWorker(wid WorkerID) { - sh.workersLk.Lock() - defer sh.workersLk.Unlock() - - w := sh.workers[wid] - - sh.workerCleanup(wid, w) - - delete(sh.workers, wid) -} - func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) { select { case <-w.closingMgr: diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index 3a198bad5f0..1afa92b642c 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -10,6 +10,7 @@ import ( "testing" "time" + "github.com/google/uuid" "github.com/ipfs/go-cid" logging "github.com/ipfs/go-log/v2" "github.com/stretchr/testify/require" @@ -43,7 +44,7 @@ type schedTestWorker struct { paths []stores.StoragePath closed bool - closing chan struct{} + session uuid.UUID } func (s *schedTestWorker) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticket abi.SealRandomness, pieces []abi.PieceInfo) (storiface.CallID, error) { @@ -121,15 +122,15 @@ func (s *schedTestWorker) Info(ctx context.Context) (storiface.WorkerInfo, error }, nil } -func (s *schedTestWorker) Closing(ctx context.Context) (<-chan struct{}, error) { - return s.closing, nil +func (s *schedTestWorker) Session(context.Context) (uuid.UUID, error) { + return s.session, nil } func (s *schedTestWorker) Close() error { if !s.closed { log.Info("close schedTestWorker") s.closed = true - close(s.closing) + s.session = uuid.UUID{} } return nil } @@ -142,7 +143,7 @@ func addTestWorker(t *testing.T, sched *scheduler, index *stores.Index, name str taskTypes: taskTypes, paths: []stores.StoragePath{{ID: "bb-8", Weight: 2, LocalPath: "food", CanSeal: true, CanStore: true}}, - closing: make(chan struct{}), + session: uuid.New(), } for _, path := range w.paths { @@ -160,16 +161,7 @@ func addTestWorker(t *testing.T, sched *scheduler, index *stores.Index, name str require.NoError(t, err) } - info, err := w.Info(context.TODO()) - require.NoError(t, err) - - sched.newWorkers <- &workerHandle{ - w: w, - - info: info, - preparing: &activeResources{}, - active: &activeResources{}, - } + require.NoError(t, sched.runWorker(context.TODO(), w)) } func TestSchedStartStop(t *testing.T) { @@ -433,7 +425,7 @@ func TestSched(t *testing.T) { type line struct { storiface.WorkerJob - wid uint64 + wid uuid.UUID } lines := make([]line, 0) @@ -442,7 +434,7 @@ func TestSched(t *testing.T) { for _, job := range jobs { lines = append(lines, line{ WorkerJob: job, - wid: uint64(wid), + wid: wid, }) } } @@ -537,7 +529,7 @@ func BenchmarkTrySched(b *testing.B) { b.StopTimer() sched := newScheduler(spt) - 
sched.workers[0] = &workerHandle{ + sched.workers[WorkerID{}] = &workerHandle{ w: nil, info: storiface.WorkerInfo{ Hostname: "t", @@ -549,7 +541,7 @@ func BenchmarkTrySched(b *testing.B) { for i := 0; i < windows; i++ { sched.openWindows = append(sched.openWindows, &schedWindowRequest{ - worker: 0, + worker: WorkerID{}, done: make(chan *schedWindow, 1000), }) } @@ -599,7 +591,7 @@ func TestWindowCompact(t *testing.T) { wh.activeWindows = append(wh.activeWindows, window) } - n := sh.workerCompactWindows(wh, 0) + n := sh.workerCompactWindows(wh, WorkerID{}) require.Equal(t, len(start)-len(expect), n) for wi, tasks := range expect { diff --git a/extern/sector-storage/sched_watch.go b/extern/sector-storage/sched_watch.go deleted file mode 100644 index 2dd9875d77a..00000000000 --- a/extern/sector-storage/sched_watch.go +++ /dev/null @@ -1,100 +0,0 @@ -package sectorstorage - -import ( - "context" - "reflect" -) - -func (sh *scheduler) runWorkerWatcher() { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - - nilch := reflect.ValueOf(new(chan struct{})).Elem() - - cases := []reflect.SelectCase{ - { - Dir: reflect.SelectRecv, - Chan: reflect.ValueOf(sh.closing), - }, - { - Dir: reflect.SelectRecv, - Chan: reflect.ValueOf(sh.watchClosing), - }, - } - - caseToWorker := map[int]WorkerID{} - - for { - n, rv, ok := reflect.Select(cases) - - switch { - case n == 0: // sh.closing - return - case n == 1: // sh.watchClosing - if !ok { - log.Errorf("watchClosing channel closed") - return - } - - wid, ok := rv.Interface().(WorkerID) - if !ok { - panic("got a non-WorkerID message") - } - - sh.workersLk.Lock() - workerClosing, err := sh.workers[wid].w.Closing(ctx) - sh.workersLk.Unlock() - if err != nil { - log.Errorf("getting worker closing channel: %+v", err) - select { - case sh.workerClosing <- wid: - case <-sh.closing: - return - } - - continue - } - - toSet := -1 - for i, sc := range cases { - if sc.Chan == nilch { - toSet = i - break - } - } - if toSet == -1 { - toSet = len(cases) - cases = append(cases, reflect.SelectCase{}) - } - - cases[toSet] = reflect.SelectCase{ - Dir: reflect.SelectRecv, - Chan: reflect.ValueOf(workerClosing), - } - - caseToWorker[toSet] = wid - default: - wid, found := caseToWorker[n] - if !found { - log.Errorf("worker ID not found for case %d", n) - continue - } - - delete(caseToWorker, n) - cases[n] = reflect.SelectCase{ - Dir: reflect.SelectRecv, - Chan: nilch, - } - - log.Warnf("worker %d dropped", wid) - // send in a goroutine to avoid a deadlock between workerClosing / watchClosing - go func() { - select { - case sh.workerClosing <- wid: - case <-sh.closing: - return - } - }() - } - } -} diff --git a/extern/sector-storage/stats.go b/extern/sector-storage/stats.go index bba47d169cc..f9d96fc5d5c 100644 --- a/extern/sector-storage/stats.go +++ b/extern/sector-storage/stats.go @@ -3,18 +3,22 @@ package sectorstorage import ( "time" + "github.com/google/uuid" + "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) -func (m *Manager) WorkerStats() map[uint64]storiface.WorkerStats { +func (m *Manager) WorkerStats() map[uuid.UUID]storiface.WorkerStats { m.sched.workersLk.RLock() defer m.sched.workersLk.RUnlock() - out := map[uint64]storiface.WorkerStats{} + out := map[uuid.UUID]storiface.WorkerStats{} for id, handle := range m.sched.workers { - out[uint64(id)] = storiface.WorkerStats{ - Info: handle.info, + out[uuid.UUID(id)] = storiface.WorkerStats{ + Info: handle.info, + Enabled: handle.enabled, + MemUsedMin: handle.active.memUsedMin, 
MemUsedMax: handle.active.memUsedMax, GpuUsed: handle.active.gpuUsed, @@ -25,12 +29,12 @@ func (m *Manager) WorkerStats() map[uint64]storiface.WorkerStats { return out } -func (m *Manager) WorkerJobs() map[int64][]storiface.WorkerJob { - out := map[int64][]storiface.WorkerJob{} +func (m *Manager) WorkerJobs() map[uuid.UUID][]storiface.WorkerJob { + out := map[uuid.UUID][]storiface.WorkerJob{} calls := map[storiface.CallID]struct{}{} for _, t := range m.sched.wt.Running() { - out[int64(t.worker)] = append(out[int64(t.worker)], t.job) + out[uuid.UUID(t.worker)] = append(out[uuid.UUID(t.worker)], t.job) calls[t.job.ID] = struct{}{} } @@ -40,7 +44,7 @@ func (m *Manager) WorkerJobs() map[int64][]storiface.WorkerJob { handle.wndLk.Lock() for wi, window := range handle.activeWindows { for _, request := range window.todo { - out[int64(id)] = append(out[int64(id)], storiface.WorkerJob{ + out[uuid.UUID(id)] = append(out[uuid.UUID(id)], storiface.WorkerJob{ ID: storiface.UndefCall, Sector: request.sector, Task: request.taskType, @@ -63,7 +67,7 @@ func (m *Manager) WorkerJobs() map[int64][]storiface.WorkerJob { continue } - out[-1] = append(out[-1], storiface.WorkerJob{ + out[uuid.UUID{}] = append(out[uuid.UUID{}], storiface.WorkerJob{ ID: id, Sector: id.Sector, Task: work.Method, diff --git a/extern/sector-storage/storiface/worker.go b/extern/sector-storage/storiface/worker.go index e6ab2246fdd..bbc9ca55482 100644 --- a/extern/sector-storage/storiface/worker.go +++ b/extern/sector-storage/storiface/worker.go @@ -32,7 +32,8 @@ type WorkerResources struct { } type WorkerStats struct { - Info WorkerInfo + Info WorkerInfo + Enabled bool MemUsedMin uint64 MemUsedMax uint64 diff --git a/extern/sector-storage/testworker_test.go b/extern/sector-storage/testworker_test.go index 94a87cdd204..fda25643ab5 100644 --- a/extern/sector-storage/testworker_test.go +++ b/extern/sector-storage/testworker_test.go @@ -27,6 +27,8 @@ type testWorker struct { pc1s int pc1lk sync.Mutex pc1wait *sync.WaitGroup + + session uuid.UUID } func newTestWorker(wcfg WorkerConfig, lstor *stores.Local, ret storiface.WorkerReturn) *testWorker { @@ -46,6 +48,8 @@ func newTestWorker(wcfg WorkerConfig, lstor *stores.Local, ret storiface.WorkerR ret: ret, mockSeal: mock.NewMockSectorMgr(ssize, nil), + + session: uuid.New(), } } @@ -158,8 +162,8 @@ func (t *testWorker) Info(ctx context.Context) (storiface.WorkerInfo, error) { }, nil } -func (t *testWorker) Closing(ctx context.Context) (<-chan struct{}, error) { - return ctx.Done(), nil +func (t *testWorker) Session(context.Context) (uuid.UUID, error) { + return t.session, nil } func (t *testWorker) Close() error { diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index e38b84d402c..739f70fa0cc 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -48,6 +48,7 @@ type LocalWorker struct { acceptTasks map[sealtasks.TaskType]struct{} running sync.WaitGroup + session uuid.UUID closing chan struct{} } @@ -73,6 +74,7 @@ func newLocalWorker(executor func() (ffiwrapper.Storage, error), wcfg WorkerConf executor: executor, noSwap: wcfg.NoSwap, + session: uuid.New(), closing: make(chan struct{}), } @@ -465,8 +467,13 @@ func (l *LocalWorker) Info(context.Context) (storiface.WorkerInfo, error) { }, nil } -func (l *LocalWorker) Closing(ctx context.Context) (<-chan struct{}, error) { - return l.closing, nil +func (l *LocalWorker) Session(ctx context.Context) (uuid.UUID, error) { + select { + case <-l.closing: + return 
ClosedWorkerID, nil + default: + return l.session, nil + } } func (l *LocalWorker) Close() error { diff --git a/node/impl/storminer.go b/node/impl/storminer.go index f7da9171108..b27ea9edbf1 100644 --- a/node/impl/storminer.go +++ b/node/impl/storminer.go @@ -8,6 +8,7 @@ import ( "strconv" "time" + "github.com/google/uuid" "github.com/ipfs/go-cid" "github.com/libp2p/go-libp2p-core/host" "golang.org/x/xerrors" @@ -85,11 +86,11 @@ func (sm *StorageMinerAPI) ServeRemote(w http.ResponseWriter, r *http.Request) { sm.StorageMgr.ServeHTTP(w, r) } -func (sm *StorageMinerAPI) WorkerStats(context.Context) (map[uint64]storiface.WorkerStats, error) { +func (sm *StorageMinerAPI) WorkerStats(context.Context) (map[uuid.UUID]storiface.WorkerStats, error) { return sm.StorageMgr.WorkerStats(), nil } -func (sm *StorageMinerAPI) WorkerJobs(ctx context.Context) (map[int64][]storiface.WorkerJob, error) { +func (sm *StorageMinerAPI) WorkerJobs(ctx context.Context) (map[uuid.UUID][]storiface.WorkerJob, error) { return sm.StorageMgr.WorkerJobs(), nil } diff --git a/storage/wdpost_run_test.go b/storage/wdpost_run_test.go index dd7ac4c24c5..10dfbd2817c 100644 --- a/storage/wdpost_run_test.go +++ b/storage/wdpost_run_test.go @@ -16,7 +16,6 @@ import ( "github.com/filecoin-project/go-state-types/big" "github.com/filecoin-project/go-state-types/crypto" "github.com/filecoin-project/go-state-types/dline" - "github.com/filecoin-project/go-state-types/network" builtin0 "github.com/filecoin-project/specs-actors/actors/builtin" miner0 "github.com/filecoin-project/specs-actors/actors/builtin/miner" proof0 "github.com/filecoin-project/specs-actors/actors/runtime/proof" @@ -31,6 +30,7 @@ import ( type mockStorageMinerAPI struct { partitions []api.Partition pushedMessages chan *types.Message + storageMinerApi } func newMockStorageMinerAPI() *mockStorageMinerAPI { @@ -46,10 +46,6 @@ func (m *mockStorageMinerAPI) StateMinerInfo(ctx context.Context, a address.Addr }, nil } -func (m *mockStorageMinerAPI) StateNetworkVersion(ctx context.Context, key types.TipSetKey) (network.Version, error) { - panic("implement me") -} - func (m *mockStorageMinerAPI) ChainGetRandomnessFromTickets(ctx context.Context, tsk types.TipSetKey, personalization crypto.DomainSeparationTag, randEpoch abi.ChainEpoch, entropy []byte) (abi.Randomness, error) { return abi.Randomness("ticket rand"), nil } From f933e1d2b7c847dfa733aed0c8d0f1c65d36d02f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 13:03:17 +0200 Subject: [PATCH 47/61] miner cli: Update to uuid worker IDs --- cmd/lotus-storage-miner/sealing.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/cmd/lotus-storage-miner/sealing.go b/cmd/lotus-storage-miner/sealing.go index 8649ad7d45a..440d4aaea76 100644 --- a/cmd/lotus-storage-miner/sealing.go +++ b/cmd/lotus-storage-miner/sealing.go @@ -75,7 +75,12 @@ var sealingWorkersCmd = &cli.Command{ gpuUse = "" } - fmt.Printf("Worker %s, host %s\n", stat.id, color.MagentaString(stat.Info.Hostname)) + var disabled string + if !stat.Enabled { + disabled = color.RedString(" (disabled)") + } + + fmt.Printf("Worker %s, host %s%s\n", stat.id, color.MagentaString(stat.Info.Hostname), disabled) var barCols = uint64(64) cpuBars := int(stat.CpuUse * barCols / stat.Info.Resources.CPUs) @@ -193,7 +198,14 @@ var sealingJobsCmd = &cli.Command{ dur = time.Now().Sub(l.Start).Truncate(time.Millisecond * 100).String() } - _, _ = fmt.Fprintf(tw, "%s\t%d\t%s\t%s\t%s\t%s\t%s\n", 
hex.EncodeToString(l.ID.ID[10:]), l.Sector.Number, l.wid, workerHostnames[l.wid], l.Task.Short(), state, dur) + _, _ = fmt.Fprintf(tw, "%s\t%d\t%s\t%s\t%s\t%s\t%s\n", + hex.EncodeToString(l.ID.ID[10:]), + l.Sector.Number, + hex.EncodeToString(l.wid[5:]), + workerHostnames[l.wid], + l.Task.Short(), + state, + dur) } return tw.Flush() From cf4dfa3a051f46afd95a3637ccec79c64b37c6ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 13:59:38 +0200 Subject: [PATCH 48/61] worker: Use http rpc for miner API --- cli/cmd.go | 36 +++++++++++++++++++++++++++++++++-- cmd/lotus-seal-worker/main.go | 7 +------ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/cli/cmd.go b/cli/cmd.go index eef73b241f2..02ef06002af 100644 --- a/cli/cmd.go +++ b/cli/cmd.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/http" + "net/url" "os" "os/signal" "strings" @@ -206,7 +207,22 @@ func GetFullNodeAPI(ctx *cli.Context) (api.FullNode, jsonrpc.ClientCloser, error return client.NewFullNodeRPC(ctx.Context, addr, headers) } -func GetStorageMinerAPI(ctx *cli.Context, opts ...jsonrpc.Option) (api.StorageMiner, jsonrpc.ClientCloser, error) { +type GetStorageMinerOptions struct { + PreferHttp bool +} + +type GetStorageMinerOption func(*GetStorageMinerOptions) + +func StorageMinerUseHttp(opts *GetStorageMinerOptions) { + opts.PreferHttp = true +} + +func GetStorageMinerAPI(ctx *cli.Context, opts ...GetStorageMinerOption) (api.StorageMiner, jsonrpc.ClientCloser, error) { + var options GetStorageMinerOptions + for _, opt := range opts { + opt(&options) + } + if tn, ok := ctx.App.Metadata["testnode-storage"]; ok { return tn.(api.StorageMiner), func() {}, nil } @@ -216,7 +232,23 @@ func GetStorageMinerAPI(ctx *cli.Context, opts ...jsonrpc.Option) (api.StorageMi return nil, nil, err } - return client.NewStorageMinerRPC(ctx.Context, addr, headers, opts...) 
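GetStorageMinerAPI now accepts its own functional options (GetStorageMinerOption) instead of raw jsonrpc options, and StorageMinerUseHttp is simply a function that flips a flag on the collected options. A small standalone sketch of that pattern, with illustrative names:

```go
package main

import "fmt"

// dialOptions collects the knobs a constructor-style API call accepts.
type dialOptions struct {
	preferHTTP bool
}

// DialOption mutates dialOptions; callers pass zero or more of them.
type DialOption func(*dialOptions)

// PreferHTTP is one concrete option, passed by name much like StorageMinerUseHttp.
func PreferHTTP(o *dialOptions) { o.preferHTTP = true }

func dial(addr string, opts ...DialOption) string {
	var o dialOptions
	for _, opt := range opts {
		opt(&o)
	}
	if o.preferHTTP {
		return "http connection to " + addr
	}
	return "websocket connection to " + addr
}

func main() {
	fmt.Println(dial("127.0.0.1:2345"))             // websocket connection
	fmt.Println(dial("127.0.0.1:2345", PreferHTTP)) // http connection
}
```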
+ if options.PreferHttp { + u, err := url.Parse(addr) + if err != nil { + return nil, nil, xerrors.Errorf("parsing miner api URL: %w", err) + } + + switch u.Scheme { + case "ws": + u.Scheme = "http" + case "wss": + u.Scheme = "https" + } + + addr = u.String() + } + + return client.NewStorageMinerRPC(ctx.Context, addr, headers) } func GetWorkerAPI(ctx *cli.Context) (api.WorkerAPI, jsonrpc.ClientCloser, error) { diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index 3472192e8e5..454f1efe233 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -176,7 +176,7 @@ var runCmd = &cli.Command{ var closer func() var err error for { - nodeApi, closer, err = lcli.GetStorageMinerAPI(cctx, jsonrpc.WithTimeout(30*time.Second)) + nodeApi, closer, err = lcli.GetStorageMinerAPI(cctx, lcli.StorageMinerUseHttp) if err == nil { break } @@ -457,11 +457,6 @@ var runCmd = &cli.Command{ log.Info("Worker registered successfully, waiting for tasks") - closing, err := nodeApi.Closing(ctx) - if err != nil { - log.Errorf("failed to get remote closing channel: %+v", err) - } - select { case <-closing: case <-ctx.Done(): From 879aa9512daa58a0c518d0c6ade1463e3b7ed3c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 17:56:29 +0200 Subject: [PATCH 49/61] worker: Use miner session for connectivity check --- cmd/lotus-seal-worker/main.go | 62 +++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index 454f1efe233..5575679ddce 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -431,17 +431,33 @@ var runCmd = &cli.Command{ } } + minerSession, err := nodeApi.Session(ctx) + if err != nil { + return xerrors.Errorf("getting miner session: %w", err) + } + go func() { - var reconnect bool + heartbeats := time.NewTicker(stores.HeartbeatInterval) + defer heartbeats.Stop() + + var connected, reconnect bool for { + // If we're reconnecting, redeclare storage first if reconnect { log.Info("Redeclaring local storage") if err := localStore.Redeclare(ctx); err != nil { log.Errorf("Redeclaring local storage failed: %+v", err) - cancel() - return + + select { + case <-ctx.Done(): + return // graceful shutdown + case <-heartbeats.C: + } + continue } + + connected = false } log.Info("Making sure no local tasks are running") @@ -449,21 +465,33 @@ var runCmd = &cli.Command{ // TODO: we could get rid of this, but that requires tracking resources for restarted tasks correctly workerApi.LocalWorker.WaitQuiet() - if err := nodeApi.WorkerConnect(ctx, "http://"+address+"/rpc/v0"); err != nil { - log.Errorf("Registering worker failed: %+v", err) - cancel() - return - } - - log.Info("Worker registered successfully, waiting for tasks") - - select { - case <-closing: - case <-ctx.Done(): - } + for { + curSession, err := nodeApi.Session(ctx) + if err != nil { + log.Errorf("heartbeat: checking remote session failed: %+v", err) + } else { + if curSession != minerSession { + minerSession = curSession + break + } + + if !connected { + if err := nodeApi.WorkerConnect(ctx, "http://"+address+"/rpc/v0"); err != nil { + log.Errorf("Registering worker failed: %+v", err) + cancel() + return + } + + log.Info("Worker registered successfully, waiting for tasks") + connected = true + } + } - if ctx.Err() != nil { - return // graceful shutdown + select { + case <-ctx.Done(): + return // graceful shutdown + case <-heartbeats.C: + } } 
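The worker-side connectivity check polls the miner's Session UUID on a heartbeat ticker: an unchanged UUID means the existing registration is still valid, a changed UUID means the miner restarted and the worker must register again. A standalone sketch of that polling shape; the fake miner type, intervals, and register callback here are illustrative, and the real loop additionally redeclares storage and waits for local tasks to drain before reconnecting:

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/google/uuid"
)

// fakeMiner stands in for the miner API; its session changes when it "restarts".
type fakeMiner struct {
	mu      sync.Mutex
	session uuid.UUID
}

func (m *fakeMiner) Session(context.Context) (uuid.UUID, error) {
	m.mu.Lock()
	defer m.mu.Unlock()
	return m.session, nil
}

func (m *fakeMiner) restart() {
	m.mu.Lock()
	defer m.mu.Unlock()
	m.session = uuid.New()
}

// watchMiner re-registers the worker whenever the miner session changes.
func watchMiner(ctx context.Context, m *fakeMiner, interval time.Duration, register func() error) error {
	known, err := m.Session(ctx)
	if err != nil {
		return err
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	connected := false
	for {
		cur, err := m.Session(ctx)
		switch {
		case err != nil:
			fmt.Println("session check failed:", err) // likely temporary, keep polling
		case cur != known:
			fmt.Println("miner session changed, re-registering")
			known, connected = cur, false
		case !connected:
			if err := register(); err != nil {
				return err
			}
			connected = true
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ticker.C:
		}
	}
}

func main() {
	miner := &fakeMiner{session: uuid.New()}

	ctx, cancel := context.WithTimeout(context.Background(), 400*time.Millisecond)
	defer cancel()

	go func() {
		time.Sleep(150 * time.Millisecond)
		miner.restart() // simulate a miner restart while the worker is running
	}()

	register := func() error { fmt.Println("worker registered"); return nil }
	_ = watchMiner(ctx, miner, 100*time.Millisecond, register)
}
```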
log.Errorf("LOTUS-MINER CONNECTION LOST") From dbb421c4f725dc90c232b6643fc8cfd2762accaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 19:09:13 +0200 Subject: [PATCH 50/61] localworker: Use better context for calling returnFunc --- extern/sector-storage/worker_local.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 739f70fa0cc..9733fc76f73 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -215,10 +215,12 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt Ret go func() { defer l.running.Done() - res, err := work(&wctx{ + ctx := &wctx{ vals: ctx, closing: l.closing, - }, ci) + } + + res, err := work(ctx, ci) { rb, err := json.Marshal(res) From 8c86ea6b75acdfe81bd4879a45e18e44eb60cde1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 19:45:11 +0200 Subject: [PATCH 51/61] localworker: Try very hard to get ruselts to manager --- extern/sector-storage/worker_local.go | 45 +++++++++++++++++++-------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 9733fc76f73..ec027b4e23f 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -92,13 +92,12 @@ func newLocalWorker(executor func() (ffiwrapper.Storage, error), wcfg WorkerConf for _, call := range unfinished { err := xerrors.Errorf("worker restarted") - if err := returnFunc[call.RetType](context.TODO(), call.ID, ret, nil, err); err != nil { - log.Errorf("return error: %s: %+v", call.RetType, err) - continue - } + // TODO: Handle restarting PC1 once support is merged - if err := w.ct.onReturned(call.ID); err != nil { - log.Errorf("marking call as returned failed: %s: %+v", call.RetType, err) + if doReturn(context.TODO(), call.RetType, call.ID, ret, nil, err) { + if err := w.ct.onReturned(call.ID); err != nil { + log.Errorf("marking call as returned failed: %s: %+v", call.RetType, err) + } } } }() @@ -231,20 +230,40 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt Ret log.Errorf("tracking call (done): %+v", err) } } + } + if doReturn(ctx, rt, ci, l.ret, res, err) { + if err := l.ct.onReturned(ci); err != nil { + log.Errorf("tracking call (done): %+v", err) + } } + }() + + return ci, nil +} - if err := returnFunc[rt](ctx, ci, l.ret, res, err); err != nil { - log.Errorf("return error: %s: %+v", rt, err) - return +// doReturn tries to send the result to manager, returns true if successful +func doReturn(ctx context.Context, rt ReturnType, ci storiface.CallID, ret storiface.WorkerReturn, res interface{}, rerr error) bool { + for { + err := returnFunc[rt](ctx, ci, ret, res, rerr) + if err == nil { + break } - if err := l.ct.onReturned(ci); err != nil { - log.Errorf("tracking call (done): %+v", err) + log.Errorf("return error, will retry in 5s: %s: %+v", rt, err) + select { + case <-time.After(5 * time.Second): + case <-ctx.Done(): + log.Errorf("failed to return results: %s", ctx.Err()) + + // fine to just return, worker is most likely shutting down, and + // we didn't mark the result as returned yet, so we'll try to + // re-submit it on restart + return false } - }() + } - return ci, nil + return true } func errstr(err error) string { From 1a10f95973caaf7869b100228210aa612c57d49f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 19:53:23 +0200 Subject: [PATCH 52/61] worker: Better miner connectivity check on startup --- cmd/lotus-seal-worker/main.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmd/lotus-seal-worker/main.go b/cmd/lotus-seal-worker/main.go index 5575679ddce..9073c860e0f 100644 --- a/cmd/lotus-seal-worker/main.go +++ b/cmd/lotus-seal-worker/main.go @@ -172,13 +172,18 @@ var runCmd = &cli.Command{ } // Connect to storage-miner + ctx := lcli.ReqContext(cctx) + var nodeApi api.StorageMiner var closer func() var err error for { nodeApi, closer, err = lcli.GetStorageMinerAPI(cctx, lcli.StorageMinerUseHttp) if err == nil { - break + _, err = nodeApi.Version(ctx) + if err == nil { + break + } } fmt.Printf("\r\x1b[0KConnecting to miner API... (%s)", err) time.Sleep(time.Second) @@ -186,7 +191,6 @@ var runCmd = &cli.Command{ } defer closer() - ctx := lcli.ReqContext(cctx) ctx, cancel := context.WithCancel(ctx) defer cancel() From 268d29222a100fccc8fba0680d1c1b774b0059c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Sun, 18 Oct 2020 20:10:39 +0200 Subject: [PATCH 53/61] docsgen --- documentation/en/api-methods.md | 35 +++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/documentation/en/api-methods.md b/documentation/en/api-methods.md index c5bc24b0405..74f132d66b3 100644 --- a/documentation/en/api-methods.md +++ b/documentation/en/api-methods.md @@ -1,6 +1,7 @@ # Groups * [](#) * [Closing](#Closing) + * [Session](#Session) * [Shutdown](#Shutdown) * [Version](#Version) * [Auth](#Auth) @@ -154,6 +155,7 @@ * [StateMinerPreCommitDepositForPower](#StateMinerPreCommitDepositForPower) * [StateMinerProvingDeadline](#StateMinerProvingDeadline) * [StateMinerRecoveries](#StateMinerRecoveries) + * [StateMinerSectorAllocated](#StateMinerSectorAllocated) * [StateMinerSectorCount](#StateMinerSectorCount) * [StateMinerSectors](#StateMinerSectors) * [StateNetworkName](#StateNetworkName) @@ -207,6 +209,15 @@ Inputs: `null` Response: `{}` +### Session + + +Perms: read + +Inputs: `null` + +Response: `"07070707-0707-0707-0707-070707070707"` + ### Shutdown @@ -3918,6 +3929,30 @@ Response: ] ``` +### StateMinerSectorAllocated +StateMinerSectorAllocated checks if a sector is allocated + + +Perms: read + +Inputs: +```json +[ + "f01234", + 9, + [ + { + "/": "bafy2bzacea3wsdh6y3a36tb3skempjoxqpuyompjbmfeyf34fi3uy6uue42v4" + }, + { + "/": "bafy2bzacebp3shtrn43k7g3unredz7fxn4gj533d3o43tqn2p2ipxxhrvchve" + } + ] +] +``` + +Response: `true` + ### StateMinerSectorCount StateMinerSectorCount returns the number of sectors in a miner's sector set and proving set From 4d874730538ae7b4845976b7a4c415bd58739942 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Fri, 23 Oct 2020 23:31:18 +0200 Subject: [PATCH 54/61] Fix lint --- storage/wdpost_run_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/storage/wdpost_run_test.go b/storage/wdpost_run_test.go index b987b604cfe..3a0a36ad7d9 100644 --- a/storage/wdpost_run_test.go +++ b/storage/wdpost_run_test.go @@ -22,7 +22,6 @@ import ( tutils "github.com/filecoin-project/specs-actors/v2/support/testing" "github.com/filecoin-project/lotus/api" - "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/actors/builtin/miner" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/journal" From 84b567c790c45f467ce6274be14c50c9d8097b1c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 28 Oct 2020 13:39:28 +0100 Subject: [PATCH 55/61] sched: move worker funcs to a separate file --- extern/sector-storage/sched.go | 400 ------------------------- extern/sector-storage/sched_worker.go | 411 ++++++++++++++++++++++++++ 2 files changed, 411 insertions(+), 400 deletions(-) create mode 100644 extern/sector-storage/sched_worker.go diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index a4e6a6239ad..34cba4cf2fe 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -12,7 +12,6 @@ import ( "github.com/filecoin-project/go-state-types/abi" "github.com/filecoin-project/lotus/extern/sector-storage/sealtasks" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "github.com/filecoin-project/lotus/extern/sector-storage/storiface" ) @@ -531,405 +530,6 @@ func (sh *scheduler) trySched() { sh.openWindows = newOpenWindows } -// context only used for startup -func (sh *scheduler) runWorker(ctx context.Context, w Worker) error { - info, err := w.Info(ctx) - if err != nil { - return xerrors.Errorf("getting worker info: %w", err) - } - - sessID, err := w.Session(ctx) - if err != nil { - return xerrors.Errorf("getting worker session: %w", err) - } - if sessID == ClosedWorkerID { - return xerrors.Errorf("worker already closed") - } - - worker := &workerHandle{ - w: w, - info: info, - - preparing: &activeResources{}, - active: &activeResources{}, - enabled: true, - - closingMgr: make(chan struct{}), - closedMgr: make(chan struct{}), - } - - wid := WorkerID(sessID) - - sh.workersLk.Lock() - _, exist := sh.workers[wid] - if exist { - // this is ok, we're already handling this worker in a different goroutine - return nil - } - - sh.workers[wid] = worker - sh.workersLk.Unlock() - - go func() { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - - defer close(worker.closedMgr) - - scheduledWindows := make(chan *schedWindow, SchedWindows) - taskDone := make(chan struct{}, 1) - windowsRequested := 0 - - disable := func(ctx context.Context) error { - done := make(chan struct{}) - - // request cleanup in the main scheduler goroutine - select { - case sh.workerDisable <- workerDisableReq{ - activeWindows: worker.activeWindows, - wid: wid, - done: func() { - close(done) - }, - }: - case <-ctx.Done(): - return ctx.Err() - case <-sh.closing: - return nil - } - - // wait for cleanup to complete - select { - case <-done: - case <-ctx.Done(): - return ctx.Err() - case <-sh.closing: - return nil - } - - worker.activeWindows = worker.activeWindows[:0] - windowsRequested = 0 - return nil - } - - defer func() { - log.Warnw("Worker closing", "workerid", sessID) - - if err := disable(ctx); err != nil { - log.Warnw("failed to disable worker", "worker", wid, "error", err) - } - - sh.workersLk.Lock() - delete(sh.workers, wid) - sh.workersLk.Unlock() - }() - - heartbeatTimer := time.NewTicker(stores.HeartbeatInterval) - defer heartbeatTimer.Stop() - - for { - sh.workersLk.Lock() - enabled := worker.enabled - sh.workersLk.Unlock() - - // ask for more windows if we need them (non-blocking) - for ; enabled && windowsRequested < SchedWindows; windowsRequested++ { - select { - case sh.windowRequests <- &schedWindowRequest{ - worker: wid, - done: scheduledWindows, - }: - case <-sh.closing: - return - case <-worker.closingMgr: - return - } - } - - // wait for more windows to come in, or for tasks to get finished (blocking) - for { - - // first ping the worker and check session - { - sctx, scancel := 
context.WithTimeout(ctx, stores.HeartbeatInterval/2) - curSes, err := worker.w.Session(sctx) - scancel() - if err != nil { - // Likely temporary error - - log.Warnw("failed to check worker session", "error", err) - - if err := disable(ctx); err != nil { - log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) - } - - select { - case <-heartbeatTimer.C: - continue - case w := <-scheduledWindows: - // was in flight when initially disabled, return - worker.wndLk.Lock() - worker.activeWindows = append(worker.activeWindows, w) - worker.wndLk.Unlock() - - if err := disable(ctx); err != nil { - log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) - } - case <-sh.closing: - return - case <-worker.closingMgr: - return - } - continue - } - - if curSes != sessID { - if curSes != ClosedWorkerID { - // worker restarted - log.Warnw("worker session changed (worker restarted?)", "initial", sessID, "current", curSes) - } - - return - } - - // session looks good - if !enabled { - sh.workersLk.Lock() - worker.enabled = true - sh.workersLk.Unlock() - - // we'll send window requests on the next loop - } - } - - select { - case <-heartbeatTimer.C: - continue - case w := <-scheduledWindows: - worker.wndLk.Lock() - worker.activeWindows = append(worker.activeWindows, w) - worker.wndLk.Unlock() - case <-taskDone: - log.Debugw("task done", "workerid", wid) - case <-sh.closing: - return - case <-worker.closingMgr: - return - } - - break - } - - // process assigned windows (non-blocking) - sh.workersLk.RLock() - worker.wndLk.Lock() - - windowsRequested -= sh.workerCompactWindows(worker, wid) - assignLoop: - // process windows in order - for len(worker.activeWindows) > 0 { - firstWindow := worker.activeWindows[0] - - // process tasks within a window, preferring tasks at lower indexes - for len(firstWindow.todo) > 0 { - tidx := -1 - - worker.lk.Lock() - for t, todo := range firstWindow.todo { - needRes := ResourceTable[todo.taskType][sh.spt] - if worker.preparing.canHandleRequest(needRes, wid, "startPreparing", worker.info.Resources) { - tidx = t - break - } - } - worker.lk.Unlock() - - if tidx == -1 { - break assignLoop - } - - todo := firstWindow.todo[tidx] - - log.Debugf("assign worker sector %d", todo.sector.Number) - err := sh.assignWorker(taskDone, wid, worker, todo) - - if err != nil { - log.Error("assignWorker error: %+v", err) - go todo.respond(xerrors.Errorf("assignWorker error: %w", err)) - } - - // Note: we're not freeing window.allocated resources here very much on purpose - copy(firstWindow.todo[tidx:], firstWindow.todo[tidx+1:]) - firstWindow.todo[len(firstWindow.todo)-1] = nil - firstWindow.todo = firstWindow.todo[:len(firstWindow.todo)-1] - } - - copy(worker.activeWindows, worker.activeWindows[1:]) - worker.activeWindows[len(worker.activeWindows)-1] = nil - worker.activeWindows = worker.activeWindows[:len(worker.activeWindows)-1] - - windowsRequested-- - } - - worker.wndLk.Unlock() - sh.workersLk.RUnlock() - } - }() - - return nil -} - -func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) int { - // move tasks from older windows to newer windows if older windows - // still can fit them - if len(worker.activeWindows) > 1 { - for wi, window := range worker.activeWindows[1:] { - lower := worker.activeWindows[wi] - var moved []int - - for ti, todo := range window.todo { - needRes := ResourceTable[todo.taskType][sh.spt] - if !lower.allocated.canHandleRequest(needRes, wid, "compactWindows", worker.info.Resources) { - 
continue - } - - moved = append(moved, ti) - lower.todo = append(lower.todo, todo) - lower.allocated.add(worker.info.Resources, needRes) - window.allocated.free(worker.info.Resources, needRes) - } - - if len(moved) > 0 { - newTodo := make([]*workerRequest, 0, len(window.todo)-len(moved)) - for i, t := range window.todo { - if len(moved) > 0 && moved[0] == i { - moved = moved[1:] - continue - } - - newTodo = append(newTodo, t) - } - window.todo = newTodo - } - } - } - - var compacted int - var newWindows []*schedWindow - - for _, window := range worker.activeWindows { - if len(window.todo) == 0 { - compacted++ - continue - } - - newWindows = append(newWindows, window) - } - - worker.activeWindows = newWindows - - return compacted -} - -func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *workerHandle, req *workerRequest) error { - needRes := ResourceTable[req.taskType][sh.spt] - - w.lk.Lock() - w.preparing.add(w.info.Resources, needRes) - w.lk.Unlock() - - go func() { - err := req.prepare(req.ctx, sh.wt.worker(wid, w.w)) - sh.workersLk.Lock() - - if err != nil { - w.lk.Lock() - w.preparing.free(w.info.Resources, needRes) - w.lk.Unlock() - sh.workersLk.Unlock() - - select { - case taskDone <- struct{}{}: - case <-sh.closing: - log.Warnf("scheduler closed while sending response (prepare error: %+v)", err) - } - - select { - case req.ret <- workerResponse{err: err}: - case <-req.ctx.Done(): - log.Warnf("request got cancelled before we could respond (prepare error: %+v)", err) - case <-sh.closing: - log.Warnf("scheduler closed while sending response (prepare error: %+v)", err) - } - return - } - - err = w.active.withResources(wid, w.info.Resources, needRes, &sh.workersLk, func() error { - w.lk.Lock() - w.preparing.free(w.info.Resources, needRes) - w.lk.Unlock() - sh.workersLk.Unlock() - defer sh.workersLk.Lock() // we MUST return locked from this function - - select { - case taskDone <- struct{}{}: - case <-sh.closing: - } - - err = req.work(req.ctx, sh.wt.worker(wid, w.w)) - - select { - case req.ret <- workerResponse{err: err}: - case <-req.ctx.Done(): - log.Warnf("request got cancelled before we could respond") - case <-sh.closing: - log.Warnf("scheduler closed while sending response") - } - - return nil - }) - - sh.workersLk.Unlock() - - // This error should always be nil, since nothing is setting it, but just to be safe: - if err != nil { - log.Errorf("error executing worker (withResources): %+v", err) - } - }() - - return nil -} - -func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) { - select { - case <-w.closingMgr: - default: - close(w.closingMgr) - } - - sh.workersLk.Unlock() - select { - case <-w.closedMgr: - case <-time.After(time.Second): - log.Errorf("timeout closing worker manager goroutine %d", wid) - } - sh.workersLk.Lock() - - if !w.cleanupStarted { - w.cleanupStarted = true - - newWindows := make([]*schedWindowRequest, 0, len(sh.openWindows)) - for _, window := range sh.openWindows { - if window.worker != wid { - newWindows = append(newWindows, window) - } - } - sh.openWindows = newWindows - - log.Debugf("worker %d dropped", wid) - } -} - func (sh *scheduler) schedClose() { sh.workersLk.Lock() defer sh.workersLk.Unlock() diff --git a/extern/sector-storage/sched_worker.go b/extern/sector-storage/sched_worker.go new file mode 100644 index 00000000000..4897c30dccb --- /dev/null +++ b/extern/sector-storage/sched_worker.go @@ -0,0 +1,411 @@ +package sectorstorage + +import ( + "context" + "time" + + "golang.org/x/xerrors" + + 
"github.com/filecoin-project/lotus/extern/sector-storage/stores" +) + +// context only used for startup +func (sh *scheduler) runWorker(ctx context.Context, w Worker) error { + info, err := w.Info(ctx) + if err != nil { + return xerrors.Errorf("getting worker info: %w", err) + } + + sessID, err := w.Session(ctx) + if err != nil { + return xerrors.Errorf("getting worker session: %w", err) + } + if sessID == ClosedWorkerID { + return xerrors.Errorf("worker already closed") + } + + worker := &workerHandle{ + w: w, + info: info, + + preparing: &activeResources{}, + active: &activeResources{}, + enabled: true, + + closingMgr: make(chan struct{}), + closedMgr: make(chan struct{}), + } + + wid := WorkerID(sessID) + + sh.workersLk.Lock() + _, exist := sh.workers[wid] + if exist { + log.Warnw("duplicated worker added", "id", wid) + + // this is ok, we're already handling this worker in a different goroutine + return nil + } + + sh.workers[wid] = worker + sh.workersLk.Unlock() + + go func() { + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + defer close(worker.closedMgr) + + scheduledWindows := make(chan *schedWindow, SchedWindows) + taskDone := make(chan struct{}, 1) + windowsRequested := 0 + + disable := func(ctx context.Context) error { + done := make(chan struct{}) + + // request cleanup in the main scheduler goroutine + select { + case sh.workerDisable <- workerDisableReq{ + activeWindows: worker.activeWindows, + wid: wid, + done: func() { + close(done) + }, + }: + case <-ctx.Done(): + return ctx.Err() + case <-sh.closing: + return nil + } + + // wait for cleanup to complete + select { + case <-done: + case <-ctx.Done(): + return ctx.Err() + case <-sh.closing: + return nil + } + + worker.activeWindows = worker.activeWindows[:0] + windowsRequested = 0 + return nil + } + + defer func() { + log.Warnw("Worker closing", "workerid", sessID) + + if err := disable(ctx); err != nil { + log.Warnw("failed to disable worker", "worker", wid, "error", err) + } + + sh.workersLk.Lock() + delete(sh.workers, wid) + sh.workersLk.Unlock() + }() + + heartbeatTimer := time.NewTicker(stores.HeartbeatInterval) + defer heartbeatTimer.Stop() + + for { + sh.workersLk.Lock() + enabled := worker.enabled + sh.workersLk.Unlock() + + // ask for more windows if we need them (non-blocking) + for ; enabled && windowsRequested < SchedWindows; windowsRequested++ { + select { + case sh.windowRequests <- &schedWindowRequest{ + worker: wid, + done: scheduledWindows, + }: + case <-sh.closing: + return + case <-worker.closingMgr: + return + } + } + + // wait for more windows to come in, or for tasks to get finished (blocking) + for { + + // first ping the worker and check session + { + sctx, scancel := context.WithTimeout(ctx, stores.HeartbeatInterval/2) + curSes, err := worker.w.Session(sctx) + scancel() + if err != nil { + // Likely temporary error + + log.Warnw("failed to check worker session", "error", err) + + if err := disable(ctx); err != nil { + log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) + } + + select { + case <-heartbeatTimer.C: + continue + case w := <-scheduledWindows: + // was in flight when initially disabled, return + worker.wndLk.Lock() + worker.activeWindows = append(worker.activeWindows, w) + worker.wndLk.Unlock() + + if err := disable(ctx); err != nil { + log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) + } + case <-sh.closing: + return + case <-worker.closingMgr: + return + } + continue + } + + if curSes != sessID 
{ + if curSes != ClosedWorkerID { + // worker restarted + log.Warnw("worker session changed (worker restarted?)", "initial", sessID, "current", curSes) + } + + return + } + + // session looks good + if !enabled { + sh.workersLk.Lock() + worker.enabled = true + sh.workersLk.Unlock() + + // we'll send window requests on the next loop + } + } + + select { + case <-heartbeatTimer.C: + continue + case w := <-scheduledWindows: + worker.wndLk.Lock() + worker.activeWindows = append(worker.activeWindows, w) + worker.wndLk.Unlock() + case <-taskDone: + log.Debugw("task done", "workerid", wid) + case <-sh.closing: + return + case <-worker.closingMgr: + return + } + + break + } + + // process assigned windows (non-blocking) + sh.workersLk.RLock() + worker.wndLk.Lock() + + windowsRequested -= sh.workerCompactWindows(worker, wid) + assignLoop: + // process windows in order + for len(worker.activeWindows) > 0 { + firstWindow := worker.activeWindows[0] + + // process tasks within a window, preferring tasks at lower indexes + for len(firstWindow.todo) > 0 { + tidx := -1 + + worker.lk.Lock() + for t, todo := range firstWindow.todo { + needRes := ResourceTable[todo.taskType][sh.spt] + if worker.preparing.canHandleRequest(needRes, wid, "startPreparing", worker.info.Resources) { + tidx = t + break + } + } + worker.lk.Unlock() + + if tidx == -1 { + break assignLoop + } + + todo := firstWindow.todo[tidx] + + log.Debugf("assign worker sector %d", todo.sector.Number) + err := sh.assignWorker(taskDone, wid, worker, todo) + + if err != nil { + log.Error("assignWorker error: %+v", err) + go todo.respond(xerrors.Errorf("assignWorker error: %w", err)) + } + + // Note: we're not freeing window.allocated resources here very much on purpose + copy(firstWindow.todo[tidx:], firstWindow.todo[tidx+1:]) + firstWindow.todo[len(firstWindow.todo)-1] = nil + firstWindow.todo = firstWindow.todo[:len(firstWindow.todo)-1] + } + + copy(worker.activeWindows, worker.activeWindows[1:]) + worker.activeWindows[len(worker.activeWindows)-1] = nil + worker.activeWindows = worker.activeWindows[:len(worker.activeWindows)-1] + + windowsRequested-- + } + + worker.wndLk.Unlock() + sh.workersLk.RUnlock() + } + }() + + return nil +} + +func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) int { + // move tasks from older windows to newer windows if older windows + // still can fit them + if len(worker.activeWindows) > 1 { + for wi, window := range worker.activeWindows[1:] { + lower := worker.activeWindows[wi] + var moved []int + + for ti, todo := range window.todo { + needRes := ResourceTable[todo.taskType][sh.spt] + if !lower.allocated.canHandleRequest(needRes, wid, "compactWindows", worker.info.Resources) { + continue + } + + moved = append(moved, ti) + lower.todo = append(lower.todo, todo) + lower.allocated.add(worker.info.Resources, needRes) + window.allocated.free(worker.info.Resources, needRes) + } + + if len(moved) > 0 { + newTodo := make([]*workerRequest, 0, len(window.todo)-len(moved)) + for i, t := range window.todo { + if len(moved) > 0 && moved[0] == i { + moved = moved[1:] + continue + } + + newTodo = append(newTodo, t) + } + window.todo = newTodo + } + } + } + + var compacted int + var newWindows []*schedWindow + + for _, window := range worker.activeWindows { + if len(window.todo) == 0 { + compacted++ + continue + } + + newWindows = append(newWindows, window) + } + + worker.activeWindows = newWindows + + return compacted +} + +func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w 
*workerHandle, req *workerRequest) error { + needRes := ResourceTable[req.taskType][sh.spt] + + w.lk.Lock() + w.preparing.add(w.info.Resources, needRes) + w.lk.Unlock() + + go func() { + err := req.prepare(req.ctx, sh.wt.worker(wid, w.w)) + sh.workersLk.Lock() + + if err != nil { + w.lk.Lock() + w.preparing.free(w.info.Resources, needRes) + w.lk.Unlock() + sh.workersLk.Unlock() + + select { + case taskDone <- struct{}{}: + case <-sh.closing: + log.Warnf("scheduler closed while sending response (prepare error: %+v)", err) + } + + select { + case req.ret <- workerResponse{err: err}: + case <-req.ctx.Done(): + log.Warnf("request got cancelled before we could respond (prepare error: %+v)", err) + case <-sh.closing: + log.Warnf("scheduler closed while sending response (prepare error: %+v)", err) + } + return + } + + err = w.active.withResources(wid, w.info.Resources, needRes, &sh.workersLk, func() error { + w.lk.Lock() + w.preparing.free(w.info.Resources, needRes) + w.lk.Unlock() + sh.workersLk.Unlock() + defer sh.workersLk.Lock() // we MUST return locked from this function + + select { + case taskDone <- struct{}{}: + case <-sh.closing: + } + + err = req.work(req.ctx, sh.wt.worker(wid, w.w)) + + select { + case req.ret <- workerResponse{err: err}: + case <-req.ctx.Done(): + log.Warnf("request got cancelled before we could respond") + case <-sh.closing: + log.Warnf("scheduler closed while sending response") + } + + return nil + }) + + sh.workersLk.Unlock() + + // This error should always be nil, since nothing is setting it, but just to be safe: + if err != nil { + log.Errorf("error executing worker (withResources): %+v", err) + } + }() + + return nil +} + +func (sh *scheduler) workerCleanup(wid WorkerID, w *workerHandle) { + select { + case <-w.closingMgr: + default: + close(w.closingMgr) + } + + sh.workersLk.Unlock() + select { + case <-w.closedMgr: + case <-time.After(time.Second): + log.Errorf("timeout closing worker manager goroutine %d", wid) + } + sh.workersLk.Lock() + + if !w.cleanupStarted { + w.cleanupStarted = true + + newWindows := make([]*schedWindowRequest, 0, len(sh.openWindows)) + for _, window := range sh.openWindows { + if window.worker != wid { + newWindows = append(newWindows, window) + } + } + sh.openWindows = newWindows + + log.Debugf("worker %d dropped", wid) + } +} From 8731fe9112eb0a0e4603c596532b93108d81544e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 28 Oct 2020 14:14:38 +0100 Subject: [PATCH 56/61] sched: split worker handling into more funcs --- extern/sector-storage/sched_worker.go | 419 +++++++++++++++----------- extern/sector-storage/worker_local.go | 2 +- 2 files changed, 241 insertions(+), 180 deletions(-) diff --git a/extern/sector-storage/sched_worker.go b/extern/sector-storage/sched_worker.go index 4897c30dccb..037176a11da 100644 --- a/extern/sector-storage/sched_worker.go +++ b/extern/sector-storage/sched_worker.go @@ -2,13 +2,25 @@ package sectorstorage import ( "context" + "github.com/filecoin-project/lotus/extern/sector-storage/stores" "time" "golang.org/x/xerrors" - - "github.com/filecoin-project/lotus/extern/sector-storage/stores" ) +type schedWorker struct { + sh *scheduler + worker *workerHandle + + wid WorkerID + + heartbeatTimer *time.Ticker + scheduledWindows chan *schedWindow + taskDone chan struct{} + + windowsRequested int +} + // context only used for startup func (sh *scheduler) runWorker(ctx context.Context, w Worker) error { info, err := w.Info(ctx) @@ -50,213 +62,210 @@ func (sh *scheduler) runWorker(ctx 
context.Context, w Worker) error { sh.workers[wid] = worker sh.workersLk.Unlock() - go func() { - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() + sw := &schedWorker{ + sh: sh, + worker: worker, - defer close(worker.closedMgr) + wid: wid, - scheduledWindows := make(chan *schedWindow, SchedWindows) - taskDone := make(chan struct{}, 1) - windowsRequested := 0 + heartbeatTimer: time.NewTicker(stores.HeartbeatInterval), + scheduledWindows: make(chan *schedWindow, SchedWindows), + taskDone: make(chan struct{}, 1), - disable := func(ctx context.Context) error { - done := make(chan struct{}) + windowsRequested: 0, + } - // request cleanup in the main scheduler goroutine - select { - case sh.workerDisable <- workerDisableReq{ - activeWindows: worker.activeWindows, - wid: wid, - done: func() { - close(done) - }, - }: - case <-ctx.Done(): - return ctx.Err() - case <-sh.closing: - return nil - } + go sw.handleWorker() - // wait for cleanup to complete - select { - case <-done: - case <-ctx.Done(): - return ctx.Err() - case <-sh.closing: - return nil - } + return nil +} - worker.activeWindows = worker.activeWindows[:0] - windowsRequested = 0 - return nil +func (sw *schedWorker) handleWorker() { + worker, sh := sw.worker, sw.sh + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + defer close(worker.closedMgr) + + defer func() { + log.Warnw("Worker closing", "workerid", sw.wid) + + if err := sw.disable(ctx); err != nil { + log.Warnw("failed to disable worker", "worker", sw.wid, "error", err) } - defer func() { - log.Warnw("Worker closing", "workerid", sessID) + sh.workersLk.Lock() + delete(sh.workers, sw.wid) + sh.workersLk.Unlock() + }() - if err := disable(ctx); err != nil { - log.Warnw("failed to disable worker", "worker", wid, "error", err) - } + defer sw.heartbeatTimer.Stop() - sh.workersLk.Lock() - delete(sh.workers, wid) - sh.workersLk.Unlock() - }() + for { + sh.workersLk.Lock() + enabled := worker.enabled + sh.workersLk.Unlock() - heartbeatTimer := time.NewTicker(stores.HeartbeatInterval) - defer heartbeatTimer.Stop() + // ask for more windows if we need them (non-blocking) + if enabled { + if !sw.requestWindows() { + return // graceful shutdown + } + } + // wait for more windows to come in, or for tasks to get finished (blocking) for { - sh.workersLk.Lock() - enabled := worker.enabled - sh.workersLk.Unlock() + // ping the worker and check session + if !sw.checkSession(ctx) { + return // invalid session / exiting + } - // ask for more windows if we need them (non-blocking) - for ; enabled && windowsRequested < SchedWindows; windowsRequested++ { - select { - case sh.windowRequests <- &schedWindowRequest{ - worker: wid, - done: scheduledWindows, - }: - case <-sh.closing: - return - case <-worker.closingMgr: - return - } + // session looks good + if !enabled { + sh.workersLk.Lock() + worker.enabled = true + sh.workersLk.Unlock() + + // we'll send window requests on the next loop } - // wait for more windows to come in, or for tasks to get finished (blocking) - for { - - // first ping the worker and check session - { - sctx, scancel := context.WithTimeout(ctx, stores.HeartbeatInterval/2) - curSes, err := worker.w.Session(sctx) - scancel() - if err != nil { - // Likely temporary error - - log.Warnw("failed to check worker session", "error", err) - - if err := disable(ctx); err != nil { - log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) - } - - select { - case <-heartbeatTimer.C: - continue - case w := <-scheduledWindows: 
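Each session probe runs under a timeout of half the heartbeat interval, so a hung RPC cannot hold up the next heartbeat tick. The per-call timeout pattern in isolation (the interval is shortened here so the example finishes quickly; the real code derives it from stores.HeartbeatInterval):

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// heartbeatInterval plays the role of the heartbeat period; shortened for the demo.
const heartbeatInterval = 1 * time.Second

// probeOnce gives a single liveness probe at most half the heartbeat interval.
func probeOnce(ctx context.Context, probe func(context.Context) error) error {
	sctx, cancel := context.WithTimeout(ctx, heartbeatInterval/2)
	defer cancel()
	return probe(sctx)
}

func main() {
	hangingProbe := func(ctx context.Context) error {
		select {
		case <-time.After(time.Minute): // pretend the remote side never answers
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	// prints "probe result: context deadline exceeded" after roughly half a second
	fmt.Println("probe result:", probeOnce(context.Background(), hangingProbe))
}
```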
- // was in flight when initially disabled, return - worker.wndLk.Lock() - worker.activeWindows = append(worker.activeWindows, w) - worker.wndLk.Unlock() - - if err := disable(ctx); err != nil { - log.Warnw("failed to disable worker with session error", "worker", wid, "error", err) - } - case <-sh.closing: - return - case <-worker.closingMgr: - return - } - continue - } + // wait for more tasks to be assigned by the main scheduler or for the worker + // to finish precessing a task + update, ok := sw.waitForUpdates() + if !ok { + return + } + if update { + break + } + } - if curSes != sessID { - if curSes != ClosedWorkerID { - // worker restarted - log.Warnw("worker session changed (worker restarted?)", "initial", sessID, "current", curSes) - } + // process assigned windows (non-blocking) + sh.workersLk.RLock() + worker.wndLk.Lock() - return - } + sw.windowsRequested -= sh.workerCompactWindows(worker, sw.wid) - // session looks good - if !enabled { - sh.workersLk.Lock() - worker.enabled = true - sh.workersLk.Unlock() + // send tasks to the worker + sw.processAssignedWindows() - // we'll send window requests on the next loop - } - } + worker.wndLk.Unlock() + sh.workersLk.RUnlock() + } +} - select { - case <-heartbeatTimer.C: - continue - case w := <-scheduledWindows: - worker.wndLk.Lock() - worker.activeWindows = append(worker.activeWindows, w) - worker.wndLk.Unlock() - case <-taskDone: - log.Debugw("task done", "workerid", wid) - case <-sh.closing: - return - case <-worker.closingMgr: - return - } +func (sw *schedWorker) disable(ctx context.Context) error { + done := make(chan struct{}) - break - } + // request cleanup in the main scheduler goroutine + select { + case sw.sh.workerDisable <- workerDisableReq{ + activeWindows: sw.worker.activeWindows, + wid: sw.wid, + done: func() { + close(done) + }, + }: + case <-ctx.Done(): + return ctx.Err() + case <-sw.sh.closing: + return nil + } - // process assigned windows (non-blocking) - sh.workersLk.RLock() - worker.wndLk.Lock() - - windowsRequested -= sh.workerCompactWindows(worker, wid) - assignLoop: - // process windows in order - for len(worker.activeWindows) > 0 { - firstWindow := worker.activeWindows[0] - - // process tasks within a window, preferring tasks at lower indexes - for len(firstWindow.todo) > 0 { - tidx := -1 - - worker.lk.Lock() - for t, todo := range firstWindow.todo { - needRes := ResourceTable[todo.taskType][sh.spt] - if worker.preparing.canHandleRequest(needRes, wid, "startPreparing", worker.info.Resources) { - tidx = t - break - } - } - worker.lk.Unlock() + // wait for cleanup to complete + select { + case <-done: + case <-ctx.Done(): + return ctx.Err() + case <-sw.sh.closing: + return nil + } - if tidx == -1 { - break assignLoop - } + sw.worker.activeWindows = sw.worker.activeWindows[:0] + sw.windowsRequested = 0 + return nil +} - todo := firstWindow.todo[tidx] +func (sw *schedWorker) checkSession(ctx context.Context) bool { + for { + sctx, scancel := context.WithTimeout(ctx, stores.HeartbeatInterval/2) + curSes, err := sw.worker.w.Session(sctx) + scancel() + if err != nil { + // Likely temporary error - log.Debugf("assign worker sector %d", todo.sector.Number) - err := sh.assignWorker(taskDone, wid, worker, todo) + log.Warnw("failed to check worker session", "error", err) - if err != nil { - log.Error("assignWorker error: %+v", err) - go todo.respond(xerrors.Errorf("assignWorker error: %w", err)) - } + if err := sw.disable(ctx); err != nil { + log.Warnw("failed to disable worker with session error", "worker", sw.wid, 
"error", err) + } - // Note: we're not freeing window.allocated resources here very much on purpose - copy(firstWindow.todo[tidx:], firstWindow.todo[tidx+1:]) - firstWindow.todo[len(firstWindow.todo)-1] = nil - firstWindow.todo = firstWindow.todo[:len(firstWindow.todo)-1] + select { + case <-sw.heartbeatTimer.C: + continue + case w := <-sw.scheduledWindows: + // was in flight when initially disabled, return + sw.worker.wndLk.Lock() + sw.worker.activeWindows = append(sw.worker.activeWindows, w) + sw.worker.wndLk.Unlock() + + if err := sw.disable(ctx); err != nil { + log.Warnw("failed to disable worker with session error", "worker", sw.wid, "error", err) } + case <-sw.sh.closing: + return false + case <-sw.worker.closingMgr: + return false + } + continue + } - copy(worker.activeWindows, worker.activeWindows[1:]) - worker.activeWindows[len(worker.activeWindows)-1] = nil - worker.activeWindows = worker.activeWindows[:len(worker.activeWindows)-1] - - windowsRequested-- + if WorkerID(curSes) != sw.wid { + if curSes != ClosedWorkerID { + // worker restarted + log.Warnw("worker session changed (worker restarted?)", "initial", sw.wid, "current", curSes) } - worker.wndLk.Unlock() - sh.workersLk.RUnlock() + return false } - }() - return nil + return true + } +} + +func (sw *schedWorker) requestWindows() bool { + for ; sw.windowsRequested < SchedWindows; sw.windowsRequested++ { + select { + case sw.sh.windowRequests <- &schedWindowRequest{ + worker: sw.wid, + done: sw.scheduledWindows, + }: + case <-sw.sh.closing: + return false + case <-sw.worker.closingMgr: + return false + } + } + return true +} + +func (sw *schedWorker) waitForUpdates() (update bool, ok bool) { + select { + case <-sw.heartbeatTimer.C: + return false, true + case w := <-sw.scheduledWindows: + sw.worker.wndLk.Lock() + sw.worker.activeWindows = append(sw.worker.activeWindows, w) + sw.worker.wndLk.Unlock() + return true, true + case <-sw.taskDone: + log.Debugw("task done", "workerid", sw.wid) + return true, true + case <-sw.sh.closing: + case <-sw.worker.closingMgr: + } + return false, false } func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) int { @@ -311,7 +320,59 @@ func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) in return compacted } -func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *workerHandle, req *workerRequest) error { +func (sw *schedWorker) processAssignedWindows() { + worker := sw.worker + +assignLoop: + // process windows in order + for len(worker.activeWindows) > 0 { + firstWindow := worker.activeWindows[0] + + // process tasks within a window, preferring tasks at lower indexes + for len(firstWindow.todo) > 0 { + tidx := -1 + + worker.lk.Lock() + for t, todo := range firstWindow.todo { + needRes := ResourceTable[todo.taskType][sw.sh.spt] + if worker.preparing.canHandleRequest(needRes, sw.wid, "startPreparing", worker.info.Resources) { + tidx = t + break + } + } + worker.lk.Unlock() + + if tidx == -1 { + break assignLoop + } + + todo := firstWindow.todo[tidx] + + log.Debugf("assign worker sector %d", todo.sector.Number) + err := sw.startProcessingTask(sw.taskDone, todo) + + if err != nil { + log.Error("startProcessingTask error: %+v", err) + go todo.respond(xerrors.Errorf("startProcessingTask error: %w", err)) + } + + // Note: we're not freeing window.allocated resources here very much on purpose + copy(firstWindow.todo[tidx:], firstWindow.todo[tidx+1:]) + firstWindow.todo[len(firstWindow.todo)-1] = nil + firstWindow.todo = 
firstWindow.todo[:len(firstWindow.todo)-1] + } + + copy(worker.activeWindows, worker.activeWindows[1:]) + worker.activeWindows[len(worker.activeWindows)-1] = nil + worker.activeWindows = worker.activeWindows[:len(worker.activeWindows)-1] + + sw.windowsRequested-- + } +} + +func (sw *schedWorker) startProcessingTask(taskDone chan struct{}, req *workerRequest) error { + w, sh := sw.worker, sw.sh + needRes := ResourceTable[req.taskType][sh.spt] w.lk.Lock() @@ -319,7 +380,7 @@ func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *worke w.lk.Unlock() go func() { - err := req.prepare(req.ctx, sh.wt.worker(wid, w.w)) + err := req.prepare(req.ctx, sh.wt.worker(sw.wid, w.w)) sh.workersLk.Lock() if err != nil { @@ -344,7 +405,7 @@ func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *worke return } - err = w.active.withResources(wid, w.info.Resources, needRes, &sh.workersLk, func() error { + err = w.active.withResources(sw.wid, w.info.Resources, needRes, &sh.workersLk, func() error { w.lk.Lock() w.preparing.free(w.info.Resources, needRes) w.lk.Unlock() @@ -356,7 +417,7 @@ func (sh *scheduler) assignWorker(taskDone chan struct{}, wid WorkerID, w *worke case <-sh.closing: } - err = req.work(req.ctx, sh.wt.worker(wid, w.w)) + err = req.work(req.ctx, sh.wt.worker(sw.wid, w.w)) select { case req.ret <- workerResponse{err: err}: diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go index 54b26b0ac15..b6a8793c724 100644 --- a/extern/sector-storage/worker_local.go +++ b/extern/sector-storage/worker_local.go @@ -225,7 +225,7 @@ func (l *LocalWorker) asyncCall(ctx context.Context, sector abi.SectorID, rt Ret res, err := work(ctx, ci) - { + if err != nil { rb, err := json.Marshal(res) if err != nil { log.Errorf("tracking call (marshaling results): %+v", err) From 96c5ff7e7f12427115f7db0712e212d871ba4ff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 28 Oct 2020 14:23:38 +0100 Subject: [PATCH 57/61] sched: use more letters for variables --- extern/sector-storage/manager_calltracker.go | 2 +- extern/sector-storage/sched.go | 6 +- extern/sector-storage/sched_test.go | 2 +- extern/sector-storage/sched_worker.go | 72 +++++++++++--------- extern/sector-storage/selector_alloc.go | 4 +- extern/sector-storage/selector_existing.go | 4 +- extern/sector-storage/selector_task.go | 6 +- extern/sector-storage/stats.go | 2 +- 8 files changed, 52 insertions(+), 46 deletions(-) diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index 8315c6fe6f2..f0aa0445eff 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -334,7 +334,7 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri err: err, } - m.sched.wt.onDone(callID) + m.sched.workTracker.onDone(callID) m.workLk.Lock() defer m.workLk.Unlock() diff --git a/extern/sector-storage/sched.go b/extern/sector-storage/sched.go index 34cba4cf2fe..426658c4100 100644 --- a/extern/sector-storage/sched.go +++ b/extern/sector-storage/sched.go @@ -64,7 +64,7 @@ type scheduler struct { schedQueue *requestQueue openWindows []*schedWindowRequest - wt *workTracker + workTracker *workTracker info chan func(interface{}) @@ -74,7 +74,7 @@ type scheduler struct { } type workerHandle struct { - w Worker + workerRpc Worker info storiface.WorkerInfo @@ -155,7 +155,7 @@ func newScheduler(spt abi.RegisteredSealProof) *scheduler { schedQueue: &requestQueue{}, - 
wt: &workTracker{ + workTracker: &workTracker{ done: map[storiface.CallID]struct{}{}, running: map[storiface.CallID]trackedWork{}, }, diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index 1afa92b642c..93014a117e8 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -530,7 +530,7 @@ func BenchmarkTrySched(b *testing.B) { sched := newScheduler(spt) sched.workers[WorkerID{}] = &workerHandle{ - w: nil, + workerRpc: nil, info: storiface.WorkerInfo{ Hostname: "t", Resources: decentWorkerResources, diff --git a/extern/sector-storage/sched_worker.go b/extern/sector-storage/sched_worker.go index 037176a11da..ff43009d382 100644 --- a/extern/sector-storage/sched_worker.go +++ b/extern/sector-storage/sched_worker.go @@ -2,14 +2,15 @@ package sectorstorage import ( "context" - "github.com/filecoin-project/lotus/extern/sector-storage/stores" "time" "golang.org/x/xerrors" + + "github.com/filecoin-project/lotus/extern/sector-storage/stores" ) type schedWorker struct { - sh *scheduler + sched *scheduler worker *workerHandle wid WorkerID @@ -37,8 +38,8 @@ func (sh *scheduler) runWorker(ctx context.Context, w Worker) error { } worker := &workerHandle{ - w: w, - info: info, + workerRpc: w, + info: info, preparing: &activeResources{}, active: &activeResources{}, @@ -63,7 +64,7 @@ func (sh *scheduler) runWorker(ctx context.Context, w Worker) error { sh.workersLk.Unlock() sw := &schedWorker{ - sh: sh, + sched: sh, worker: worker, wid: wid, @@ -81,7 +82,7 @@ func (sh *scheduler) runWorker(ctx context.Context, w Worker) error { } func (sw *schedWorker) handleWorker() { - worker, sh := sw.worker, sw.sh + worker, sched := sw.worker, sw.sched ctx, cancel := context.WithCancel(context.TODO()) defer cancel() @@ -95,17 +96,17 @@ func (sw *schedWorker) handleWorker() { log.Warnw("failed to disable worker", "worker", sw.wid, "error", err) } - sh.workersLk.Lock() - delete(sh.workers, sw.wid) - sh.workersLk.Unlock() + sched.workersLk.Lock() + delete(sched.workers, sw.wid) + sched.workersLk.Unlock() }() defer sw.heartbeatTimer.Stop() for { - sh.workersLk.Lock() + sched.workersLk.Lock() enabled := worker.enabled - sh.workersLk.Unlock() + sched.workersLk.Unlock() // ask for more windows if we need them (non-blocking) if enabled { @@ -123,9 +124,9 @@ func (sw *schedWorker) handleWorker() { // session looks good if !enabled { - sh.workersLk.Lock() + sched.workersLk.Lock() worker.enabled = true - sh.workersLk.Unlock() + sched.workersLk.Unlock() // we'll send window requests on the next loop } @@ -142,16 +143,16 @@ func (sw *schedWorker) handleWorker() { } // process assigned windows (non-blocking) - sh.workersLk.RLock() + sched.workersLk.RLock() worker.wndLk.Lock() - sw.windowsRequested -= sh.workerCompactWindows(worker, sw.wid) + sw.workerCompactWindows() // send tasks to the worker sw.processAssignedWindows() worker.wndLk.Unlock() - sh.workersLk.RUnlock() + sched.workersLk.RUnlock() } } @@ -160,7 +161,7 @@ func (sw *schedWorker) disable(ctx context.Context) error { // request cleanup in the main scheduler goroutine select { - case sw.sh.workerDisable <- workerDisableReq{ + case sw.sched.workerDisable <- workerDisableReq{ activeWindows: sw.worker.activeWindows, wid: sw.wid, done: func() { @@ -169,7 +170,7 @@ func (sw *schedWorker) disable(ctx context.Context) error { }: case <-ctx.Done(): return ctx.Err() - case <-sw.sh.closing: + case <-sw.sched.closing: return nil } @@ -178,7 +179,7 @@ func (sw *schedWorker) disable(ctx context.Context) error { 
case <-done: case <-ctx.Done(): return ctx.Err() - case <-sw.sh.closing: + case <-sw.sched.closing: return nil } @@ -190,7 +191,7 @@ func (sw *schedWorker) disable(ctx context.Context) error { func (sw *schedWorker) checkSession(ctx context.Context) bool { for { sctx, scancel := context.WithTimeout(ctx, stores.HeartbeatInterval/2) - curSes, err := sw.worker.w.Session(sctx) + curSes, err := sw.worker.workerRpc.Session(sctx) scancel() if err != nil { // Likely temporary error @@ -213,7 +214,7 @@ func (sw *schedWorker) checkSession(ctx context.Context) bool { if err := sw.disable(ctx); err != nil { log.Warnw("failed to disable worker with session error", "worker", sw.wid, "error", err) } - case <-sw.sh.closing: + case <-sw.sched.closing: return false case <-sw.worker.closingMgr: return false @@ -237,11 +238,11 @@ func (sw *schedWorker) checkSession(ctx context.Context) bool { func (sw *schedWorker) requestWindows() bool { for ; sw.windowsRequested < SchedWindows; sw.windowsRequested++ { select { - case sw.sh.windowRequests <- &schedWindowRequest{ + case sw.sched.windowRequests <- &schedWindowRequest{ worker: sw.wid, done: sw.scheduledWindows, }: - case <-sw.sh.closing: + case <-sw.sched.closing: return false case <-sw.worker.closingMgr: return false @@ -262,13 +263,16 @@ func (sw *schedWorker) waitForUpdates() (update bool, ok bool) { case <-sw.taskDone: log.Debugw("task done", "workerid", sw.wid) return true, true - case <-sw.sh.closing: + case <-sw.sched.closing: case <-sw.worker.closingMgr: } + return false, false } -func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) int { +func (sw *schedWorker) workerCompactWindows() { + worker := sw.worker + // move tasks from older windows to newer windows if older windows // still can fit them if len(worker.activeWindows) > 1 { @@ -277,8 +281,8 @@ func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) in var moved []int for ti, todo := range window.todo { - needRes := ResourceTable[todo.taskType][sh.spt] - if !lower.allocated.canHandleRequest(needRes, wid, "compactWindows", worker.info.Resources) { + needRes := ResourceTable[todo.taskType][sw.sched.spt] + if !lower.allocated.canHandleRequest(needRes, sw.wid, "compactWindows", worker.info.Resources) { continue } @@ -316,8 +320,7 @@ func (sh *scheduler) workerCompactWindows(worker *workerHandle, wid WorkerID) in } worker.activeWindows = newWindows - - return compacted + sw.windowsRequested -= compacted } func (sw *schedWorker) processAssignedWindows() { @@ -334,7 +337,7 @@ assignLoop: worker.lk.Lock() for t, todo := range firstWindow.todo { - needRes := ResourceTable[todo.taskType][sw.sh.spt] + needRes := ResourceTable[todo.taskType][sw.sched.spt] if worker.preparing.canHandleRequest(needRes, sw.wid, "startPreparing", worker.info.Resources) { tidx = t break @@ -371,7 +374,7 @@ assignLoop: } func (sw *schedWorker) startProcessingTask(taskDone chan struct{}, req *workerRequest) error { - w, sh := sw.worker, sw.sh + w, sh := sw.worker, sw.sched needRes := ResourceTable[req.taskType][sh.spt] @@ -380,7 +383,8 @@ func (sw *schedWorker) startProcessingTask(taskDone chan struct{}, req *workerRe w.lk.Unlock() go func() { - err := req.prepare(req.ctx, sh.wt.worker(sw.wid, w.w)) + // first run the prepare step (e.g. 
fetching sector data from other worker) + err := req.prepare(req.ctx, sh.workTracker.worker(sw.wid, w.workerRpc)) sh.workersLk.Lock() if err != nil { @@ -405,6 +409,7 @@ func (sw *schedWorker) startProcessingTask(taskDone chan struct{}, req *workerRe return } + // wait (if needed) for resources in the 'active' window err = w.active.withResources(sw.wid, w.info.Resources, needRes, &sh.workersLk, func() error { w.lk.Lock() w.preparing.free(w.info.Resources, needRes) @@ -417,7 +422,8 @@ func (sw *schedWorker) startProcessingTask(taskDone chan struct{}, req *workerRe case <-sh.closing: } - err = req.work(req.ctx, sh.wt.worker(sw.wid, w.w)) + // Do the work! + err = req.work(req.ctx, sh.workTracker.worker(sw.wid, w.workerRpc)) select { case req.ret <- workerResponse{err: err}: diff --git a/extern/sector-storage/selector_alloc.go b/extern/sector-storage/selector_alloc.go index 9afa6abaab2..14724fbe83f 100644 --- a/extern/sector-storage/selector_alloc.go +++ b/extern/sector-storage/selector_alloc.go @@ -27,7 +27,7 @@ func newAllocSelector(index stores.SectorIndex, alloc storiface.SectorFileType, } func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) { - tasks, err := whnd.w.TaskTypes(ctx) + tasks, err := whnd.workerRpc.TaskTypes(ctx) if err != nil { return false, xerrors.Errorf("getting supported worker task types: %w", err) } @@ -35,7 +35,7 @@ func (s *allocSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi return false, nil } - paths, err := whnd.w.Paths(ctx) + paths, err := whnd.workerRpc.Paths(ctx) if err != nil { return false, xerrors.Errorf("getting worker paths: %w", err) } diff --git a/extern/sector-storage/selector_existing.go b/extern/sector-storage/selector_existing.go index 025ad3f21b5..0e3a41aeb3d 100644 --- a/extern/sector-storage/selector_existing.go +++ b/extern/sector-storage/selector_existing.go @@ -29,7 +29,7 @@ func newExistingSelector(index stores.SectorIndex, sector abi.SectorID, alloc st } func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) { - tasks, err := whnd.w.TaskTypes(ctx) + tasks, err := whnd.workerRpc.TaskTypes(ctx) if err != nil { return false, xerrors.Errorf("getting supported worker task types: %w", err) } @@ -37,7 +37,7 @@ func (s *existingSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt return false, nil } - paths, err := whnd.w.Paths(ctx) + paths, err := whnd.workerRpc.Paths(ctx) if err != nil { return false, xerrors.Errorf("getting worker paths: %w", err) } diff --git a/extern/sector-storage/selector_task.go b/extern/sector-storage/selector_task.go index 807b531038b..ffed40d683f 100644 --- a/extern/sector-storage/selector_task.go +++ b/extern/sector-storage/selector_task.go @@ -20,7 +20,7 @@ func newTaskSelector() *taskSelector { } func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi.RegisteredSealProof, whnd *workerHandle) (bool, error) { - tasks, err := whnd.w.TaskTypes(ctx) + tasks, err := whnd.workerRpc.TaskTypes(ctx) if err != nil { return false, xerrors.Errorf("getting supported worker task types: %w", err) } @@ -30,11 +30,11 @@ func (s *taskSelector) Ok(ctx context.Context, task sealtasks.TaskType, spt abi. 
 }
 
 func (s *taskSelector) Cmp(ctx context.Context, _ sealtasks.TaskType, a, b *workerHandle) (bool, error) {
-	atasks, err := a.w.TaskTypes(ctx)
+	atasks, err := a.workerRpc.TaskTypes(ctx)
 	if err != nil {
 		return false, xerrors.Errorf("getting supported worker task types: %w", err)
 	}
-	btasks, err := b.w.TaskTypes(ctx)
+	btasks, err := b.workerRpc.TaskTypes(ctx)
 	if err != nil {
 		return false, xerrors.Errorf("getting supported worker task types: %w", err)
 	}
diff --git a/extern/sector-storage/stats.go b/extern/sector-storage/stats.go
index f9d96fc5d5c..bae60b4268b 100644
--- a/extern/sector-storage/stats.go
+++ b/extern/sector-storage/stats.go
@@ -33,7 +33,7 @@ func (m *Manager) WorkerJobs() map[uuid.UUID][]storiface.WorkerJob {
 	out := map[uuid.UUID][]storiface.WorkerJob{}
 	calls := map[storiface.CallID]struct{}{}
 
-	for _, t := range m.sched.wt.Running() {
+	for _, t := range m.sched.workTracker.Running() {
 		out[uuid.UUID(t.worker)] = append(out[uuid.UUID(t.worker)], t.job)
 		calls[t.job.ID] = struct{}{}
 	}

From 4cf00b8b428a2934fabda1a63bedf629a5c1630d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Magiera?=
Date: Wed, 28 Oct 2020 14:29:17 +0100
Subject: [PATCH 58/61] worker_local: address review

---
 extern/sector-storage/worker_local.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/extern/sector-storage/worker_local.go b/extern/sector-storage/worker_local.go
index b6a8793c724..cb1a43c531f 100644
--- a/extern/sector-storage/worker_local.go
+++ b/extern/sector-storage/worker_local.go
@@ -35,13 +35,16 @@ type WorkerConfig struct {
 	NoSwap    bool
 }
 
+// used to provide custom proofs impl (mostly used in testing)
+type ExecutorFunc func() (ffiwrapper.Storage, error)
+
 type LocalWorker struct {
 	scfg       *ffiwrapper.Config
 	storage    stores.Store
 	localStore *stores.Local
 	sindex     stores.SectorIndex
 	ret        storiface.WorkerReturn
-	executor   func() (ffiwrapper.Storage, error)
+	executor   ExecutorFunc
 	noSwap     bool
 
 	ct          *workerCallTracker
@@ -52,7 +55,7 @@ type LocalWorker struct {
 	closing     chan struct{}
 }
 
-func newLocalWorker(executor func() (ffiwrapper.Storage, error), wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn, cst *statestore.StateStore) *LocalWorker {
+func newLocalWorker(executor ExecutorFunc, wcfg WorkerConfig, store stores.Store, local *stores.Local, sindex stores.SectorIndex, ret storiface.WorkerReturn, cst *statestore.StateStore) *LocalWorker {
 	acceptTasks := map[sealtasks.TaskType]struct{}{}
 	for _, taskType := range wcfg.TaskTypes {
 		acceptTasks[taskType] = struct{}{}

From ed2f81da2f99b09c90aca1587f74e987ec2c8c2e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Magiera?=
Date: Wed, 28 Oct 2020 14:34:28 +0100
Subject: [PATCH 59/61] sched: Fix tests

---
 extern/sector-storage/sched_test.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go
index 93014a117e8..d6368431593 100644
--- a/extern/sector-storage/sched_test.go
+++ b/extern/sector-storage/sched_test.go
@@ -591,8 +591,13 @@ func TestWindowCompact(t *testing.T) {
 			wh.activeWindows = append(wh.activeWindows, window)
 		}
 
-		n := sh.workerCompactWindows(wh, WorkerID{})
-		require.Equal(t, len(start)-len(expect), n)
+		sw := schedWorker{
+			sched: &sh,
+			worker: wh,
+		}
+
+		sw.workerCompactWindows()
+		require.Equal(t, len(start)-len(expect), -sw.windowsRequested)
 
 		for wi, tasks := range expect {
 			var expectRes activeResources

From 4100f6eeadabd73c7d24ab167e9f2ca4cdf53c6e Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 28 Oct 2020 15:10:43 +0100 Subject: [PATCH 60/61] fix TestWDPostDoPost --- extern/sector-storage/sched_test.go | 4 ++-- storage/wdpost_run_test.go | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/extern/sector-storage/sched_test.go b/extern/sector-storage/sched_test.go index d6368431593..849896ff6f1 100644 --- a/extern/sector-storage/sched_test.go +++ b/extern/sector-storage/sched_test.go @@ -592,8 +592,8 @@ func TestWindowCompact(t *testing.T) { } sw := schedWorker{ - sched: &sh, - worker: wh, + sched: &sh, + worker: wh, } sw.workerCompactWindows() diff --git a/storage/wdpost_run_test.go b/storage/wdpost_run_test.go index 6c05f806f05..a76483a5f09 100644 --- a/storage/wdpost_run_test.go +++ b/storage/wdpost_run_test.go @@ -16,12 +16,14 @@ import ( "github.com/filecoin-project/go-state-types/big" "github.com/filecoin-project/go-state-types/crypto" "github.com/filecoin-project/go-state-types/dline" + "github.com/filecoin-project/go-state-types/network" builtin2 "github.com/filecoin-project/specs-actors/v2/actors/builtin" miner2 "github.com/filecoin-project/specs-actors/v2/actors/builtin/miner" proof2 "github.com/filecoin-project/specs-actors/v2/actors/runtime/proof" tutils "github.com/filecoin-project/specs-actors/v2/support/testing" "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/actors/builtin/miner" "github.com/filecoin-project/lotus/chain/types" "github.com/filecoin-project/lotus/journal" @@ -91,6 +93,10 @@ func (m *mockStorageMinerAPI) StateWaitMsg(ctx context.Context, cid cid.Cid, con }, nil } +func (m *mockStorageMinerAPI) StateNetworkVersion(context.Context, types.TipSetKey) (network.Version, error) { + return build.NewestNetworkVersion, nil +} + type mockProver struct { } From da7ecc1527255d4f2f09cbff097a9f483e9b6f68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Wed, 28 Oct 2020 16:15:17 +0100 Subject: [PATCH 61/61] Fix flaky sealing manager tests --- extern/sector-storage/manager_test.go | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/extern/sector-storage/manager_test.go b/extern/sector-storage/manager_test.go index 6efcc304e0b..93863296761 100644 --- a/extern/sector-storage/manager_test.go +++ b/extern/sector-storage/manager_test.go @@ -85,9 +85,8 @@ func (t *testStorage) Stat(path string) (fsutil.FsStat, error) { var _ stores.LocalStorage = &testStorage{} -func newTestMgr(ctx context.Context, t *testing.T, ds datastore.Datastore) (*Manager, *stores.Local, *stores.Remote, *stores.Index) { +func newTestMgr(ctx context.Context, t *testing.T, ds datastore.Datastore) (*Manager, *stores.Local, *stores.Remote, *stores.Index, func()) { st := newTestStorage(t) - defer st.cleanup() si := stores.NewIndex() cfg := &ffiwrapper.Config{ @@ -126,14 +125,15 @@ func newTestMgr(ctx context.Context, t *testing.T, ds datastore.Datastore) (*Man go m.sched.runSched() - return m, lstor, stor, si + return m, lstor, stor, si, st.cleanup } func TestSimple(t *testing.T) { logging.SetAllLoggers(logging.LevelDebug) ctx := context.Background() - m, lstor, _, _ := newTestMgr(ctx, t, datastore.NewMapDatastore()) + m, lstor, _, _, cleanup := newTestMgr(ctx, t, datastore.NewMapDatastore()) + defer cleanup() localTasks := []sealtasks.TaskType{ sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch, @@ -167,7 +167,8 @@ func 
TestRedoPC1(t *testing.T) { logging.SetAllLoggers(logging.LevelDebug) ctx := context.Background() - m, lstor, _, _ := newTestMgr(ctx, t, datastore.NewMapDatastore()) + m, lstor, _, _, cleanup := newTestMgr(ctx, t, datastore.NewMapDatastore()) + defer cleanup() localTasks := []sealtasks.TaskType{ sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch, @@ -216,7 +217,8 @@ func TestRestartManager(t *testing.T) { ds := datastore.NewMapDatastore() - m, lstor, _, _ := newTestMgr(ctx, t, ds) + m, lstor, _, _, cleanup := newTestMgr(ctx, t, ds) + defer cleanup() localTasks := []sealtasks.TaskType{ sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch, @@ -265,7 +267,9 @@ func TestRestartManager(t *testing.T) { cwg.Wait() require.Error(t, perr) - m, _, _, _ = newTestMgr(ctx, t, ds) + m, _, _, _, cleanup2 := newTestMgr(ctx, t, ds) + defer cleanup2() + tw.ret = m // simulate jsonrpc auto-reconnect err = m.AddWorker(ctx, tw) require.NoError(t, err) @@ -287,7 +291,8 @@ func TestRestartWorker(t *testing.T) { ds := datastore.NewMapDatastore() - m, lstor, stor, idx := newTestMgr(ctx, t, ds) + m, lstor, stor, idx, cleanup := newTestMgr(ctx, t, ds) + defer cleanup() localTasks := []sealtasks.TaskType{ sealtasks.TTAddPiece, sealtasks.TTPreCommit1, sealtasks.TTCommit1, sealtasks.TTFinalize, sealtasks.TTFetch,