diff --git a/api/api_storage.go b/api/api_storage.go index d003ec776b6..738a05e095d 100644 --- a/api/api_storage.go +++ b/api/api_storage.go @@ -71,6 +71,7 @@ type StorageMiner interface { // SealingSchedDiag dumps internal sealing scheduler state SealingSchedDiag(ctx context.Context, doSched bool) (interface{}, error) + SealingAbort(ctx context.Context, call storiface.CallID) error stores.SectorIndex diff --git a/api/apistruct/struct.go b/api/apistruct/struct.go index 8db09af7102..08b3ba9a934 100644 --- a/api/apistruct/struct.go +++ b/api/apistruct/struct.go @@ -323,7 +323,8 @@ type StorageMinerStruct struct { ReturnReadPiece func(ctx context.Context, callID storiface.CallID, ok bool, err string) error `perm:"admin" retry:"true"` ReturnFetch func(ctx context.Context, callID storiface.CallID, err string) error `perm:"admin" retry:"true"` - SealingSchedDiag func(context.Context, bool) (interface{}, error) `perm:"admin"` + SealingSchedDiag func(context.Context, bool) (interface{}, error) `perm:"admin"` + SealingAbort func(ctx context.Context, call storiface.CallID) error `perm:"admin"` StorageList func(context.Context) (map[stores.ID][]stores.Decl, error) `perm:"admin"` StorageLocal func(context.Context) (map[stores.ID]string, error) `perm:"admin"` @@ -1318,6 +1319,10 @@ func (c *StorageMinerStruct) SealingSchedDiag(ctx context.Context, doSched bool) return c.Internal.SealingSchedDiag(ctx, doSched) } +func (c *StorageMinerStruct) SealingAbort(ctx context.Context, call storiface.CallID) error { + return c.Internal.SealingAbort(ctx, call) +} + func (c *StorageMinerStruct) StorageAttach(ctx context.Context, si stores.StorageInfo, st fsutil.FsStat) error { return c.Internal.StorageAttach(ctx, si, st) } diff --git a/cmd/lotus-storage-miner/sealing.go b/cmd/lotus-storage-miner/sealing.go index faaab7b01f9..ad890129d0b 100644 --- a/cmd/lotus-storage-miner/sealing.go +++ b/cmd/lotus-storage-miner/sealing.go @@ -28,6 +28,7 @@ var sealingCmd = &cli.Command{ sealingJobsCmd, sealingWorkersCmd, sealingSchedDiagCmd, + sealingAbortCmd, }, } @@ -124,9 +125,13 @@ var sealingWorkersCmd = &cli.Command{ var sealingJobsCmd = &cli.Command{ Name: "jobs", - Usage: "list workers", + Usage: "list running jobs", Flags: []cli.Flag{ &cli.BoolFlag{Name: "color"}, + &cli.BoolFlag{ + Name: "show-ret-done", + Usage: "show returned but not consumed calls", + }, }, Action: func(cctx *cli.Context) error { color.NoColor = !cctx.Bool("color") @@ -191,6 +196,9 @@ var sealingJobsCmd = &cli.Command{ case l.RunWait > 0: state = fmt.Sprintf("assigned(%d)", l.RunWait-1) case l.RunWait == storiface.RWRetDone: + if !cctx.Bool("show-ret-done") { + continue + } state = "ret-done" case l.RunWait == storiface.RWReturned: state = "returned" @@ -208,9 +216,9 @@ var sealingJobsCmd = &cli.Command{ } _, _ = fmt.Fprintf(tw, "%s\t%d\t%s\t%s\t%s\t%s\t%s\n", - hex.EncodeToString(l.ID.ID[10:]), + hex.EncodeToString(l.ID.ID[:4]), l.Sector.Number, - hex.EncodeToString(l.wid[5:]), + hex.EncodeToString(l.wid[:4]), hostname, l.Task.Short(), state, @@ -253,3 +261,47 @@ var sealingSchedDiagCmd = &cli.Command{ return nil }, } + +var sealingAbortCmd = &cli.Command{ + Name: "abort", + Usage: "Abort a running job", + ArgsUsage: "[callid]", + Action: func(cctx *cli.Context) error { + if cctx.Args().Len() != 1 { + return xerrors.Errorf("expected 1 argument") + } + + nodeApi, closer, err := lcli.GetStorageMinerAPI(cctx) + if err != nil { + return err + } + defer closer() + + ctx := lcli.ReqContext(cctx) + + jobs, err := nodeApi.WorkerJobs(ctx) + if err != nil { + return xerrors.Errorf("getting worker jobs: %w", err) + } + + var job *storiface.WorkerJob + outer: + for _, workerJobs := range jobs { + for _, j := range workerJobs { + if strings.HasPrefix(j.ID.ID.String(), cctx.Args().First()) { + j := j + job = &j + break outer + } + } + } + + if job == nil { + return xerrors.Errorf("job with specified id prefix not found") + } + + fmt.Printf("aborting job %s, task %s, sector %d, running on host %s\n", job.ID.String(), job.Task.Short(), job.Sector.Number, job.Hostname) + + return nodeApi.SealingAbort(ctx, job.ID) + }, +} diff --git a/extern/sector-storage/manager.go b/extern/sector-storage/manager.go index 5e86cb4d0eb..a3d6a413104 100644 --- a/extern/sector-storage/manager.go +++ b/extern/sector-storage/manager.go @@ -220,7 +220,9 @@ func (m *Manager) readPiece(sink io.Writer, sector abi.SectorID, offset storifac if err != nil { return err } - *rok = r.(bool) + if r != nil { + *rok = r.(bool) + } return nil } } @@ -342,7 +344,9 @@ func (m *Manager) AddPiece(ctx context.Context, sector abi.SectorID, existingPie if err != nil { return err } - out = p.(abi.PieceInfo) + if p != nil { + out = p.(abi.PieceInfo) + } return nil }) @@ -366,7 +370,9 @@ func (m *Manager) SealPreCommit1(ctx context.Context, sector abi.SectorID, ticke waitErr = werr return } - out = p.(storage.PreCommit1Out) + if p != nil { + out = p.(storage.PreCommit1Out) + } } if wait { // already in progress @@ -415,7 +421,9 @@ func (m *Manager) SealPreCommit2(ctx context.Context, sector abi.SectorID, phase waitErr = werr return } - out = p.(storage.SectorCids) + if p != nil { + out = p.(storage.SectorCids) + } } if wait { // already in progress @@ -462,7 +470,9 @@ func (m *Manager) SealCommit1(ctx context.Context, sector abi.SectorID, ticket a waitErr = werr return } - out = p.(storage.Commit1Out) + if p != nil { + out = p.(storage.Commit1Out) + } } if wait { // already in progress @@ -509,7 +519,9 @@ func (m *Manager) SealCommit2(ctx context.Context, sector abi.SectorID, phase1Ou waitErr = werr return } - out = p.(storage.Proof) + if p != nil { + out = p.(storage.Proof) + } } if wait { // already in progress @@ -688,7 +700,48 @@ func (m *Manager) SchedDiag(ctx context.Context, doSched bool) (interface{}, err } } - return m.sched.Info(ctx) + si, err := m.sched.Info(ctx) + if err != nil { + return nil, err + } + + type SchedInfo interface{} + i := struct { + SchedInfo + + ReturnedWork []string + Waiting []string + + CallToWork map[string]string + + EarlyRet []string + }{ + SchedInfo: si, + + CallToWork: map[string]string{}, + } + + m.workLk.Lock() + + for w := range m.results { + i.ReturnedWork = append(i.ReturnedWork, w.String()) + } + + for id := range m.callRes { + i.EarlyRet = append(i.EarlyRet, id.String()) + } + + for w := range m.waitRes { + i.Waiting = append(i.Waiting, w.String()) + } + + for c, w := range m.callToWork { + i.CallToWork[c.String()] = w.String() + } + + m.workLk.Unlock() + + return i, nil } func (m *Manager) Close(ctx context.Context) error { diff --git a/extern/sector-storage/manager_calltracker.go b/extern/sector-storage/manager_calltracker.go index e6182adb0f9..9b39a5a7046 100644 --- a/extern/sector-storage/manager_calltracker.go +++ b/extern/sector-storage/manager_calltracker.go @@ -414,3 +414,7 @@ func (m *Manager) returnResult(callID storiface.CallID, r interface{}, serr stri return nil } + +func (m *Manager) Abort(ctx context.Context, call storiface.CallID) error { + return m.returnResult(call, nil, "task aborted") +} diff --git a/node/impl/storminer.go b/node/impl/storminer.go index a58621c97f5..b90734e3c7b 100644 --- a/node/impl/storminer.go +++ b/node/impl/storminer.go @@ -300,6 +300,10 @@ func (sm *StorageMinerAPI) SealingSchedDiag(ctx context.Context, doSched bool) ( return sm.StorageMgr.SchedDiag(ctx, doSched) } +func (sm *StorageMinerAPI) SealingAbort(ctx context.Context, call storiface.CallID) error { + return sm.StorageMgr.Abort(ctx, call) +} + func (sm *StorageMinerAPI) MarketImportDealData(ctx context.Context, propCid cid.Cid, path string) error { fi, err := os.Open(path) if err != nil {