Skip to content

Commit

Permalink
delay-between-restarts should apply between CMS batches as well (#58)
Browse files Browse the repository at this point in the history
'
  • Loading branch information
Jorres authored Jan 22, 2025
1 parent b455160 commit 022413a
Show file tree
Hide file tree
Showing 5 changed files with 2,253 additions and 6 deletions.
2 changes: 1 addition & 1 deletion pkg/rolling/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ ydbops will try to figure out if you broke this rule by comparing before\after o
`The limit on the number of simultaneous node restarts`)

fs.DurationVar(&o.DelayBetweenRestarts, "delay-between-restarts", DefaultDelayBetweenRestarts,
`Delay between two consecutive restarts in seconds. The number of simultaneous is set by 'max-concurrent-restarts'`)
`Delay between two consecutive restarts. E.g. '60s', '2m'. The number of simultaneous restarts is limited by 'nodes-inflight'.`)
}

func (o *RestartOptions) GetRestartDuration() *durationpb.Duration {
Expand Down
9 changes: 7 additions & 2 deletions pkg/rolling/restart_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,10 @@ func (rh *restartHandler) run() {

select {
case <-rh.done:
rh.logger.Debug("received from rh.done")
return
case <-time.After(rh.delayBetweenRestarts):
rh.logger.Debugf("successfully waited for %s", rh.delayBetweenRestarts.String())
continue
}
}
Expand All @@ -87,9 +89,12 @@ func (rh *restartHandler) run() {
}
}

func (rh *restartHandler) stop() {
close(rh.done)
func (rh *restartHandler) stop(waitForDelay bool) {
close(rh.queue)
if waitForDelay {
rh.wg.Wait()
}
close(rh.done)
}

func newRestartHandler(
Expand Down
10 changes: 7 additions & 3 deletions pkg/rolling/rolling.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ func (r *Rolling) processActionGroupStates(actions []*Ydb_Maintenance.ActionGrou
}

<-done
restartHandler.stop()

result, err := r.cms.CompleteAction(r.completedActions)
if err != nil {
r.logger.Warnf("Failed to complete action: %+v", err)
Expand All @@ -371,8 +371,12 @@ func (r *Rolling) processActionGroupStates(actions []*Ydb_Maintenance.ActionGrou
r.logCompleteResult(result)
r.state.unreportedButFinishedActionIds = []string{}

// completed when all actions marked as completed
return len(actions) == len(result.ActionStatuses)
restartCompleted := len(actions) == len(result.ActionStatuses)
waitForDelay := !restartCompleted

restartHandler.stop(waitForDelay)

return restartCompleted
}

func (r *Rolling) atomicHasActionInUnreported(actionID string) bool {
Expand Down
4 changes: 4 additions & 0 deletions sammyvimes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
2024-12-25T18:13:51.105+0100 debug Current logging level enabled: debug
2024-12-25T18:13:51.105+0100 debug Authentication type not specified, implicitly assuming type: iam-token
2024-12-25T18:13:51.105+0100 info Start rolling restart
2024-12-25T18:13:51.105+0100 debug Invoke ListClusterNodes method
Loading

0 comments on commit 022413a

Please sign in to comment.