-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix startup with failing configuration #26126
Changes from 5 commits
a25f2bd
1fcaded
3f8aeb5
6b74838
12b1455
47db569
ae3951d
89a1c35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ import ( | |
var ( | ||
// ErrAppNotRunning is returned when configuration is performed on not running application. | ||
ErrAppNotRunning = errors.New("application is not running", errors.TypeApplication) | ||
procExitTimeout = 10 * time.Second | ||
) | ||
|
||
// Application encapsulates a concrete application ran by elastic-agent e.g Beat. | ||
|
@@ -47,6 +48,7 @@ type Application struct { | |
tag app.Taggable | ||
state state.State | ||
reporter state.Reporter | ||
watchClosers map[int]context.CancelFunc | ||
|
||
uid int | ||
gid int | ||
|
@@ -105,6 +107,7 @@ func NewApplication( | |
uid: uid, | ||
gid: gid, | ||
statusReporter: statusController.RegisterApp(id, appName), | ||
watchClosers: make(map[int]context.CancelFunc), | ||
}, nil | ||
} | ||
|
||
|
@@ -159,6 +162,8 @@ func (a *Application) Stop() { | |
|
||
a.srvState = nil | ||
if a.state.ProcessInfo != nil { | ||
// stop and clean watcher | ||
a.stopWatcher(a.state.ProcessInfo) | ||
if err := a.state.ProcessInfo.Process.Signal(stopSig); err == nil { | ||
// no error on signal, so wait for it to stop | ||
_, _ = a.state.ProcessInfo.Process.Wait() | ||
|
@@ -192,33 +197,52 @@ func (a *Application) watch(ctx context.Context, p app.Taggable, proc *process.I | |
case ps := <-a.waitProc(proc.Process): | ||
procState = ps | ||
case <-a.bgContext.Done(): | ||
a.Stop() | ||
return | ||
case <-ctx.Done(): | ||
// closer called | ||
return | ||
} | ||
|
||
a.appLock.Lock() | ||
defer a.appLock.Unlock() | ||
if a.state.ProcessInfo != proc { | ||
// already another process started, another watcher is watching instead | ||
a.appLock.Unlock() | ||
gracefulKill(proc) | ||
return | ||
} | ||
|
||
// stop the watcher | ||
a.stopWatcher(a.state.ProcessInfo) | ||
|
||
// was already stopped by Stop, do not restart | ||
if a.state.Status == state.Stopped { | ||
return | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are we missing a `a.appLock.Unlock()t here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. true There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. checking the complete function body, we always use |
||
} | ||
|
||
a.state.ProcessInfo = nil | ||
srvState := a.srvState | ||
|
||
if srvState == nil || srvState.Expected() == proto.StateExpected_STOPPING { | ||
a.appLock.Unlock() | ||
return | ||
} | ||
|
||
msg := fmt.Sprintf("exited with code: %d", procState.ExitCode()) | ||
a.setState(state.Crashed, msg, nil) | ||
a.setState(state.Restarting, msg, nil) | ||
|
||
// it was a crash | ||
a.start(ctx, p, cfg) | ||
a.appLock.Unlock() | ||
a.start(ctx, p, cfg, true) | ||
}() | ||
} | ||
|
||
func (a *Application) stopWatcher(procInfo *process.Info) { | ||
if procInfo != nil { | ||
if closer, ok := a.watchClosers[procInfo.PID]; ok { | ||
closer() | ||
delete(a.watchClosers, procInfo.PID) | ||
} | ||
} | ||
} | ||
|
||
func (a *Application) waitProc(proc *os.Process) <-chan *os.ProcessState { | ||
resChan := make(chan *os.ProcessState) | ||
|
||
|
@@ -250,3 +274,31 @@ func (a *Application) setState(s state.Status, msg string, payload map[string]in | |
func (a *Application) cleanUp() { | ||
a.monitor.Cleanup(a.desc.Spec(), a.pipelineID) | ||
} | ||
|
||
func gracefulKill(proc *process.Info) { | ||
if proc == nil || proc.Process == nil { | ||
return | ||
} | ||
|
||
// send stop signal to request stop | ||
proc.Process.Signal(os.Interrupt) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this work on Windows? I thought There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you're right windows does not implement sending interupt. using Stop function instead |
||
|
||
var wg sync.WaitGroup | ||
doneChan := make(chan struct{}) | ||
wg.Add(1) | ||
go func() { | ||
wg.Done() | ||
_, _ = proc.Process.Wait() | ||
close(doneChan) | ||
}() | ||
|
||
// wait for awaiter | ||
wg.Wait() | ||
|
||
// kill in case it's still running after timeout | ||
select { | ||
case <-doneChan: | ||
case <-time.After(procExitTimeout): | ||
_ = proc.Process.Kill() | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just curious: is there any connection between
a.bgContext
andctx
? Doesctx
has to be cancelled whena.bgContext
is cancelled?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bgContext is like a cancellation token passed from top of the app and cancelled on exit or unenroll, so agent cleans and backs up everything in a nice manner. we want to avoid just shutting down agent without any cleaning as this may turn out problematic