-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
logmon: recover from shutting down call locally #5616
Changes from 3 commits
c23d673
b21849c
978fc65
ba373fe
1f1551a
658a734
a321901
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,13 +6,15 @@ import ( | |
"fmt" | ||
"path/filepath" | ||
"runtime" | ||
"time" | ||
|
||
hclog "github.com/hashicorp/go-hclog" | ||
plugin "github.com/hashicorp/go-plugin" | ||
"github.com/hashicorp/nomad/client/allocrunner/interfaces" | ||
"github.com/hashicorp/nomad/client/logmon" | ||
"github.com/hashicorp/nomad/helper/uuid" | ||
"github.com/hashicorp/nomad/nomad/structs" | ||
bstructs "github.com/hashicorp/nomad/plugins/base/structs" | ||
pstructs "github.com/hashicorp/nomad/plugins/shared/structs" | ||
) | ||
|
||
|
@@ -95,7 +97,37 @@ func reattachConfigFromHookData(data map[string]string) (*plugin.ReattachConfig, | |
func (h *logmonHook) Prestart(ctx context.Context, | ||
req *interfaces.TaskPrestartRequest, resp *interfaces.TaskPrestartResponse) error { | ||
|
||
// Attempt to reattach to logmon | ||
tries := 0 | ||
for { | ||
err := h.prestartOneLoop(ctx, req) | ||
if err == bstructs.ErrPluginShutdown { | ||
h.logger.Warn("logmon shutdown while making request", "error", err) | ||
|
||
if tries > 3 { | ||
return err | ||
} | ||
|
||
// retry after killing process and ensure we start a new logmon process | ||
tries++ | ||
h.logmonPluginClient.Kill() | ||
time.Sleep(1 * time.Second) | ||
continue | ||
} else if err != nil { | ||
return err | ||
} | ||
|
||
rCfg := pstructs.ReattachConfigFromGoPlugin(h.logmonPluginClient.ReattachConfig()) | ||
jsonCfg, err := json.Marshal(rCfg) | ||
if err != nil { | ||
return err | ||
} | ||
resp.State = map[string]string{logmonReattachKey: string(jsonCfg)} | ||
return nil | ||
} | ||
} | ||
|
||
func (h *logmonHook) prestartOneLoop(ctx context.Context, req *interfaces.TaskPrestartRequest) error { | ||
// attach to a running logmon if state indicates one | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I opted not to change the logic here, as it is somewhat brittle and I don't want to make it worse. But the idea is that if the gRPC call fails with a "shutting down" error, … |
||
if h.logmonPluginClient == nil { | ||
reattachConfig, err := reattachConfigFromHookData(req.PreviousState) | ||
if err != nil { | ||
|
@@ -105,12 +137,13 @@ func (h *logmonHook) Prestart(ctx context.Context, | |
if reattachConfig != nil { | ||
if err := h.launchLogMon(reattachConfig); err != nil { | ||
h.logger.Warn("failed to reattach to logmon process", "error", err) | ||
// if we failed to launch logmon, try again below | ||
} | ||
} | ||
|
||
} | ||
|
||
// We did not reattach to a plugin and one is still not running. | ||
// create a new client in initial starts, failed reattachment, or if we detect exits | ||
if h.logmonPluginClient == nil || h.logmonPluginClient.Exited() { | ||
if err := h.launchLogMon(nil); err != nil { | ||
// Retry errors launching logmon as logmon may have crashed on start and | ||
|
@@ -134,12 +167,6 @@ func (h *logmonHook) Prestart(ctx context.Context, | |
return err | ||
} | ||
|
||
rCfg := pstructs.ReattachConfigFromGoPlugin(h.logmonPluginClient.ReattachConfig()) | ||
jsonCfg, err := json.Marshal(rCfg) | ||
if err != nil { | ||
return err | ||
} | ||
resp.State = map[string]string{logmonReattachKey: string(jsonCfg)} | ||
return nil | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -73,5 +73,8 @@ func (p *Plugin) GRPCServer(broker *plugin.GRPCBroker, s *grpc.Server) error { | |
} | ||
|
||
func (p *Plugin) GRPCClient(ctx context.Context, broker *plugin.GRPCBroker, c *grpc.ClientConn) (interface{}, error) { | ||
return &logmonClient{client: proto.NewLogMonClient(c)}, nil | ||
return &logmonClient{ | ||
doneCtx: ctx, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm not 100% following the plugin […]. Interestingly, the logmon plugin client doesn't embed […] |
||
client: proto.NewLogMonClient(c), | ||
}, nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe log here that it's out of retries before returning the error?