-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
consul/connect: dynamically select envoy sidecar at runtime #8945
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
package taskrunner | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
|
||
"github.com/hashicorp/go-hclog" | ||
"github.com/hashicorp/go-version" | ||
ifs "github.com/hashicorp/nomad/client/allocrunner/interfaces" | ||
"github.com/hashicorp/nomad/client/consul" | ||
"github.com/hashicorp/nomad/nomad/structs" | ||
"github.com/pkg/errors" | ||
) | ||
|
||
const ( | ||
// envoyVersionHookName is the name of this hook and appears in logs. | ||
envoyVersionHookName = "envoy_version" | ||
|
||
// envoyLegacyImage is used when the version of Consul is too old to support | ||
// the SupportedProxies field in the self API. | ||
// | ||
// This is the version defaulted by Nomad before v0.13.0 and/or when using versions | ||
// of Consul before v1.7.8, v1.8.5, and v1.9.0. | ||
envoyLegacyImage = "envoyproxy/envoy:v1.11.2@sha256:a7769160c9c1a55bb8d07a3b71ce5d64f72b1f665f10d81aa1581bc3cf850d09" | ||
) | ||
|
||
type envoyVersionHookConfig struct { | ||
alloc *structs.Allocation | ||
proxiesClient consul.SupportedProxiesAPI | ||
logger hclog.Logger | ||
} | ||
|
||
func newEnvoyVersionHookConfig(alloc *structs.Allocation, proxiesClient consul.SupportedProxiesAPI, logger hclog.Logger) *envoyVersionHookConfig { | ||
return &envoyVersionHookConfig{ | ||
alloc: alloc, | ||
logger: logger, | ||
proxiesClient: proxiesClient, | ||
} | ||
} | ||
|
||
// envoyVersionHook is used to determine and set the Docker image used for Consul | ||
// Connect sidecar proxy tasks. It will query Consul for a set of preferred Envoy | ||
// versions if the task image is unset or references ${NOMAD_envoy_version}. Nomad | ||
// will fallback the image to the previous default Envoy v1.11.2 if Consul is too old | ||
// to support the supported proxies API. | ||
type envoyVersionHook struct { | ||
// alloc is the allocation with the envoy task being rewritten. | ||
alloc *structs.Allocation | ||
|
||
// proxiesClient is the subset of the Consul API for getting information | ||
// from Consul about the versions of Envoy it supports. | ||
proxiesClient consul.SupportedProxiesAPI | ||
|
||
// logger is used to log things. | ||
logger hclog.Logger | ||
} | ||
|
||
func newEnvoyVersionHook(c *envoyVersionHookConfig) *envoyVersionHook { | ||
return &envoyVersionHook{ | ||
alloc: c.alloc, | ||
proxiesClient: c.proxiesClient, | ||
logger: c.logger.Named(envoyVersionHookName), | ||
} | ||
} | ||
|
||
func (envoyVersionHook) Name() string { | ||
return envoyVersionHookName | ||
} | ||
|
||
func (h *envoyVersionHook) Prestart(_ context.Context, request *ifs.TaskPrestartRequest, response *ifs.TaskPrestartResponse) error { | ||
if h.skip(request) { | ||
response.Done = true | ||
return nil | ||
} | ||
|
||
// We either need to acquire Consul's preferred Envoy version or fallback | ||
// to the legacy default. Query Consul and use the (possibly empty) result. | ||
proxies, err := h.proxiesClient.Proxies() | ||
if err != nil { | ||
return errors.Wrap(err, "error retrieving supported Envoy versions from Consul") | ||
} | ||
|
||
// Determine the concrete Envoy image identifier by applying version string | ||
// substitution (${NOMAD_envoy_version}). | ||
image, err := h.tweakImage(h.taskImage(request.Task.Config), proxies) | ||
if err != nil { | ||
return errors.Wrap(err, "error interpreting desired Envoy version from Consul") | ||
} | ||
|
||
// Set the resulting image. | ||
h.logger.Trace("setting task envoy image", "image", image) | ||
request.Task.Config["image"] = image | ||
response.Done = true | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we should actually skip this to ensure we re-run it in the event Envoy fails? Seems cheap enough we might as well. Bootstrap has a similar problem though, so it might not be worth worrying about here if there's a chance we're just going to fail later due to bootstrap changes not being picked up or something. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking the opposite - if Envoy gets restarted the generated envoy config and the envoy version that gets launched should continue to work together. It's kind of interesting in that Envoy will try twice - interpreting the config at versions N and N-1, but it'd be more consistent to just have the version and bootstrap hooks behave the same. OTOH, maybe now it would make sense to query the version and regenerate the config on every sidecar task restart. |
||
return nil | ||
} | ||
|
||
// skip will return true if the request does not contain a task that should have | ||
// its envoy proxy version resolved automatically. | ||
func (h *envoyVersionHook) skip(request *ifs.TaskPrestartRequest) bool { | ||
switch { | ||
case request.Task.Driver != "docker": | ||
return true | ||
case !request.Task.UsesConnectSidecar(): | ||
return true | ||
case !h.needsVersion(request.Task.Config): | ||
return true | ||
} | ||
return false | ||
} | ||
|
||
// getConfiguredImage extracts the configured config.image value from the request. | ||
// If the image is empty or not a string, Nomad will fallback to the normal | ||
// official Envoy image as if the setting was not configured. This is also what | ||
// Nomad would do if the sidecar_task was not set in the first place. | ||
func (_ *envoyVersionHook) taskImage(config map[string]interface{}) string { | ||
value, exists := config["image"] | ||
if !exists { | ||
return structs.EnvoyImageFormat | ||
} | ||
|
||
image, ok := value.(string) | ||
if !ok { | ||
return structs.EnvoyImageFormat | ||
} | ||
|
||
return image | ||
} | ||
|
||
// needsVersion returns true if the docker.config.image is making use of the | ||
// ${NOMAD_envoy_version} faux environment variable. | ||
// Nomad does not need to query Consul to get the preferred Envoy version, etc.) | ||
func (h *envoyVersionHook) needsVersion(config map[string]interface{}) bool { | ||
if len(config) == 0 { | ||
return false | ||
} | ||
|
||
image := h.taskImage(config) | ||
|
||
return strings.Contains(image, structs.EnvoyVersionVar) | ||
} | ||
|
||
// image determines the best Envoy version to use. If supported is nil or empty | ||
// Nomad will fallback to the legacy envoy image used before Nomad v0.13. | ||
func (_ *envoyVersionHook) tweakImage(configured string, supported map[string][]string) (string, error) { | ||
versions := supported["envoy"] | ||
if len(versions) == 0 { | ||
return envoyLegacyImage, nil | ||
} | ||
|
||
latest, err := semver(versions[0]) | ||
if err != nil { | ||
return "", err | ||
} | ||
|
||
return strings.ReplaceAll(configured, structs.EnvoyVersionVar, latest), nil | ||
} | ||
|
||
// semver sanitizes the envoy version string coming from Consul into the format | ||
// used by the Envoy project when publishing images (i.e. proper semver). This | ||
// resulting string value does NOT contain the 'v' prefix for 2 reasons: | ||
// 1) the version library does not include the 'v' | ||
// 2) its plausible unofficial images use the 3 numbers without the prefix for | ||
// tagging their own images | ||
func semver(chosen string) (string, error) { | ||
v, err := version.NewVersion(chosen) | ||
if err != nil { | ||
return "", errors.Wrap(err, "unexpected envoy version format") | ||
} | ||
return v.String(), nil | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If Consul is being restarted when this is called it will fail the task and go into a restart loop which... seems ok?
In configurations where Consul does not start before Nomad there could be spurious task failures, but I think that's already true for Connect? Bootstrap has that awkward bulitin retry that avoids spurious task failures, but it's not a very robust approach.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The envoy bootstrap hook's retry block I believe really targets the race condition between launching the sidecar and Nomad actually registering the service to be proxied - potentially a common occurrence. I'm thinking
/v1/agent/self
response failures are either rare enough or catastrophic enough that letting the task do a restart in those cases is probably fine