kube-agent-update: Use the RFD-184 webapi proxy update protocol by default when possible #50464
@@ -41,6 +41,7 @@ import (
    "sigs.k8s.io/controller-runtime/pkg/healthz"
    metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

    "github.com/gravitational/teleport/api/client/webclient"
    kubeversionupdater "github.com/gravitational/teleport/integrations/kube-agent-updater"
    "github.com/gravitational/teleport/integrations/kube-agent-updater/pkg/controller"
    "github.com/gravitational/teleport/integrations/kube-agent-updater/pkg/img"
@@ -91,6 +92,8 @@ func main() {
    var disableLeaderElection bool
    var credSource string
    var logLevel string
    var proxyAddress string
    var updateGroup string

    flag.StringVar(&agentName, "agent-name", "", "The name of the agent that should be updated. This is mandatory.")
    flag.StringVar(&agentNamespace, "agent-namespace", "", "The namespace of the agent that should be updated. This is mandatory.")
@@ -100,6 +103,8 @@ func main() {
    flag.BoolVar(&insecureNoVerify, "insecure-no-verify-image", false, "Disable image signature verification. The image tag is still resolved and image must exist.")
    flag.BoolVar(&insecureNoResolve, "insecure-no-resolve-image", false, "Disable image signature verification AND resolution. The updater can update to non-existing images.")
    flag.BoolVar(&disableLeaderElection, "disable-leader-election", false, "Disable leader election, used when running the kube-agent-updater outside of Kubernetes.")
    flag.StringVar(&proxyAddress, "proxy-address", "", "The proxy address of the Teleport cluster. When set, the updater will try to get updates via the proxy's /find endpoint.")
    flag.StringVar(&updateGroup, "update-group", "", "The agent update group, as defined in the `autoupdate_config` resource. When unset or set to an unknown value, the agent will update with the default group.")
    flag.StringVar(&versionServer, "version-server", "https://updates.releases.teleport.dev/v1/", "URL of the HTTP server advertising target version and critical maintenances. Trailing slash is optional.")
    flag.StringVar(&versionChannel, "version-channel", "stable/cloud", "Version channel to get updates from.")
    flag.StringVar(&baseImageName, "base-image", "public.ecr.aws/gravitational/teleport", "Image reference containing registry and repository.")
@@ -119,6 +124,7 @@ func main() {
    }
    slogLeveler.Set(lvl)

    // Validate configuration.
    if agentName == "" {
        ctrl.Log.Error(trace.BadParameter("--agent-name empty"), "agent-name must be provided")
        os.Exit(1)
@@ -127,7 +133,16 @@ func main() {
        ctrl.Log.Error(trace.BadParameter("--agent-namespace empty"), "agent-namespace must be provided")
        os.Exit(1)
    }
    if versionServer == "" && proxyAddress == "" {
        ctrl.Log.Error(
            trace.BadParameter("at least one of --proxy-address or --version-server must be provided"),
            "the updater has no upstream configured, it cannot retrieve the version and check when to update",
        )
        os.Exit(1)
    }

    // Build a new controller manager. We need to do this early as some triggers
    // need a Kubernetes client and the manager is the one providing it.
    mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
        Scheme:  scheme,
        Metrics: metricsserver.Options{BindAddress: metricsAddr},
@@ -151,16 +166,76 @@ func main() {
        os.Exit(1)
    }

    versionServerURL, err := url.Parse(strings.TrimRight(versionServer, "/") + "/" + versionChannel)
    if err != nil {
        ctrl.Log.Error(err, "failed to parse version server URL, exiting")
        os.Exit(1)
    // Craft the version getter and update triggers based on the configuration (use RFD-109 APIs, RFD-184, or both).
    var criticalUpdateTriggers []maintenance.Trigger
    var plannedMaintenanceTriggers []maintenance.Trigger
    var versionGetters []version.Getter

    // If the proxy server is specified, we enable RFD-184 updates.
    // See https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#updater-apis
    if proxyAddress != "" {
        ctrl.Log.Info("fetching versions from the proxy /find endpoint", "proxy_server_url", proxyAddress, "update_group", updateGroup)

        proxyClt, err := webclient.NewReusableClient(&webclient.Config{
            Context:     ctx,
            ProxyAddr:   proxyAddress,
            UpdateGroup: updateGroup,
        })
        if err != nil {
            ctrl.Log.Error(err, "failed to create proxy client, exiting")
            os.Exit(1)
        }

        // We do a preflight check before starting to know if the proxy is correctly configured and reachable.
        ctrl.Log.Info("preflight check: ping the proxy server", "proxy_server_url", proxyAddress)
        pong, err := proxyClt.Ping()
        if err != nil {
            ctrl.Log.Error(err, "failed to ping proxy, either the proxy address is wrong, or the network blocks connections to the proxy",
                "proxy_address", proxyAddress,
            )
            os.Exit(1)
        }
        ctrl.Log.Info("proxy server successfully pinged",
            "proxy_server_url", proxyAddress,
            "proxy_cluster_name", pong.ClusterName,
            "proxy_version", pong.ServerVersion,
        )

        versionGetters = append(versionGetters, version.NewProxyVersionGetter("proxy update protocol", proxyClt))

        // In RFD 184, the server is driving the update, so both regular maintenances and
        // critical ones are fetched from the proxy. Using the same trigger ensures we hit the cache if both triggers
        // are evaluated and don't actually make 2 calls.
        proxyTrigger := maintenance.NewProxyMaintenanceTrigger("proxy update protocol", proxyClt)
        criticalUpdateTriggers = append(criticalUpdateTriggers, proxyTrigger)
        plannedMaintenanceTriggers = append(plannedMaintenanceTriggers, proxyTrigger)
    }
    versionGetter := version.NewBasicHTTPVersionGetter(versionServerURL)

    // If the version server is specified, we enable RFD-109 updates.
    // See https://github.com/gravitational/teleport/blob/master/rfd/0109-cloud-agent-upgrades.md#kubernetes-model
    if versionServer != "" {
Review comment: should this be an if/else instead? Having both versionServer and proxyAddress can create problems?
Reply: We explicitly want to support both at the same time and do a failover based on which one is implemented. That's because we don't know in Helm whether the proxy server supports the new updater API. This way we get opportunistic behavior by default (a minimal sketch of this failover idea follows the diff).
        rawUrl := strings.TrimRight(versionServer, "/") + "/" + versionChannel
        versionServerURL, err := url.Parse(rawUrl)
        if err != nil {
            ctrl.Log.Error(err, "failed to parse version server URL, exiting", "url", rawUrl)
            os.Exit(1)
        }
        ctrl.Log.Info("fetching versions from the version server", "version_server_url", versionServerURL.String())

        versionGetters = append(versionGetters, version.NewBasicHTTPVersionGetter(versionServerURL))
        // critical updates are advertised by the version channel
        criticalUpdateTriggers = append(criticalUpdateTriggers, maintenance.NewBasicHTTPMaintenanceTrigger("critical update", versionServerURL))
        // planned maintenance windows are exported by the pods
        plannedMaintenanceTriggers = append(plannedMaintenanceTriggers, podmaintenance.NewWindowTrigger("maintenance window", mgr.GetClient()))
    }

    maintenanceTriggers := maintenance.Triggers{
        maintenance.NewBasicHTTPMaintenanceTrigger("critical update", versionServerURL),
        // We check if the update is critical.
        maintenance.FailoverTrigger(criticalUpdateTriggers),
        // We check if the agent is unhealthy.
        podmaintenance.NewUnhealthyWorkloadTrigger("unhealthy pods", mgr.GetClient()),
        podmaintenance.NewWindowTrigger("maintenance window", mgr.GetClient()),
        // We check if we're in a maintenance window.
        maintenance.FailoverTrigger(plannedMaintenanceTriggers),
    }

    var imageValidators img.Validators
@@ -190,7 +265,12 @@ func main() {
        os.Exit(1)
    }

    versionUpdater := controller.NewVersionUpdater(versionGetter, imageValidators, maintenanceTriggers, baseImage)
    versionUpdater := controller.NewVersionUpdater(
        version.FailoverGetter(versionGetters),
        imageValidators,
        maintenanceTriggers,
        baseImage,
    )

    // Controller registration
    deploymentController := controller.DeploymentVersionUpdater{
@@ -224,7 +304,7 @@ func main() {
        os.Exit(1)
    }

    ctrl.Log.Info("starting the updater", "version", kubeversionupdater.Version, "url", versionServerURL.String())
    ctrl.Log.Info("starting the updater", "version", kubeversionupdater.Version)

    if err := mgr.Start(ctx); err != nil {
        ctrl.Log.Error(err, "failed to start manager, exiting")
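The review discussion above (on the versionServer branch) describes an opportunistic failover: ask the proxy's RFD-184 API first and fall back to the RFD-109 version server when the proxy does not implement it. The listing below is a minimal, self-contained sketch of that idea, not the real version.Getter or version.FailoverGetter from kube-agent-updater: the Getter interface, the errNotImplemented sentinel, and the failoverGetter and staticGetter types are illustrative assumptions.

package main

import (
	"context"
	"errors"
	"fmt"
)

// Getter is a hypothetical, simplified version getter used only for this sketch.
type Getter interface {
	GetVersion(ctx context.Context) (string, error)
}

// errNotImplemented is an assumed sentinel returned by a source (for example an
// older proxy) that does not expose the RFD-184 update endpoint.
var errNotImplemented = errors.New("upstream does not implement the update protocol")

// failoverGetter queries each getter in order and only moves on to the next one
// when the previous source does not implement the protocol.
type failoverGetter []Getter

func (f failoverGetter) GetVersion(ctx context.Context) (string, error) {
	var lastErr error
	for _, g := range f {
		v, err := g.GetVersion(ctx)
		switch {
		case err == nil:
			return v, nil
		case errors.Is(err, errNotImplemented):
			lastErr = err
			continue // fall back to the next source, e.g. the RFD-109 version server
		default:
			return "", err // genuine failures are surfaced rather than silently skipped
		}
	}
	return "", fmt.Errorf("no version source available: %w", lastErr)
}

// staticGetter is a stub source used to demonstrate the failover order.
type staticGetter struct {
	version string
	err     error
}

func (s staticGetter) GetVersion(context.Context) (string, error) { return s.version, s.err }

func main() {
	getters := failoverGetter{
		staticGetter{err: errNotImplemented}, // a proxy that predates RFD-184
		staticGetter{version: "16.4.2"},      // answer from the RFD-109 version server (example value)
	}
	v, err := getters.GetVersion(context.Background())
	fmt.Println(v, err) // prints: 16.4.2 <nil>
}

In the diff, the proxy getter is appended to versionGetters before the HTTP version getter, which matches the "proxy first, version server second" order described in the review replies; version.FailoverGetter(versionGetters) then wraps both.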
Review comment: should we also validate that both aren't empty?
Reply: Having both set is a valid configuration (and the default one for most clusters). We will try the proxy address first, then fall back to the version server if the proxy responds but does not support the new agent automatic updates.
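The same fallback applies to the maintenance triggers built in the diff: criticalUpdateTriggers and plannedMaintenanceTriggers each start with the proxy trigger and are wrapped in maintenance.FailoverTrigger. The sketch below mirrors the getter sketch above for the trigger side; the Trigger interface, the errNotImplemented sentinel, and the failoverTrigger and staticTrigger types are simplified assumptions and may not match the real maintenance.Trigger or maintenance.FailoverTrigger signatures.

package main

import (
	"context"
	"errors"
	"fmt"
)

// Trigger is a simplified stand-in for a maintenance trigger: it answers whether
// an update may start now. The real interface likely carries more context, such
// as the Kubernetes object being updated.
type Trigger interface {
	Name() string
	CanStart(ctx context.Context) (bool, error)
}

// errNotImplemented stands in for the error returned when the proxy does not
// support the RFD-184 update protocol.
var errNotImplemented = errors.New("proxy does not implement the update protocol")

// failoverTrigger consults each trigger in order and only falls back to the next
// one when the previous source does not implement the protocol.
type failoverTrigger []Trigger

func (f failoverTrigger) Name() string { return "failover" }

func (f failoverTrigger) CanStart(ctx context.Context) (bool, error) {
	for _, t := range f {
		ok, err := t.CanStart(ctx)
		switch {
		case err == nil:
			return ok, nil
		case errors.Is(err, errNotImplemented):
			continue // e.g. an old proxy: fall back to the RFD-109 trigger
		default:
			return false, fmt.Errorf("trigger %q failed: %w", t.Name(), err)
		}
	}
	return false, errors.New("no trigger was able to answer")
}

// staticTrigger is a stub used to demonstrate the failover order.
type staticTrigger struct {
	name string
	ok   bool
	err  error
}

func (s staticTrigger) Name() string                           { return s.name }
func (s staticTrigger) CanStart(context.Context) (bool, error) { return s.ok, s.err }

func main() {
	critical := failoverTrigger{
		staticTrigger{name: "proxy update protocol", err: errNotImplemented}, // proxy too old
		staticTrigger{name: "critical update", ok: true},                     // version channel marks the update critical
	}
	ok, err := critical.CanStart(context.Background())
	fmt.Println(ok, err) // prints: true <nil>
}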