Skip to content

Commit

Permalink
Add autoupdate_agent_rollout support
Browse files Browse the repository at this point in the history
  • Loading branch information
hugoShaka committed Nov 26, 2024
1 parent 123b7f5 commit f605d57
Show file tree
Hide file tree
Showing 6 changed files with 1,033 additions and 62 deletions.
8 changes: 6 additions & 2 deletions api/client/webclient/webclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ import (
"github.com/gravitational/teleport/api/utils/keys"
)

// AgentUpdateGroupParameter is the name of the URL query parameter that
// carries the agent update group on webclient requests.
const AgentUpdateGroupParameter = "group"

// Config specifies information when building requests with the
// webclient.
type Config struct {
Expand Down Expand Up @@ -179,7 +183,7 @@ func Find(cfg *Config) (*PingResponse, error) {
}
if cfg.UpdateGroup != "" {
endpoint.RawQuery = url.Values{
"group": []string{cfg.UpdateGroup},
AgentUpdateGroupParameter: []string{cfg.UpdateGroup},
}.Encode()
}

Expand Down Expand Up @@ -224,7 +228,7 @@ func Ping(cfg *Config) (*PingResponse, error) {
}
if cfg.UpdateGroup != "" {
endpoint.RawQuery = url.Values{
"group": []string{cfg.UpdateGroup},
AgentUpdateGroupParameter: []string{cfg.UpdateGroup},
}.Encode()
}
if cfg.ConnectorName != "" {
Expand Down
14 changes: 11 additions & 3 deletions lib/web/apiserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -1545,22 +1545,30 @@ func (h *Handler) ping(w http.ResponseWriter, r *http.Request, p httprouter.Para
return nil, trace.Wrap(err)
}

group := r.URL.Query().Get(webclient.AgentUpdateGroupParameter)

return webclient.PingResponse{
Auth: authSettings,
Proxy: *proxyConfig,
ServerVersion: teleport.Version,
MinClientVersion: teleport.MinClientVersion,
ClusterName: h.auth.clusterName,
AutomaticUpgrades: pr.ServerFeatures.GetAutomaticUpgrades(),
AutoUpdate: h.automaticUpdateSettings184(r.Context()),
AutoUpdate: h.automaticUpdateSettings184(r.Context(), group, "" /* updater UUID */),
Edition: modules.GetModules().BuildType(),
FIPS: modules.IsBoringBinary(),
}, nil
}

func (h *Handler) find(w http.ResponseWriter, r *http.Request, p httprouter.Params) (interface{}, error) {
group := r.URL.Query().Get(webclient.AgentUpdateGroupParameter)
cacheKey := "find"
if group != "" {
cacheKey += "-" + group
}

// cache the generic answer to avoid doing work for each request
resp, err := utils.FnCacheGet[*webclient.PingResponse](r.Context(), h.findEndpointCache, "find", func(ctx context.Context) (*webclient.PingResponse, error) {
resp, err := utils.FnCacheGet[*webclient.PingResponse](r.Context(), h.findEndpointCache, cacheKey, func(ctx context.Context) (*webclient.PingResponse, error) {
proxyConfig, err := h.cfg.ProxySettings.GetProxySettings(ctx)
if err != nil {
return nil, trace.Wrap(err)
Expand All @@ -1579,7 +1587,7 @@ func (h *Handler) find(w http.ResponseWriter, r *http.Request, p httprouter.Para
ClusterName: h.auth.clusterName,
Edition: modules.GetModules().BuildType(),
FIPS: modules.IsBoringBinary(),
AutoUpdate: h.automaticUpdateSettings184(ctx),
AutoUpdate: h.automaticUpdateSettings184(ctx, group, "" /* updater UUID */),
}, nil
})
if err != nil {
Expand Down
218 changes: 218 additions & 0 deletions lib/web/autoupdate_common.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
/*
* Teleport
* Copyright (C) 2024 Gravitational, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package web

import (
"context"
autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
"github.com/gravitational/teleport/api/types/autoupdate"
"github.com/gravitational/teleport/lib/automaticupgrades"
"github.com/gravitational/trace"
"strings"
)

// autoUpdateAgentVersion resolves the version an agent should install or
// update to, given its group and updater UUID.
//
// The RFD184 autoupdate_agent_rollout resource takes precedence when the
// cluster has one. When the lookup reports "not found", the function falls
// back to the RFD109-style channels. Any other lookup error is returned
// wrapped.
//
// The returned version follows semver without the leading "v".
func (h *Handler) autoUpdateAgentVersion(ctx context.Context, group, updaterUUID string) (string, error) {
	rollout, err := h.cfg.AccessPoint.GetAutoUpdateAgentRollout(ctx)
	if err == nil {
		// RFD184 path: the rollout resource exists and drives the answer.
		return getVersionFromRollout(rollout, group, updaterUUID)
	}
	if !trace.IsNotFound(err) {
		return "", trace.Wrap(err, "Failed to get auto-update rollout")
	}
	// RFD109 path: no rollout resource, use the channels.
	return getVersionFromChannel(ctx, h.cfg.AutomaticUpgradesChannels, group)
}

// autoUpdateAgentShouldUpdate reports whether the agent should update now,
// given its group and updater UUID.
//
// The RFD184 autoupdate_agent_rollout resource takes precedence when the
// cluster has one. When the lookup reports "not found", the function falls
// back to RFD109-style updates: the channels, optionally preceded by a
// maintenance window lookup when windowLookup is true. Any other lookup error
// is returned wrapped.
func (h *Handler) autoUpdateAgentShouldUpdate(ctx context.Context, group, updaterUUID string, windowLookup bool) (bool, error) {
	rollout, err := h.cfg.AccessPoint.GetAutoUpdateAgentRollout(ctx)
	if err == nil {
		return getTriggerFromRollout(rollout, group, updaterUUID)
	}
	if !trace.IsNotFound(err) {
		return false, trace.Wrap(err, "Failed to get auto-update rollout")
	}

	if !windowLookup {
		return getTriggerFromChannel(ctx, h.cfg.AutomaticUpgradesChannels, group)
	}
	// Updaters using the RFD184 API are not aware of maintenance windows
	// like RFD109 updaters are. To have both updaters adopt the same
	// behaviour we must do the CMC window lookup for them.
	return h.getTriggerFromWindowThenChannel(ctx, group)
}

// getVersionFromRollout picks the version to serve to an agent from the
// RFD184 agent rollout and the agent's group name. updaterUUID is accepted
// but not used by this function yet.
//
// Decision order:
//   - mode "disabled": always the target version;
//   - mode neither "suspended" nor "enabled": BadParameter error;
//   - schedule "immediate": always the target version;
//   - otherwise the answer depends on the state of the agent's group:
//     unstarted or rolled back gives the start version, active or done gives
//     the target version, anything else is a NotImplemented error.
//
// The full specification is described in RFD 184 and summed up in this table:
// https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#rollout-status-disabled
//
// The returned version follows semver without the leading "v".
func getVersionFromRollout(
	rollout *autoupdatepb.AutoUpdateAgentRollout,
	groupName, updaterUUID string,
) (string, error) {
	spec := rollout.GetSpec()
	mode := spec.GetAutoupdateMode()

	// If AUs are disabled, we always answer the target version.
	if mode == autoupdate.AgentsUpdateModeDisabled {
		return spec.GetTargetVersion(), nil
	}
	// Only the suspended and enabled modes modulate the response based on
	// the schedule and agent group state; anything else is rejected.
	if mode != autoupdate.AgentsUpdateModeSuspended && mode != autoupdate.AgentsUpdateModeEnabled {
		return "", trace.BadParameter("unsupported agent update mode %q", mode)
	}

	// With an immediate schedule, agents always get the target version.
	if spec.GetSchedule() == autoupdate.AgentsScheduleImmediate {
		return spec.GetTargetVersion(), nil
	}

	// Regular schedule: answer based on the agent group state.
	group, err := getGroup(rollout, groupName)
	if err != nil {
		return "", trace.Wrap(err, "getting group %q", groupName)
	}

	state := group.GetState()
	switch state {
	case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:
		return spec.GetTargetVersion(), nil
	case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK:
		return spec.GetStartVersion(), nil
	}
	return "", trace.NotImplemented("Unsupported group state %q", state)
}

// getTriggerFromRollout reports whether the agent should update now, based on
// the RFD184 agent rollout and the agent's group name. updaterUUID is
// accepted but not used by this function yet.
//
// Decision order:
//   - mode "disabled" or "suspended": never update;
//   - mode other than "enabled": BadParameter error;
//   - schedule "immediate": always update;
//   - otherwise the answer depends on the state of the agent's group:
//     unstarted gives false, active or rolled back gives true, done gives
//     true only when the rollout strategy is halt-on-error, anything else is
//     a NotImplemented error.
//
// The full specification is described in RFD 184 and summed up in this table:
// https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#rollout-status-disabled
func getTriggerFromRollout(rollout *autoupdatepb.AutoUpdateAgentRollout, groupName, updaterUUID string) (bool, error) {
	spec := rollout.GetSpec()
	mode := spec.GetAutoupdateMode()

	// If AUs are disabled or suspended, never tell the agent to update.
	if mode == autoupdate.AgentsUpdateModeDisabled || mode == autoupdate.AgentsUpdateModeSuspended {
		return false, nil
	}
	// Only the enabled mode modulates the response based on the schedule and
	// agent group state; anything else is rejected.
	if mode != autoupdate.AgentsUpdateModeEnabled {
		return false, trace.BadParameter("unsupported agent update mode %q", mode)
	}

	// With an immediate schedule, agents are always told to update.
	if spec.GetSchedule() == autoupdate.AgentsScheduleImmediate {
		return true, nil
	}

	// Regular schedule: answer based on the agent group state.
	group, err := getGroup(rollout, groupName)
	if err != nil {
		return false, trace.Wrap(err, "getting group %q", groupName)
	}

	state := group.GetState()
	switch state {
	case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:
		return false, nil
	case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:
		haltOnError := spec.GetStrategy() == autoupdate.AgentsStrategyHaltOnError
		return haltOnError, nil
	case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE,
		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK:
		return true, nil
	}
	return false, trace.NotImplemented("Unsupported group state %q", state)
}

// getGroup returns the agent rollout group the requesting agent belongs to.
//
// The groups are read from the rollout status. If one of them has the
// agent-provided group name, that group is returned. Otherwise the default
// group is returned; the default group currently is the last one in the list,
// which might change in the future.
//
// A BadParameter error is returned when the rollout status has no groups.
func getGroup(
	rollout *autoupdatepb.AutoUpdateAgentRollout,
	groupName string,
) (*autoupdatepb.AutoUpdateAgentRolloutStatusGroup, error) {
	groups := rollout.GetStatus().GetGroups()
	if len(groups) == 0 {
		return nil, trace.BadParameter("no groups found")
	}

	// Try to find a group with our name.
	for _, group := range groups {
		// Skip nil entries instead of dereferencing them: this runs while
		// serving requests, so a malformed rollout must not panic here.
		if group == nil {
			continue
		}
		// Use the generated getter, like every other access in this file.
		if group.GetName() == groupName {
			return group, nil
		}
	}

	// Fallback to the default group (currently the last one but this might change).
	return groups[len(groups)-1], nil
}

// getVersionFromChannel gets the target version from the RFD109 channels.
// The channel named groupName is used when it exists, otherwise the default
// version of the channels is used.
//
// The returned version follows semver without the leading "v".
func getVersionFromChannel(ctx context.Context, channels automaticupgrades.Channels, groupName string) (version string, err error) {
	if channel, found := channels[groupName]; found {
		version, err = channel.GetVersion(ctx)
	} else {
		version, err = channels.DefaultVersion(ctx)
	}

	// RFD109 channels return the version with the 'v' prefix.
	// We can't change the internals for backward compatibility, so we must
	// trim the prefix if it's here.
	return strings.TrimPrefix(version, "v"), err
}

// getTriggerFromWindowThenChannel reports whether the agent should update
// now, using the RFD109 maintenance window first and the channels second.
//
// It returns true when the cluster maintenance config could be fetched and
// the current time is within its upgrade window. In every other case,
// including a failure to fetch the config, the answer comes from
// getTriggerFromChannel.
func (h *Handler) getTriggerFromWindowThenChannel(ctx context.Context, groupName string) (bool, error) {
	// TODO: cache the CMC
	cmc, err := h.cfg.ProxyClient.GetClusterMaintenanceConfig(ctx)
	// NOTE(review): the lookup error is not reported; this looks like a
	// deliberate best-effort fallback to the channel, confirm.
	if err == nil && cmc.WithinUpgradeWindow(h.clock.Now()) {
		return true, nil
	}
	return getTriggerFromChannel(ctx, h.cfg.AutomaticUpgradesChannels, groupName)
}

// getTriggerFromChannel reports whether the agent should update now, based on
// the RFD109 channels. It returns the result of GetCritical from the channel
// named groupName when it exists, otherwise from the default channel.
//
// An error is returned when the default channel cannot be obtained or when
// the GetCritical call fails.
func getTriggerFromChannel(ctx context.Context, channels automaticupgrades.Channels, groupName string) (bool, error) {
	named, found := channels[groupName]
	if !found {
		// No channel for this group: use the default one.
		fallback, err := channels.DefaultChannel()
		if err != nil {
			return false, trace.Wrap(err, "creating new default channel")
		}
		return fallback.GetCritical(ctx)
	}
	return named.GetCritical(ctx)
}
Loading

0 comments on commit f605d57

Please sign in to comment.