Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚠️ KCP: Stop updating and using Kubeadm's ClusterStatus with Kuberntes v1.22 #4485

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion controlplane/kubeadm/controllers/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster")
}

removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames)
kubernetesVersion := controlPlane.KCP.Spec.Version
parsedVersion, err := semver.ParseTolerant(kubernetesVersion)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion)
fabriziopandini marked this conversation as resolved.
Show resolved Hide resolved
}
removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames, parsedVersion)
if err != nil {
return ctrl.Result{}, errors.Wrap(err, "failed attempt to reconcile etcd members")
}
Expand Down
4 changes: 2 additions & 2 deletions controlplane/kubeadm/controllers/fakes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func (f fakeWorkloadCluster) ForwardEtcdLeadership(_ context.Context, _ *cluster
return nil
}

func (f fakeWorkloadCluster) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) {
func (f fakeWorkloadCluster) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) {
return nil, nil
}

Expand Down Expand Up @@ -100,7 +100,7 @@ func (f fakeWorkloadCluster) RemoveEtcdMemberForMachine(ctx context.Context, mac
return nil
}

func (f fakeWorkloadCluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error {
func (f fakeWorkloadCluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error {
return nil
}

Expand Down
10 changes: 8 additions & 2 deletions controlplane/kubeadm/controllers/remediation.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"

"github.com/blang/semver"
"github.com/pkg/errors"
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
Expand Down Expand Up @@ -135,7 +136,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
}
}

if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated); err != nil {
kubernetesVersion := controlPlane.KCP.Spec.Version
parsedVersion, err := semver.ParseTolerant(kubernetesVersion)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion)
}
if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated, parsedVersion); err != nil {
logger.Error(err, "Failed to remove machine from kubeadm ConfigMap")
return ctrl.Result{}, err
}
Expand Down Expand Up @@ -164,7 +170,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
// - etc.
//
// NOTE: this func assumes the list of members in sync with the list of machines/nodes, it is required to call reconcileEtcdMembers
// ans well as reconcileControlPlaneConditions before this.
// and well as reconcileControlPlaneConditions before this.
func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *internal.ControlPlane, machineToBeRemediated *clusterv1.Machine) (bool, error) {
logger := r.Log.WithValues("namespace", controlPlane.KCP.Namespace, "kubeadmControlPlane", controlPlane.KCP.Name, "cluster", controlPlane.Cluster.Name)

Expand Down
3 changes: 3 additions & 0 deletions controlplane/kubeadm/controllers/remediation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) {
controlPlane := &internal.ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{
Replicas: utilpointer.Int32Ptr(2),
Version: "v1.19.1",
}},
Cluster: &clusterv1.Cluster{},
Machines: internal.NewFilterableMachineCollection(m1, m2),
Expand Down Expand Up @@ -270,6 +271,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) {
controlPlane := &internal.ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{
Replicas: utilpointer.Int32Ptr(3),
Version: "v1.19.1",
}},
Cluster: &clusterv1.Cluster{},
Machines: internal.NewFilterableMachineCollection(m1, m2, m3),
Expand Down Expand Up @@ -320,6 +322,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) {
controlPlane := &internal.ControlPlane{
KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{
Replicas: utilpointer.Int32Ptr(4),
Version: "v1.19.1",
}},
Cluster: &clusterv1.Cluster{},
Machines: internal.NewFilterableMachineCollection(m1, m2, m3, m4),
Expand Down
8 changes: 7 additions & 1 deletion controlplane/kubeadm/controllers/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"strings"

"github.com/blang/semver"
"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -128,7 +129,12 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
}
}

if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete); err != nil {
kubernetesVersion := controlPlane.KCP.Spec.Version
parsedVersion, err := semver.ParseTolerant(kubernetesVersion)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion)
}
if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete, parsedVersion); err != nil {
logger.Error(err, "Failed to remove machine from kubeadm ConfigMap")
return ctrl.Result{}, err
}
Expand Down
12 changes: 10 additions & 2 deletions controlplane/kubeadm/controllers/scale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,11 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing.
}

cluster := &clusterv1.Cluster{}
kcp := &controlplanev1.KubeadmControlPlane{}
kcp := &controlplanev1.KubeadmControlPlane{
Spec: controlplanev1.KubeadmControlPlaneSpec{
Version: "v1.19.1",
},
}
setKCPHealthy(kcp)
controlPlane := &internal.ControlPlane{
KCP: kcp,
Expand Down Expand Up @@ -230,7 +234,11 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing.
}

cluster := &clusterv1.Cluster{}
kcp := &controlplanev1.KubeadmControlPlane{}
kcp := &controlplanev1.KubeadmControlPlane{
Spec: controlplanev1.KubeadmControlPlaneSpec{
Version: "v1.19.1",
},
}
controlPlane := &internal.ControlPlane{
KCP: kcp,
Cluster: cluster,
Expand Down
23 changes: 17 additions & 6 deletions controlplane/kubeadm/internal/workload_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,13 @@ type WorkloadCluster interface {
UpdateKubeProxyImageInfo(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane) error
UpdateCoreDNS(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane) error
RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error
RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error
RemoveNodeFromKubeadmConfigMap(ctx context.Context, nodeName string) error
RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error
RemoveNodeFromKubeadmConfigMap(ctx context.Context, nodeName string, version semver.Version) error
ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error
AllowBootstrapTokensToGetNodes(ctx context.Context) error

// State recovery tasks.
ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error)
ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error)
}

// Workload defines operations on workload clusters.
Expand Down Expand Up @@ -296,17 +296,28 @@ func (w *Workload) UpdateSchedulerInKubeadmConfigMap(ctx context.Context, schedu
}

// RemoveMachineFromKubeadmConfigMap removes the entry for the machine from the kubeadm configmap.
func (w *Workload) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error {
func (w *Workload) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error {
if machine == nil || machine.Status.NodeRef == nil {
// Nothing to do, no node for Machine
return nil
}

return w.RemoveNodeFromKubeadmConfigMap(ctx, machine.Status.NodeRef.Name)
return w.RemoveNodeFromKubeadmConfigMap(ctx, machine.Status.NodeRef.Name, version)
}

var (
// Starting from v1.22.0 kubeadm dropped usage of the ClusterStatus entry from the kubeadm-config ConfigMap
// so it isn't necessary anymore to remove API endpoints for control plane nodes after deletion.
// NOTE: This assume kubeadm version equals to Kubernetes version.
minKubernetesVersionWithoutClusterStatus = semver.MustParse("1.22.0")
)

// RemoveNodeFromKubeadmConfigMap removes the entry for the node from the kubeadm configmap.
func (w *Workload) RemoveNodeFromKubeadmConfigMap(ctx context.Context, name string) error {
func (w *Workload) RemoveNodeFromKubeadmConfigMap(ctx context.Context, name string, version semver.Version) error {
if version.GTE(minKubernetesVersionWithoutClusterStatus) {
return nil
}

return util.Retry(func() (bool, error) {
configMapKey := ctrlclient.ObjectKey{Name: kubeadmConfigKey, Namespace: metav1.NamespaceSystem}
kubeadmConfigMap, err := w.getConfigMap(ctx, configMapKey)
Expand Down
5 changes: 3 additions & 2 deletions controlplane/kubeadm/internal/workload_cluster_etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package internal
import (
"context"

"github.com/blang/semver"
"github.com/pkg/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kerrors "k8s.io/apimachinery/pkg/util/errors"
Expand All @@ -35,7 +36,7 @@ type etcdClientFor interface {

// ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes.
// If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them.
func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) {
func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) {
removedMembers := []string{}
errs := []error{}
for _, nodeName := range nodeNames {
Expand Down Expand Up @@ -73,7 +74,7 @@ func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string)
errs = append(errs, err)
}

if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name); err != nil {
if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name, version); err != nil {
errs = append(errs, err)
}
}
Expand Down
81 changes: 61 additions & 20 deletions controlplane/kubeadm/internal/workload_cluster_etcd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"errors"
"testing"

"github.com/blang/semver"
. "github.com/onsi/gomega"

"go.etcd.io/etcd/clientv3"
Expand Down Expand Up @@ -466,21 +467,24 @@ func TestReconcileEtcdMembers(t *testing.T) {
Namespace: metav1.NamespaceSystem,
},
Data: map[string]string{
clusterStatusKey: `apiEndpoints:
ip-10-0-0-1.ec2.internal:
advertiseAddress: 10.0.0.1
bindPort: 6443
ip-10-0-0-2.ec2.internal:
advertiseAddress: 10.0.0.2
bindPort: 6443
someFieldThatIsAddedInTheFuture: bar
ip-10-0-0-3.ec2.internal:
advertiseAddress: 10.0.0.3
bindPort: 6443
apiVersion: kubeadm.k8s.io/vNbetaM
kind: ClusterStatus`,
clusterStatusKey: "apiEndpoints:\n" +
" ip-10-0-0-1.ec2.internal:\n" +
" advertiseAddress: 10.0.0.1\n" +
" bindPort: 6443\n" +
" ip-10-0-0-2.ec2.internal:\n" +
" advertiseAddress: 10.0.0.2\n" +
" bindPort: 6443\n" +
" someFieldThatIsAddedInTheFuture: bar\n" +
" ip-10-0-0-3.ec2.internal:\n" +
" advertiseAddress: 10.0.0.3\n" +
" bindPort: 6443\n" +
"apiVersion: kubeadm.k8s.io/vNbetaM\n" +
"kind: ClusterStatus\n",
},
}
kubeadmConfigWithoutClusterStatus := kubeadmConfig.DeepCopy()
delete(kubeadmConfigWithoutClusterStatus.Data, clusterStatusKey)

node1 := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "ip-10-0-0-1.ec2.internal",
Expand Down Expand Up @@ -508,25 +512,62 @@ kind: ClusterStatus`,

tests := []struct {
name string
kubernetesVersion semver.Version
objs []runtime.Object
nodes []string
etcdClientGenerator etcdClientFor
expectErr bool
assert func(*WithT)
assert func(*WithT, client.Client)
}{
{
// the node to be removed is ip-10-0-0-3.ec2.internal since the
// other two have nodes
name: "successfully removes the etcd member without a node and removes the node from kubeadm config",
objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfig.DeepCopy()},
nodes: []string{node1.Name, node2.Name},
name: "successfully removes the etcd member without a node and removes the node from kubeadm config for Kubernetes version < 1.22.0",
kubernetesVersion: kubernetesVersionWithClusterStatus, // Kubernetes version < 1.22.0 has ClusterStatus
objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfig.DeepCopy()},
nodes: []string{node1.Name, node2.Name},
etcdClientGenerator: &fakeEtcdClientGenerator{
forNodesClient: &etcd.Client{
EtcdClient: fakeEtcdClient,
},
},
expectErr: false,
assert: func(g *WithT, c client.Client) {
g.Expect(fakeEtcdClient.RemovedMember).To(Equal(uint64(3)))

var actualConfig corev1.ConfigMap
g.Expect(c.Get(
ctx,
ctrlclient.ObjectKey{Name: kubeadmConfigKey, Namespace: metav1.NamespaceSystem},
&actualConfig,
)).To(Succeed())
g.Expect(actualConfig.Data[clusterStatusKey]).To(Equal("apiEndpoints:\n" +
" ip-10-0-0-1.ec2.internal:\n" +
" advertiseAddress: 10.0.0.1\n" +
" bindPort: 6443\n" +
" ip-10-0-0-2.ec2.internal:\n" +
" advertiseAddress: 10.0.0.2\n" +
" bindPort: 6443\n" +
" someFieldThatIsAddedInTheFuture: bar\n" +
"apiVersion: kubeadm.k8s.io/vNbetaM\n" +
"kind: ClusterStatus\n"))

},
},
{
// the node to be removed is ip-10-0-0-3.ec2.internal since the
// other two have nodes
name: "successfully removes the etcd member without a node for Kubernetes version >= 1.22.0",
kubernetesVersion: minKubernetesVersionWithoutClusterStatus, // Kubernetes version >= 1.22.0 should not manage ClusterStatus
objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfigWithoutClusterStatus.DeepCopy()},
nodes: []string{node1.Name, node2.Name},
etcdClientGenerator: &fakeEtcdClientGenerator{
forNodesClient: &etcd.Client{
EtcdClient: fakeEtcdClient,
},
},
expectErr: false,
assert: func(g *WithT) {
assert: func(g *WithT, c client.Client) {
g.Expect(fakeEtcdClient.RemovedMember).To(Equal(uint64(3)))
},
},
Expand Down Expand Up @@ -559,15 +600,15 @@ kind: ClusterStatus`,
etcdClientGenerator: tt.etcdClientGenerator,
}
ctx := context.TODO()
_, err := w.ReconcileEtcdMembers(ctx, tt.nodes)
_, err := w.ReconcileEtcdMembers(ctx, tt.nodes, tt.kubernetesVersion)
if tt.expectErr {
g.Expect(err).To(HaveOccurred())
return
}
g.Expect(err).ToNot(HaveOccurred())

if tt.assert != nil {
tt.assert(g)
tt.assert(g, testEnv.Client)
}
})
}
Expand Down
Loading