VPA: prune stale container aggregates, split recommendations over true number of containers #6745

Open · wants to merge 12 commits into master
31 changes: 31 additions & 0 deletions vertical-pod-autoscaler/e2e/v1/common.go
@@ -352,6 +352,14 @@ func PatchVpaRecommendation(f *framework.Framework, vpa *vpa_types.VerticalPodAu
gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to patch VPA.")
}

// PatchDeployment patches a deployment with a given patch.
func PatchDeployment(f *framework.Framework, deployment *appsv1.Deployment, patch *patchRecord) {
patchBytes, err := json.Marshal([]patchRecord{*patch})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
_, err = f.ClientSet.AppsV1().Deployments(f.Namespace.Name).Patch(context.TODO(), deployment.Name, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred(), "unexpected error patching deployment")
}

// AnnotatePod adds annotation for an existing pod.
func AnnotatePod(f *framework.Framework, podName, annotationName, annotationValue string) {
bytes, err := json.Marshal([]patchRecord{{
Expand Down Expand Up @@ -498,6 +506,29 @@ func WaitForUncappedCPURecommendationAbove(c vpa_clientset.Interface, vpa *vpa_t
})
}

// WaitForNumberOfCheckpoints polls until the specified number of VerticalPodAutoscalerCheckpoints is present.
// Returns the list of checkpoints, or an error on timeout.
func WaitForNumberOfCheckpoints(c vpa_clientset.Interface, namespace string, count int) (*vpa_types.VerticalPodAutoscalerCheckpointList, error) {
var checkpoints *vpa_types.VerticalPodAutoscalerCheckpointList
err := wait.PollUntilContextTimeout(context.Background(), pollInterval, pollTimeout, true, func(ctx context.Context) (done bool, err error) {
checkpoints, err = c.AutoscalingV1().VerticalPodAutoscalerCheckpoints(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
return false, err
}

if len(checkpoints.Items) == count {
return true, nil
}

return false, nil
})

if err != nil {
return nil, fmt.Errorf("error waiting for %d checkpoints: %w", count, err)
}
return checkpoints, nil
}

func installLimitRange(f *framework.Framework, minCpuLimit, minMemoryLimit, maxCpuLimit, maxMemoryLimit *resource.Quantity, lrType apiv1.LimitType) {
lr := &apiv1.LimitRange{
ObjectMeta: metav1.ObjectMeta{
178 changes: 164 additions & 14 deletions vertical-pod-autoscaler/e2e/v1/recommender.go
@@ -29,6 +29,7 @@ import (
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
vpa_clientset "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/client/clientset/versioned"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/test"
vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
klog "k8s.io/klog/v2"
@@ -141,21 +142,9 @@ var _ = RecommenderE2eDescribe("Checkpoints", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())

klog.InfoS("Sleeping for up to 15 minutes...")

maxRetries := 90
retryDelay := 10 * time.Second
for i := 0; i < maxRetries; i++ {
list, err := vpaClientSet.AutoscalingV1().VerticalPodAutoscalerCheckpoints(ns).List(context.TODO(), metav1.ListOptions{})
if err == nil && len(list.Items) == 0 {
break
}
klog.InfoS("Still waiting...")
time.Sleep(retryDelay)
}

list, err := vpaClientSet.AutoscalingV1().VerticalPodAutoscalerCheckpoints(ns).List(context.TODO(), metav1.ListOptions{})
checkpoints, err := WaitForNumberOfCheckpoints(vpaClientSet, ns, 0)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(list.Items).To(gomega.BeEmpty())
gomega.Expect(checkpoints.Items).To(gomega.BeEmpty())
})
})

@@ -411,6 +400,167 @@ var _ = RecommenderE2eDescribe("VPA CRD object", func() {
})
})

const recommendationLoopInterval = 1 * time.Minute

var _ = RecommenderE2eDescribe("VPA CRD object", func() {
f := framework.NewDefaultFramework("vertical-pod-autoscaling")
f.NamespacePodSecurityEnforceLevel = podsecurity.LevelBaseline

var vpaClientSet vpa_clientset.Interface

ginkgo.BeforeEach(func() {
vpaClientSet = getVpaClientSet(f)
})

ginkgo.It("only provides recommendation to containers that exist when renaming a container", func() {
ginkgo.By("Setting up a hamster deployment")
d := NewNHamstersDeployment(f, 1 /*number of containers*/)
_ = startDeploymentPods(f, d)

ginkgo.By("Setting up VPA CRD")
vpaCRD := test.VerticalPodAutoscaler().
WithName("hamster-vpa").
WithNamespace(f.Namespace.Name).
WithTargetRef(hamsterTargetRef).
WithContainer("*").
WithAnnotations(map[string]string{
vpa_api_util.VpaPruningGracePeriodAnnotation: "0",
}).
Get()

InstallVPA(f, vpaCRD)

ginkgo.By("Waiting for recommendation to be filled for the container")
vpa, err := WaitForRecommendationPresent(vpaClientSet, vpaCRD)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(1))
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations[0].ContainerName).To(gomega.Equal(GetHamsterContainerNameByIndex(0)))

ginkgo.By("Renaming the container")
newContainerName := "renamed-container"
patchRecord := &patchRecord{
Op: "replace",
Path: "/spec/template/spec/containers/0/name",
Value: newContainerName,
}
PatchDeployment(f, d, patchRecord)

ginkgo.By("Waiting for recommendation to be filled for the renamed container and only the renamed container")
time.Sleep(recommendationLoopInterval)
vpa, err = WaitForRecommendationPresent(vpaClientSet, vpaCRD)

gomega.Expect(err).NotTo(gomega.HaveOccurred())
errMsg := fmt.Sprintf("%s is the only container in the VPA CR. There should not be any recommendations for %s",
newContainerName,
GetHamsterContainerNameByIndex(0))
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(1), errMsg)
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations[0].ContainerName).To(gomega.Equal(newContainerName), errMsg)
})

ginkgo.It("only provides recommendation to containers that exist when removing a container + deletes stale checkpoint", func() {
ginkgo.By("Setting up a hamster deployment")
d := NewNHamstersDeployment(f, 2 /*number of containers*/)
_ = startDeploymentPods(f, d)
vpaName := "hamster-vpa"

ginkgo.By("Setting up VPA CRD")
vpaCRD := test.VerticalPodAutoscaler().
WithName(vpaName).
WithNamespace(f.Namespace.Name).
WithTargetRef(hamsterTargetRef).
WithContainer("*").
WithAnnotations(map[string]string{
vpa_api_util.VpaPruningGracePeriodAnnotation: "0",
}).
Get()

InstallVPA(f, vpaCRD)

ginkgo.By("Waiting for recommendation to be filled for both containers")
vpa, err := WaitForRecommendationPresent(vpaClientSet, vpaCRD)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(2))

ginkgo.By("Waiting for VPA checkpoints for each container (2 checkpoints), sleeping for up to 15 minutes...")
checkpoints, err := WaitForNumberOfCheckpoints(vpaClientSet, f.Namespace.Name, 2)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(checkpoints.Items).To(gomega.HaveLen(2))

ginkgo.By("Removing the second container")
patchRecord := &patchRecord{
Op: "remove",
Path: "/spec/template/spec/containers/1",
}
PatchDeployment(f, d, patchRecord)

ginkgo.By("Waiting for recommendation to be filled for just one container")
time.Sleep(recommendationLoopInterval)
vpa, err = WaitForRecommendationPresent(vpaClientSet, vpaCRD)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
errMsg := fmt.Sprintf("%s is now the only container in the VPA CR. There should not be any recommendations for %s",
GetHamsterContainerNameByIndex(0),
GetHamsterContainerNameByIndex(1))
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(1), errMsg)
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations[0].ContainerName).To(gomega.Equal(GetHamsterContainerNameByIndex(0)), errMsg)

ginkgo.By("Waiting for the garbage collection of the stale checkpoint, sleeping for up to 15 minutes...")
checkpoints, err = WaitForNumberOfCheckpoints(vpaClientSet, f.Namespace.Name, 1)
expectedCheckpointName := fmt.Sprintf("%s-%s", vpaName, GetHamsterContainerNameByIndex(0))
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(checkpoints.Items).To(gomega.HaveLen(1))
gomega.Expect(checkpoints.Items[0].Name).To(gomega.Equal(expectedCheckpointName),
fmt.Sprintf("Expected checkpoint name to be %s, got %s", expectedCheckpointName, checkpoints.Items[0].Name))
})

ginkgo.It("only removes a recommendation until after the pruning grace period", func() {
ginkgo.By("Setting up a hamster deployment")
d := NewNHamstersDeployment(f, 2 /*number of containers*/)
_ = startDeploymentPods(f, d)

ginkgo.By("Setting up VPA CRD")
vpaCRD := test.VerticalPodAutoscaler().
WithName("hamster-vpa").
WithNamespace(f.Namespace.Name).
WithTargetRef(hamsterTargetRef).
WithContainer("*").
WithAnnotations(map[string]string{
vpa_api_util.VpaPruningGracePeriodAnnotation: "3m",
}).
Get()

InstallVPA(f, vpaCRD)

ginkgo.By("Waiting for recommendation to be filled for the container")
vpa, err := WaitForRecommendationPresent(vpaClientSet, vpaCRD)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(2))

ginkgo.By("Removing the second container")
patchRecord := &patchRecord{
Op: "remove",
Path: "/spec/template/spec/containers/1",
}
PatchDeployment(f, d, patchRecord)

ginkgo.By("Waiting the duration of the grace period, hoping the recommendation is still there, sleeping for 3 minutes...")
vpa, err = WaitForRecommendationPresent(vpaClientSet, vpaCRD)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(2))

ginkgo.By("Waiting for recommendation to be filled for just one container")
vpa, err = WaitForVPAMatch(vpaClientSet, vpaCRD, func(thisVpa *vpa_types.VerticalPodAutoscaler) bool {
return thisVpa.Status.Recommendation != nil && len(thisVpa.Status.Recommendation.ContainerRecommendations) == 1
})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
errMsg := fmt.Sprintf("%s is now the only container in the VPA CR. There should not be any recommendations for %s",
GetHamsterContainerNameByIndex(0),
GetHamsterContainerNameByIndex(1))
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations).Should(gomega.HaveLen(1), errMsg)
gomega.Expect(vpa.Status.Recommendation.ContainerRecommendations[0].ContainerName).To(gomega.Equal(GetHamsterContainerNameByIndex(0)), errMsg)
})

})

func deleteRecommender(c clientset.Interface) error {
namespace := "kube-system"
listOptions := metav1.ListOptions{}
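Editor's note: the grace-period behavior exercised above is driven entirely by the VpaPruningGracePeriodAnnotation on the VPA object. A minimal sketch of what that looks like on a user-facing VPA follows; the name and namespace are placeholders, and the builder calls in the tests above produce an equivalent object.

import (
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
	vpa_api_util "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/vpa"
)

// buildVPAWithGracePeriod sketches a VPA whose stale container aggregates are
// kept for 3 minutes before pruning; "hamster-vpa" is a placeholder name.
func buildVPAWithGracePeriod(namespace string) *vpa_types.VerticalPodAutoscaler {
	return &vpa_types.VerticalPodAutoscaler{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "hamster-vpa",
			Namespace: namespace,
			Annotations: map[string]string{
				vpa_api_util.VpaPruningGracePeriodAnnotation: "3m",
			},
		},
	}
}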
70 changes: 70 additions & 0 deletions vertical-pod-autoscaler/pkg/recommender/input/cluster_feeder.go
@@ -73,6 +73,12 @@ type ClusterStateFeeder interface {

// GarbageCollectCheckpoints removes historical checkpoints that don't have a matching VPA.
GarbageCollectCheckpoints()

// MarkAggregates marks all aggregates in all VPAs as not under their VPA.
MarkAggregates()

// SweepAggregates garbage collects all aggregates in all VPAs' aggregate lists that are no longer under a VPA.
SweepAggregates()
}

// ClusterStateFeederFactory makes instances of ClusterStateFeeder.
@@ -208,6 +214,7 @@ func (feeder *clusterStateFeeder) InitFromHistoryProvider(historyProvider histor
}
for podID, podHistory := range clusterHistory {
klog.V(4).InfoS("Adding pod with labels", "pod", podID, "labels", podHistory.LastLabels)
_, existedBefore := feeder.clusterState.Pods[podID]
feeder.clusterState.AddOrUpdatePod(podID, podHistory.LastLabels, apiv1.PodUnknown)
for containerName, sampleList := range podHistory.Samples {
containerID := model.ContainerID{
@@ -228,6 +235,14 @@
}
}
}
// If the pod never existed before, AddOrUpdatePod did not set VPAContainersPerPod because podState.Containers
// had not yet been initialized by AddOrUpdateContainer. So we explicitly set it here the first time we see the pod.
if !existedBefore {
podState, podExists := feeder.clusterState.Pods[podID]
if podExists && len(podHistory.Samples) > 1 {
feeder.clusterState.SetVPAContainersPerPod(podState, false)
}
}
}
}

@@ -316,6 +331,20 @@ func (feeder *clusterStateFeeder) GarbageCollectCheckpoints() {
klog.ErrorS(err, "Orphaned VPA checkpoint cleanup - error deleting", "checkpoint", klog.KRef(namespace, checkpoint.Name))
}
}
// Also clean up a checkpoint if the VPA is still there but the container is gone. AggregateStateByContainerName
// merges in the initial aggregates, so we can use it to check both lists (initial and regular aggregates) at once.
vpa, vpaExists := feeder.clusterState.Vpas[vpaID]
if vpaExists {
_, aggregateExists := vpa.AggregateStateByContainerName()[checkpoint.Spec.ContainerName]
if !aggregateExists {
err = feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).Delete(context.TODO(), checkpoint.Name, metav1.DeleteOptions{})
if err == nil {
klog.V(3).InfoS("Orphaned VPA checkpoint cleanup - deleting", "checkpoint", klog.KRef(namespace, checkpoint.Name))
} else {
klog.ErrorS(err, "Orphaned VPA checkpoint cleanup - error deleting", "checkpoint", klog.KRef(namespace, checkpoint.Name))
}
}
}
}
}
}
@@ -413,6 +442,38 @@ func (feeder *clusterStateFeeder) LoadVPAs(ctx context.Context) {
feeder.clusterState.ObservedVpas = vpaCRDs
}

// MarkAggregates marks all aggregates with IsUnderVPA=false, so that when we go
// through LoadPods(), the valid ones are marked back to true and the remaining
// false ones can be garbage collected from the VPAs' aggregate lists.
func (feeder *clusterStateFeeder) MarkAggregates() {
for _, vpa := range feeder.clusterState.Vpas {
for _, container := range vpa.AggregateContainerStates() {
container.IsUnderVPA = false
}
for _, container := range vpa.ContainersInitialAggregateState {
container.IsUnderVPA = false
}
}
}

// SweepAggregates prunes all aggregates and initial aggregates from a VPA
// when the containers they refer to are no longer present.
func (feeder *clusterStateFeeder) SweepAggregates() {
now := time.Now()
for _, vpa := range feeder.clusterState.Vpas {
// Use the merged aggregate state to check both the initial and regular aggregates.
for containerName, container := range vpa.AggregateStateByContainerName() {
if !container.IsUnderVPA && container.IsAggregateStale(now) {
klog.V(4).InfoS("Deleting stale aggregate container states; container no longer present",
"namespace", vpa.ID.Namespace,
"vpaName", vpa.ID.VpaName,
"containerName", containerName)
vpa.DeleteAllAggregatesByContainerName(containerName)
}
}
}
}

// LoadPods loads pods into the cluster state.
func (feeder *clusterStateFeeder) LoadPods() {
podSpecs, err := feeder.specClient.GetPodSpecs()
@@ -433,12 +494,21 @@
if feeder.memorySaveMode && !feeder.matchesVPA(pod) {
continue
}
_, existedBefore := feeder.clusterState.Pods[pod.ID]
feeder.clusterState.AddOrUpdatePod(pod.ID, pod.PodLabels, pod.Phase)
for _, container := range pod.Containers {
if err = feeder.clusterState.AddOrUpdateContainer(container.ID, container.Request); err != nil {
klog.V(0).InfoS("Failed to add container", "container", container.ID, "error", err)
}
}
// If the pod never existed before, AddOrUpdatePod did not set VPAContainersPerPod because podState.Containers
// had not yet been initialized by AddOrUpdateContainer. So we explicitly set it here the first time we see the pod.
if !existedBefore {
podState, podExists := feeder.clusterState.Pods[pod.ID]
if podExists && len(pod.Containers) > 1 {
feeder.clusterState.SetVPAContainersPerPod(podState, false)
}
}
}
}

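Editor's note: taken together, the two new feeder hooks implement a mark-and-sweep pass over container aggregates. Below is a minimal sketch of how one recommendation cycle might sequence them; the function name and call order are assumptions for illustration, since the actual call sites live in the recommender's main loop and are not part of this diff.

import (
	"context"

	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/input"
)

// runOnce sketches one recommendation cycle using only methods shown in this diff.
func runOnce(ctx context.Context, feeder input.ClusterStateFeeder) {
	feeder.LoadVPAs(ctx)               // refresh the set of VPA objects
	feeder.MarkAggregates()            // mark: every aggregate starts the cycle with IsUnderVPA=false
	feeder.LoadPods()                  // containers that still exist flip their aggregates back to true
	feeder.SweepAggregates()           // sweep: prune aggregates that stayed false and are past their grace period
	feeder.GarbageCollectCheckpoints() // also removes checkpoints whose container aggregate is gone
}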
8 changes: 5 additions & 3 deletions vertical-pod-autoscaler/pkg/recommender/logic/recommender.go
@@ -22,6 +22,7 @@ import (

vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/recommender/model"
"k8s.io/klog/v2"
)

var (
@@ -39,7 +40,7 @@

// PodResourceRecommender computes resource recommendation for a Vpa object.
type PodResourceRecommender interface {
GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources
GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, containersPerPod int) RecommendedPodResources
}

// RecommendedPodResources is a Map from container name to recommended resources.
@@ -65,13 +66,14 @@ type podResourceRecommender struct {
upperBoundMemory MemoryEstimator
}

func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap) RecommendedPodResources {
func (r *podResourceRecommender) GetRecommendedPodResources(containerNameToAggregateStateMap model.ContainerNameToAggregateStateMap, containersPerPod int) RecommendedPodResources {
var recommendation = make(RecommendedPodResources)
if len(containerNameToAggregateStateMap) == 0 {
return recommendation
}

fraction := 1.0 / float64(len(containerNameToAggregateStateMap))
fraction := 1.0 / float64(containersPerPod)
klog.V(5).InfoS("Spreading recommendation across containers", "containerCount", containersPerPod, "fraction", fraction)
minCPU := model.ScaleResource(model.CPUAmountFromCores(*podMinCPUMillicores*0.001), fraction)
minMemory := model.ScaleResource(model.MemoryAmountFromBytes(*podMinMemoryMb*1024*1024), fraction)

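Editor's note: the one-line fraction change above is the "split recommendations over true number of containers" half of this PR. Per-pod minimums are now divided by the pod spec's actual container count instead of by the size of the aggregate map, which can transiently contain stale entries. A toy illustration with made-up counts:

package main

import "fmt"

func main() {
	// A container was just renamed, so the aggregate map briefly holds both
	// the old and the new name while the pod spec has a single container.
	aggregatesInMap := 2
	containersPerPod := 1

	oldFraction := 1.0 / float64(aggregatesInMap)  // 0.5: minimums diluted by the stale aggregate
	newFraction := 1.0 / float64(containersPerPod) // 1.0: matches what is actually running

	fmt.Printf("old=%.2f new=%.2f\n", oldFraction, newFraction)
}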