diff --git a/pkg/kubelet/eviction/eviction_manager.go b/pkg/kubelet/eviction/eviction_manager.go
index 5c393917f77e7..944c8c8cd820f 100644
--- a/pkg/kubelet/eviction/eviction_manager.go
+++ b/pkg/kubelet/eviction/eviction_manager.go
@@ -442,8 +442,9 @@ func (m *managerImpl) reclaimNodeLevelResources(signalToReclaim evictionapi.Sign
 	observations, _ := makeSignalObservations(summary)
 	debugLogObservations("observations after resource reclaim", observations)
 
-	// determine the set of thresholds met independent of grace period
-	thresholds := thresholdsMet(m.config.Thresholds, observations, false)
+	// evaluate all thresholds independently of their grace period to see if with
+	// the new observations, we think we have met min reclaim goals
+	thresholds := thresholdsMet(m.config.Thresholds, observations, true)
 	debugLogThresholdsWithObservation("thresholds after resource reclaim - ignoring grace period", thresholds, observations)
 
 	if len(thresholds) == 0 {
diff --git a/pkg/kubelet/eviction/eviction_manager_test.go b/pkg/kubelet/eviction/eviction_manager_test.go
index 3abdbb8c4c85a..da5c57c297a3f 100644
--- a/pkg/kubelet/eviction/eviction_manager_test.go
+++ b/pkg/kubelet/eviction/eviction_manager_test.go
@@ -886,6 +886,51 @@ func TestNodeReclaimFuncs(t *testing.T) {
 		t.Errorf("Manager should not report disk pressure")
 	}
 
+	// synchronize
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
+	// induce hard threshold
+	fakeClock.Step(1 * time.Minute)
+	summaryProvider.result = summaryStatsMaker(".9Gi", "200Gi", podStats)
+	// make GC return disk usage below the threshold, but not satisfying minReclaim
+	diskGC.summaryAfterGC = summaryStatsMaker("1.1Gi", "200Gi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should have disk pressure
+	if !manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should report disk pressure since hard threshold was met")
+	}
+
+	// verify image gc was invoked
+	if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
+		t.Errorf("Manager should have invoked image gc")
+	}
+
+	// verify a pod was killed because image gc was not enough to satisfy minReclaim
+	if podKiller.pod == nil {
+		t.Errorf("Manager should have killed a pod, but didn't")
+	}
+
+	// reset state
+	diskGC.imageGCInvoked = false
+	diskGC.containerGCInvoked = false
+	podKiller.pod = nil
+
+	// remove disk pressure
+	fakeClock.Step(20 * time.Minute)
+	summaryProvider.result = summaryStatsMaker("16Gi", "200Gi", podStats)
+	manager.synchronize(diskInfoProvider, activePodsFunc)
+
+	// we should not have disk pressure
+	if manager.IsUnderDiskPressure() {
+		t.Errorf("Manager should not report disk pressure")
+	}
+
 	// induce disk pressure!
 	fakeClock.Step(1 * time.Minute)
 	summaryProvider.result = summaryStatsMaker("400Mi", "200Gi", podStats)