Skip to content

Commit

Permalink
UPSTREAM: 98956: Fix race when KillPod followed by IsPodPendingTermin…
Browse files Browse the repository at this point in the history
…ation
  • Loading branch information
rphillips authored and soltysh committed Sep 8, 2021
1 parent 1607d8f commit 5743df3
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 17 deletions.
24 changes: 7 additions & 17 deletions pkg/kubelet/kubelet_pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,7 @@ func (kl *Kubelet) HandlePodCleanups() error {
// PodKiller handles requests for killing pods
type PodKiller interface {
// KillPod receives pod speficier representing the pod to kill
KillPod(pair *kubecontainer.PodPair)
KillPod(podPair *kubecontainer.PodPair)
// PerformPodKillingWork performs the actual pod killing work via calling CRI
// It returns after its Close() func is called and all outstanding pod killing requests are served
PerformPodKillingWork()
Expand Down Expand Up @@ -1215,10 +1215,9 @@ func (pk *podKillerWithChannel) markPodTerminated(uid string) {
delete(pk.podTerminationMap, uid)
}

// checkAndMarkPodPendingTerminationByPod checks to see if the pod is being
// killed and returns true if it is, otherwise the pod is added to the map and
// returns false
func (pk *podKillerWithChannel) checkAndMarkPodPendingTerminationByPod(podPair *kubecontainer.PodPair) bool {
// KillPod sends pod killing request to the killer after marks the pod
// unless the given pod has been marked to be killed
func (pk *podKillerWithChannel) KillPod(podPair *kubecontainer.PodPair) {
pk.podKillingLock.Lock()
defer pk.podKillingLock.Unlock()
var apiPodExists bool
Expand Down Expand Up @@ -1249,30 +1248,21 @@ func (pk *podKillerWithChannel) checkAndMarkPodPendingTerminationByPod(podPair *
} else {
klog.V(4).Infof("running pod %q is pending termination", podPair.RunningPod.ID)
}
return true
return
}
return false
// Limit to one request per pod
pk.podKillingCh <- podPair
}

// Close closes the channel through which requests are delivered
func (pk *podKillerWithChannel) Close() {
close(pk.podKillingCh)
}

// KillPod sends pod killing request to the killer
func (pk *podKillerWithChannel) KillPod(pair *kubecontainer.PodPair) {
pk.podKillingCh <- pair
}

// PerformPodKillingWork launches a goroutine to kill a pod received from the channel if
// another goroutine isn't already in action.
func (pk *podKillerWithChannel) PerformPodKillingWork() {
for podPair := range pk.podKillingCh {
if pk.checkAndMarkPodPendingTerminationByPod(podPair) {
// Pod is already being killed
continue
}

runningPod := podPair.RunningPod
apiPod := podPair.APIPod

Expand Down
36 changes: 36 additions & 0 deletions pkg/kubelet/kubelet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ func TestSyncPodsDeletesWhenSourcesAreReadyPerQOS(t *testing.T) {
// assert that unwanted pods were killed
fakeRuntime.AssertKilledPods([]string{"12345678"})

// simulate Runtime.KillPod
fakeRuntime.PodList = nil

kubelet.HandlePodCleanups()
kubelet.HandlePodCleanups()
kubelet.HandlePodCleanups()
Expand Down Expand Up @@ -497,6 +500,39 @@ func TestSyncPodsDeletesWhenSourcesAreReady(t *testing.T) {
fakeRuntime.AssertKilledPods([]string{"12345678"})
}

func TestKillPodFollwedByIsPodPendingTermination(t *testing.T) {
testKubelet := newTestKubelet(t, false /* controllerAttachDetachEnabled */)
defer testKubelet.Cleanup()
defer testKubelet.kubelet.podKiller.Close()
go testKubelet.kubelet.podKiller.PerformPodKillingWork()

pod := &kubecontainer.Pod{
ID: "12345678",
Name: "foo",
Namespace: "new",
Containers: []*kubecontainer.Container{
{Name: "bar"},
},
}

fakeRuntime := testKubelet.fakeRuntime
fakeContainerManager := testKubelet.fakeContainerManager
fakeContainerManager.PodContainerManager.AddPodFromCgroups(pod) // add pod to mock cgroup
fakeRuntime.PodList = []*containertest.FakePod{
{Pod: pod},
}

kl := testKubelet.kubelet
kl.podKiller.KillPod(&kubecontainer.PodPair{
APIPod: nil,
RunningPod: pod,
})

if !(kl.podKiller.IsPodPendingTerminationByUID(pod.ID) || fakeRuntime.AssertKilledPods([]string{"12345678"}) == nil) {
t.Fatal("Race condition: When KillPod is complete, the pod should be pending termination or be killed")
}
}

type testNodeLister struct {
nodes []*v1.Node
}
Expand Down

0 comments on commit 5743df3

Please sign in to comment.