Skip to content

Commit

Permalink
support fix containerrecreaterequest (openkruise#1182)
Browse files Browse the repository at this point in the history
Co-authored-by: jicheng.sk <jicheng.sk@alibaba-inc.com>
  • Loading branch information
BH4AWS and BH4AWS authored Mar 16, 2023
1 parent 9ae0f40 commit 92eafa9
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 24 deletions.
4 changes: 4 additions & 0 deletions apis/apps/v1alpha1/containerrecreaterequest_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ type ContainerRecreateRequestStrategy struct {
FailurePolicy ContainerRecreateRequestFailurePolicyType `json:"failurePolicy,omitempty"`
// OrderedRecreate indicates whether to recreate the next container only if the previous one has recreated completely.
OrderedRecreate bool `json:"orderedRecreate,omitempty"`
// ForceRecreate indicates whether to force kill the container even if the previous container is starting.
ForceRecreate bool `json:"forceRecreate,omitempty"`
// TerminationGracePeriodSeconds is the optional duration in seconds to wait the container terminating gracefully.
// Value must be non-negative integer. The value zero indicates delete immediately.
// If this value is nil, we will use pod.Spec.TerminationGracePeriodSeconds as default value.
Expand Down Expand Up @@ -158,6 +160,8 @@ type ContainerRecreateRequestContainerRecreateState struct {
Phase ContainerRecreateRequestPhase `json:"phase"`
// A human readable message indicating details about this state.
Message string `json:"message,omitempty"`
// Containers are killed by kruise daemon
IsKilled bool `json:"isKilled,omitempty"`
}

// ContainerRecreateRequestSyncContainerStatus only uses in the annotation `crr.apps.kruise.io/sync-container-statuses`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,10 @@ spec:
description: FailurePolicy decides whether to continue if one
container fails to recreate
type: string
forceRecreate:
description: ForceRecreate indicates whether to force kill the
container even if the previous container is starting.
type: boolean
minStartedSeconds:
description: Minimum number of seconds for which a newly created
container should be started and ready without any of its container
Expand Down Expand Up @@ -289,6 +293,9 @@ spec:
description: ContainerRecreateRequestContainerRecreateState contains
the recreation state of the container.
properties:
isKilled:
description: Containers are killed by kruise daemon
type: boolean
message:
description: A human readable message indicating details about
this state.
Expand Down
2 changes: 2 additions & 0 deletions pkg/daemon/containerrecreate/crr_daemon_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ func (c *Controller) manage(crr *appsv1alpha1.ContainerRecreateRequest) error {
}

if state.Phase == appsv1alpha1.ContainerRecreateRequestRecreating {
state.IsKilled = true
if crr.Spec.Strategy.OrderedRecreate {
break
}
Expand All @@ -362,6 +363,7 @@ func (c *Controller) manage(crr *appsv1alpha1.ContainerRecreateRequest) error {
}
return c.patchCRRContainerRecreateStates(crr, newCRRContainerRecreateStates)
}
state.IsKilled = true
state.Phase = appsv1alpha1.ContainerRecreateRequestRecreating
break
}
Expand Down
38 changes: 27 additions & 11 deletions pkg/daemon/containerrecreate/crr_daemon_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,40 +85,49 @@ func getCurrentCRRContainersRecreateStates(
kubeContainerStatus := podStatus.FindContainerStatusByName(c.Name)

var currentState appsv1alpha1.ContainerRecreateRequestContainerRecreateState

if kubeContainerStatus == nil {
// not found the real container
currentState = appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestPending,
Message: "not found container on Node",
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestPending,
IsKilled: getPreviousContainerKillState(previousContainerRecreateState),
Message: "not found container on Node",
}

} else if kubeContainerStatus.State != kubeletcontainer.ContainerStateRunning {
} else if kubeContainerStatus.State == kubeletcontainer.ContainerStateExited {
// for a non-running (exited) container, we consider it will be recreated or restarted soon
currentState = appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestRecreating,
IsKilled: getPreviousContainerKillState(previousContainerRecreateState),
}
} else if crr.Spec.Strategy.ForceRecreate && (previousContainerRecreateState == nil || !previousContainerRecreateState.IsKilled) {
// for ForceRecreate scenarios, when the previous recreate state is empty or the container has not been killed yet, the restart requirement is set immediately
currentState = appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestRecreating,
Phase: appsv1alpha1.ContainerRecreateRequestPending,
}

} else if kubeContainerStatus.ID.String() != c.StatusContext.ContainerID ||
kubeContainerStatus.RestartCount > int(c.StatusContext.RestartCount) ||
kubeContainerStatus.StartedAt.After(crr.CreationTimestamp.Time) {
// already recreated or restarted
currentState = appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestRecreating,
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestRecreating,
IsKilled: getPreviousContainerKillState(previousContainerRecreateState),
}
if syncContainerStatus != nil &&
syncContainerStatus.ContainerID == kubeContainerStatus.ID.String() &&
time.Since(kubeContainerStatus.StartedAt) > minStartedDuration &&
syncContainerStatus.Ready {
currentState.Phase = appsv1alpha1.ContainerRecreateRequestSucceeded
}

} else {
currentState = appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestPending,
Name: c.Name,
Phase: appsv1alpha1.ContainerRecreateRequestPending,
IsKilled: getPreviousContainerKillState(previousContainerRecreateState),
}
}

Expand All @@ -128,6 +137,13 @@ func getCurrentCRRContainersRecreateStates(
return statuses
}

// getPreviousContainerKillState reports the IsKilled flag carried by the
// previous recreate state of a container. When no previous state exists
// (nil), it reports false.
func getPreviousContainerKillState(previousContainerRecreateState *appsv1alpha1.ContainerRecreateRequestContainerRecreateState) bool {
	// Short-circuit evaluation keeps the nil pointer from being dereferenced.
	return previousContainerRecreateState != nil && previousContainerRecreateState.IsKilled
}

func getCRRContainerRecreateState(crr *appsv1alpha1.ContainerRecreateRequest, name string) *appsv1alpha1.ContainerRecreateRequestContainerRecreateState {
for i := range crr.Status.ContainerRecreateStates {
c := &crr.Status.ContainerRecreateStates[i]
Expand Down
147 changes: 135 additions & 12 deletions test/e2e/apps/containerrecreate.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
return crr.Labels[appsv1alpha1.ContainerRecreateRequestActiveKey]
}, 5*time.Second, 1*time.Second).Should(gomega.Equal(""))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded}}))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true}}))

ginkgo.By("Check Pod containers recreated and started for minStartedSeconds")
pod, err = tester.GetPod(pod.Name)
Expand Down Expand Up @@ -158,8 +158,8 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {
return crr.Labels[appsv1alpha1.ContainerRecreateRequestActiveKey]
}, 5*time.Second, 1*time.Second).Should(gomega.Equal(""))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
}))

ginkgo.By("Check Pod containers recreated")
Expand Down Expand Up @@ -218,8 +218,8 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {
}, 60*time.Second, 3*time.Second).Should(gomega.Equal(appsv1alpha1.ContainerRecreateRequestCompleted))
gomega.Expect(crr.Status.CompletionTime).ShouldNot(gomega.BeNil())
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
}))
gomega.Eventually(func() string {
crr, err = tester.GetCRR(crr.Name)
Expand Down Expand Up @@ -277,8 +277,8 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {
return crr.Labels[appsv1alpha1.ContainerRecreateRequestActiveKey]
}, 5*time.Second, 1*time.Second).Should(gomega.Equal(""))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
}))

ginkgo.By("Check Pod containers recreated")
Expand Down Expand Up @@ -347,8 +347,8 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {
return crr.Labels[appsv1alpha1.ContainerRecreateRequestActiveKey]
}, 5*time.Second, 1*time.Second).Should(gomega.Equal(""))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
}))

ginkgo.By("Check Pod containers recreated")
Expand Down Expand Up @@ -425,7 +425,7 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {
crr, err = tester.GetCRR(crr.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestRecreating},
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestRecreating, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestPending},
}))
}
Expand Down Expand Up @@ -474,8 +474,8 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {
return crr.Status.Phase
}, 60*time.Second, time.Second).Should(gomega.Equal(appsv1alpha1.ContainerRecreateRequestCompleted))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded},
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
}))

ginkgo.By("Check Kruise readiness condition True")
Expand All @@ -498,5 +498,128 @@ var _ = SIGDescribe("ContainerRecreateRequest", func() {

})

// Exercises Strategy.ForceRecreate on a CloneSet with two Pods. Every Pod runs an "app"
// container whose postStart hook executes `sleep 5`, plus a "sidecar" container.
// pods[0] is recreated with ForceRecreate only; pods[1] with ForceRecreate and OrderedRecreate.
framework.ConformanceIt("recreates containers by force", func() {
ginkgo.By("Create CloneSet and wait Pods ready")
pods = tester.CreateTestCloneSetAndGetPods(randStr, 2, []v1.Container{
{
Name: "app",
Image: WebserverImage,
// The postStart hook sleeps for 5 seconds; the timing assertions below are compared against this.
Lifecycle: &v1.Lifecycle{PostStart: &v1.Handler{
Exec: &v1.ExecAction{Command: []string{"sleep", "5"}},
}},
},
{
Name: "sidecar",
Image: AgnhostImage,
},
})

// Case 1: ForceRecreate without OrderedRecreate.
{
ginkgo.By("Create CRR for pods[0], recreate container: app(postStartHook) and sidecar by force")
pod := pods[0]
crr := &appsv1alpha1.ContainerRecreateRequest{
ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: "crr-" + randStr + "-0"},
Spec: appsv1alpha1.ContainerRecreateRequestSpec{
PodName: pod.Name,
Containers: []appsv1alpha1.ContainerRecreateRequestContainer{
{Name: "app"},
{Name: "sidecar"},
},
Strategy: &appsv1alpha1.ContainerRecreateRequestStrategy{
ForceRecreate: true,
},
},
}
crr, err = tester.CreateCRR(crr)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// The returned CRR is expected to carry the app container's current ContainerID in its StatusContext.
gomega.Expect(crr.Spec.Containers[0].StatusContext.ContainerID).Should(gomega.Equal(util.GetContainerStatus("app", pod).ContainerID))

ginkgo.By("Wait CRR recreate completion")
// Poll every 3 seconds, for up to 60 seconds, until the CRR phase is Completed.
gomega.Eventually(func() appsv1alpha1.ContainerRecreateRequestPhase {
crr, err = tester.GetCRR(crr.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
return crr.Status.Phase
}, 60*time.Second, 3*time.Second).Should(gomega.Equal(appsv1alpha1.ContainerRecreateRequestCompleted))
gomega.Expect(crr.Status.CompletionTime).ShouldNot(gomega.BeNil())
// Both containers are expected to end as Succeeded with IsKilled set.
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
}))
// The active label is expected to become empty within 5 seconds.
gomega.Eventually(func() string {
crr, err = tester.GetCRR(crr.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
return crr.Labels[appsv1alpha1.ContainerRecreateRequestActiveKey]
}, 5*time.Second, time.Second).Should(gomega.Equal(""))

ginkgo.By("Check Pod containers recreated")
pod, err = tester.GetPod(pod.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(podutil.IsPodReady(pod)).Should(gomega.Equal(true))
appContainerStatus := util.GetContainerStatus("app", pod)
sidecarContainerStatus := util.GetContainerStatus("sidecar", pod)
gomega.Expect(sidecarContainerStatus.ContainerID).ShouldNot(gomega.Equal(crr.Spec.Containers[1].StatusContext.ContainerID))
// Each container is expected to have been restarted exactly once.
gomega.Expect(appContainerStatus.RestartCount).Should(gomega.Equal(int32(1)))
gomega.Expect(sidecarContainerStatus.RestartCount).Should(gomega.Equal(int32(1)))

ginkgo.By("Check Pod sidecar container recreated not waiting for app container ready")
// interval = sidecar's last termination time minus app's last termination time.
// Without OrderedRecreate it is expected to be below 3 seconds.
interval := sidecarContainerStatus.LastTerminationState.Terminated.FinishedAt.Sub(appContainerStatus.LastTerminationState.Terminated.FinishedAt.Time)
gomega.Expect(interval < 3*time.Second).Should(gomega.Equal(true))
}

// Case 2: ForceRecreate combined with OrderedRecreate.
{
ginkgo.By("Create CRR for pods[1] with orderedRecreate by force, recreate container: app(postStartHook) and sidecar")
pod := pods[1]
crr := &appsv1alpha1.ContainerRecreateRequest{
ObjectMeta: metav1.ObjectMeta{Namespace: ns, Name: "crr-" + randStr + "-1"},
Spec: appsv1alpha1.ContainerRecreateRequestSpec{
PodName: pod.Name,
Containers: []appsv1alpha1.ContainerRecreateRequestContainer{
{Name: "app"},
{Name: "sidecar"},
},
Strategy: &appsv1alpha1.ContainerRecreateRequestStrategy{
OrderedRecreate: true,
ForceRecreate: true,
},
},
}
crr, err = tester.CreateCRR(crr)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Both containers' current ContainerIDs are expected in the returned CRR's StatusContext.
gomega.Expect(crr.Spec.Containers[0].StatusContext.ContainerID).Should(gomega.Equal(util.GetContainerStatus("app", pod).ContainerID))
gomega.Expect(crr.Spec.Containers[1].StatusContext.ContainerID).Should(gomega.Equal(util.GetContainerStatus("sidecar", pod).ContainerID))

ginkgo.By("Wait CRR recreate completion")
// Poll every 3 seconds, for up to 60 seconds, until the CRR phase is Completed.
gomega.Eventually(func() appsv1alpha1.ContainerRecreateRequestPhase {
crr, err = tester.GetCRR(crr.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
return crr.Status.Phase
}, 60*time.Second, 3*time.Second).Should(gomega.Equal(appsv1alpha1.ContainerRecreateRequestCompleted))
gomega.Expect(crr.Status.CompletionTime).ShouldNot(gomega.BeNil())
// The active label is expected to become empty within 5 seconds.
gomega.Eventually(func() string {
crr, err = tester.GetCRR(crr.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
return crr.Labels[appsv1alpha1.ContainerRecreateRequestActiveKey]
}, 5*time.Second, 1*time.Second).Should(gomega.Equal(""))
// Both containers are expected to end as Succeeded with IsKilled set.
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{
{Name: "app", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
{Name: "sidecar", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true},
}))

ginkgo.By("Check Pod containers recreated")
pod, err = tester.GetPod(pod.Name)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
gomega.Expect(podutil.IsPodReady(pod)).Should(gomega.Equal(true))
appContainerStatus := util.GetContainerStatus("app", pod)
sidecarContainerStatus := util.GetContainerStatus("sidecar", pod)
// Both containers are expected to have new ContainerIDs and exactly one restart each.
gomega.Expect(appContainerStatus.ContainerID).ShouldNot(gomega.Equal(crr.Spec.Containers[0].StatusContext.ContainerID))
gomega.Expect(sidecarContainerStatus.ContainerID).ShouldNot(gomega.Equal(crr.Spec.Containers[1].StatusContext.ContainerID))
gomega.Expect(appContainerStatus.RestartCount).Should(gomega.Equal(int32(1)))
gomega.Expect(sidecarContainerStatus.RestartCount).Should(gomega.Equal(int32(1)))

ginkgo.By("Check Pod sidecar container recreated after app container ready")
// With OrderedRecreate the sidecar's last termination is expected at least 5 seconds after the app's.
// NOTE(review): the 5-second bound presumably mirrors the app's `sleep 5` postStart hook — confirm.
interval := sidecarContainerStatus.LastTerminationState.Terminated.FinishedAt.Sub(appContainerStatus.LastTerminationState.Terminated.FinishedAt.Time)
gomega.Expect(interval >= 5*time.Second).Should(gomega.Equal(true))
}
})
})
})
2 changes: 1 addition & 1 deletion test/e2e/apps/ephemeraljob.go
Original file line number Diff line number Diff line change
Expand Up @@ -587,7 +587,7 @@ var _ = SIGDescribe("EphemeralJob", func() {
gomega.Expect(err).NotTo(gomega.HaveOccurred())
return crr.Labels[appsv1alpha1.ContainerRecreateRequestActiveKey]
}, 5*time.Second, 1*time.Second).Should(gomega.Equal(""))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{{Name: "nginx", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded}}))
gomega.Expect(crr.Status.ContainerRecreateStates).Should(gomega.Equal([]appsv1alpha1.ContainerRecreateRequestContainerRecreateState{{Name: "nginx", Phase: appsv1alpha1.ContainerRecreateRequestSucceeded, IsKilled: true}}))

ginkgo.By("Check Pod containers recreated and started for minStartedSeconds")
pod, err = resetartContainerTester.GetPod(pod.Name)
Expand Down

0 comments on commit 92eafa9

Please sign in to comment.