diff --git a/test/e2e/common/05_tektonhubdeployment_test.go b/test/e2e/common/05_tektonhubdeployment_test.go
index 1b7042281b..450c912ee6 100644
--- a/test/e2e/common/05_tektonhubdeployment_test.go
+++ b/test/e2e/common/05_tektonhubdeployment_test.go
@@ -55,6 +55,7 @@ type TektonHubTestSuite struct {
 	resourceNames      utils.ResourceNames
 	clients            *utils.Clients
 	deployments        []string
+	pvcs               []string
 	dbMigrationJobName string
 	interval           time.Duration
 	timeout            time.Duration
@@ -79,6 +80,9 @@ func NewTektonHubTestSuite(t *testing.T) *TektonHubTestSuite {
 			"tekton-hub-api",
 			"tekton-hub-ui",
 		},
+		pvcs: []string{
+			"tekton-hub-api",
+		},
 		dbMigrationJobName: "tekton-hub-db-migration",
 		interval:           5 * time.Second,
 		timeout:            5 * time.Minute,
@@ -302,6 +306,16 @@ func (s *TektonHubTestSuite) undeploy(databaseNamespace string) {
 		err := resources.WaitForDeploymentDeletion(s.clients.KubeClient, deploymentName, namespace, pollInterval, timeout)
 		require.NoError(t, err)
 	}
+	// verify pvcs are removed
+	for _, pvcName := range s.pvcs {
+		namespace := s.resourceNames.TargetNamespace
+		if databaseNamespace != "" {
+			// no need to verify external database removal
+			continue
+		}
+		err := resources.WaitForPVCDeletion(s.clients.KubeClient, pvcName, namespace, pollInterval, timeout)
+		require.NoError(t, err)
+	}
 	// verify migration job is removed
 	err := resources.WaitForJobDeletion(s.clients.KubeClient, s.dbMigrationJobName, s.resourceNames.TargetNamespace, pollInterval, timeout)
 	require.NoError(t, err)
diff --git a/test/resources/deployment.go b/test/resources/deployment.go
index efeeb1c108..8f8c405c4a 100644
--- a/test/resources/deployment.go
+++ b/test/resources/deployment.go
@@ -126,3 +126,19 @@ func WaitForDeploymentDeletion(kubeClient kubernetes.Interface, name, namespace
 
 	return wait.PollUntilContextTimeout(context.TODO(), interval, timeout, true, verifyFunc)
 }
+
+// WaitForPVCDeletion waits for the PVC to be deleted.
+func WaitForPVCDeletion(kubeClient kubernetes.Interface, name, namespace string, interval, timeout time.Duration) error {
+	verifyFunc := func(ctx context.Context) (bool, error) {
+		_, err := kubeClient.CoreV1().PersistentVolumeClaims(namespace).Get(context.TODO(), name, metav1.GetOptions{})
+		if err != nil {
+			if apierrs.IsNotFound(err) {
+				return true, nil
+			}
+			return false, err
+		}
+		return false, nil
+	}
+
+	return wait.PollUntilContextTimeout(context.TODO(), interval, timeout, true, verifyFunc)
+}
diff --git a/test/resources/pod.go b/test/resources/pod.go
index d304504c1f..0663a089bc 100644
--- a/test/resources/pod.go
+++ b/test/resources/pod.go
@@ -21,6 +21,7 @@ import (
 	"time"
 
 	core "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/client-go/kubernetes"
@@ -37,6 +38,18 @@ func DeletePodByLabelSelector(kubeClient kubernetes.Interface, labelSelector, na
 		if err != nil {
 			return err
 		}
+		// wait for the pod to be deleted
+		err = wait.PollUntilContextTimeout(context.TODO(), 5*time.Second, 3*time.Minute, true,
+			func(ctx context.Context) (bool, error) {
+				_, err := kubeClient.CoreV1().Pods(namespace).Get(context.TODO(), pod.GetName(), metav1.GetOptions{})
+				if err != nil && apierrors.IsNotFound(err) {
+					return true, nil
+				}
+				return false, nil
+			})
+		if err != nil {
+			return err
+		}
 	}
 	return nil
 }
@@ -49,11 +62,17 @@ func WaitForPodByLabelSelector(kubeClient kubernetes.Interface, labelSelector, n
 		}
 
 		for _, pod := range pods.Items {
-			if pod.Status.Phase == core.PodRunning {
-				return true, nil
+			if pod.Status.Phase != core.PodRunning {
+				return false, nil
+			}
+			// Only when the Ready status of the Pod is True is the Pod considered Ready
+			for _, c := range pod.Status.Conditions {
+				if c.Type == core.PodReady && c.Status != core.ConditionTrue {
+					return false, nil
+				}
 			}
 		}
-		return false, nil
+		return true, nil
 	}
 
 	return wait.PollUntilContextTimeout(context.TODO(), interval, timeout, true, verifyFunc)
diff --git a/test/resources/tektonconfigs.go b/test/resources/tektonconfigs.go
index a768bcc8bd..b49465fb20 100644
--- a/test/resources/tektonconfigs.go
+++ b/test/resources/tektonconfigs.go
@@ -181,6 +181,7 @@ func verifyNoTektonConfigCR(clients *utils.Clients) error {
 }
 
 func WaitForTektonConfigReady(client operatorV1alpha1.TektonConfigInterface, name string, interval, timeout time.Duration) error {
+	readyCount := 0
 	isReady := func(ctx context.Context) (bool, error) {
 		configCR, err := client.Get(context.TODO(), name, metav1.GetOptions{})
 		if err != nil {
@@ -189,7 +190,22 @@ func WaitForTektonConfigReady(client operatorV1alpha1.TektonConfigInterface, nam
 			}
 			return false, err
 		}
-		return configCR.Status.IsReady(), nil
+		if configCR.Status.IsReady() {
+			readyCount++
+		} else {
+			readyCount = 0
+		}
+		// This needs to be confirmed multiple times because:
+		// 1. The previous Pod may have just restarted, and the new Pod has not
+		//    yet entered the Reconcile logic to modify the resource status.
+		// 2. Even if the Pod has started properly, it may not have entered the
+		//    Reconcile logic yet, which could also lead to instability.
+		// For example:
+		//    In the testing logic of TektonHub, it immediately deletes the
+		//    CR of TektonHub and then polls whether the CR has been cleaned up.
+		//    At this point, the tekton-operator enters the Reconcile logic and
+		//    automatically creates the CR of TektonHub again, causing the test to fail.
+		return readyCount >= 3, nil
 	}
 
 	return wait.PollUntilContextTimeout(context.TODO(), interval, timeout, true, isReady)
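For reference, a minimal usage sketch of the new resources.WaitForPVCDeletion helper as it might be called from another e2e teardown. The package name, import paths, and poll intervals below are illustrative assumptions and are not part of this change:

```go
package e2e_example

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"k8s.io/client-go/kubernetes"

	// assumed import path for the test/resources helpers in this repository
	"github.com/tektoncd/operator/test/resources"
)

// verifyHubPVCGone fails the test if the TektonHub API PVC is still present
// in the target namespace after five minutes of polling (5s interval, as in
// the TektonHub suite above).
func verifyHubPVCGone(t *testing.T, kubeClient kubernetes.Interface, namespace string) {
	err := resources.WaitForPVCDeletion(kubeClient, "tekton-hub-api", namespace, 5*time.Second, 5*time.Minute)
	require.NoError(t, err)
}
```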