From 0185ce6698e992ae1b2788315c3e81c36989966a Mon Sep 17 00:00:00 2001 From: Jan Stourac Date: Wed, 27 Nov 2024 14:24:56 +0100 Subject: [PATCH] [RHOAIENG-15141] Fix failing E2E tests There were two E2E tests in failure: --- FAIL: TestE2ENotebookController/update/thoth-minimal-oauth-notebook/Notebook_Statefulset_Validation_After_Update (60.28s) --- FAIL: TestE2ENotebookController/update/thoth-minimal-oauth-notebook/Verify_Notebook_Traffic_After_Update (10.75s) The reason was that after the culling test, the culling configuration is reverted, but the Notebook CR isn't updated and the annotation that was added by the notebook controller: `kubeflow-resource-stopped` stays in that CR and as such, the workbench instance isn't started back. This change deletes this annotation at the end of the culler test and the workbench is up and running and ready for the followup tests. Another issue there was that the update test updated the workbench to a non-existent image link, which resulted in a ImagePullBackOff Error in the end. This change updates this link to some existing image. --- .../e2e/helper_test.go | 46 +++++++++++++++++-- .../e2e/notebook_creation_test.go | 21 +-------- .../e2e/notebook_update_test.go | 6 ++- 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/components/odh-notebook-controller/e2e/helper_test.go b/components/odh-notebook-controller/e2e/helper_test.go index 792187e4a69..9fe72ec2bc9 100644 --- a/components/odh-notebook-controller/e2e/helper_test.go +++ b/components/odh-notebook-controller/e2e/helper_test.go @@ -3,7 +3,6 @@ package e2e import ( "crypto/tls" "fmt" - netv1 "k8s.io/api/networking/v1" "log" "net/http" "time" @@ -12,9 +11,11 @@ import ( routev1 "github.com/openshift/api/route/v1" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" + netv1 "k8s.io/api/networking/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/wait" "sigs.k8s.io/controller-runtime/pkg/client" @@ -130,18 +131,57 @@ func (tc *testContext) rolloutDeployment(depMeta metav1.ObjectMeta) error { return nil } -func (tc *testContext) revertCullingConfiguration(cmMeta metav1.ObjectMeta, depMeta metav1.ObjectMeta) { +func (tc *testContext) waitForStatefulSet(nbMeta *metav1.ObjectMeta, availableReplicas int32, readyReplicas int32) error { + // Verify StatefulSet is running expected number of replicas + err := wait.Poll(tc.resourceRetryInterval, tc.resourceCreationTimeout, func() (done bool, err error) { + notebookStatefulSet, err1 := tc.kubeClient.AppsV1().StatefulSets(tc.testNamespace).Get(tc.ctx, + nbMeta.Name, metav1.GetOptions{}) + + if err1 != nil { + if errors.IsNotFound(err1) { + return false, nil + } else { + log.Printf("Failed to get %s statefulset", nbMeta.Name) + return false, err1 + } + } + if notebookStatefulSet.Status.AvailableReplicas == availableReplicas && + notebookStatefulSet.Status.ReadyReplicas == readyReplicas { + return true, nil + } + return false, nil + }) + return err +} + +func (tc *testContext) revertCullingConfiguration(cmMeta metav1.ObjectMeta, depMeta metav1.ObjectMeta, nbMeta *metav1.ObjectMeta) { // Delete the culling configuration Configmap once the test is completed err := tc.kubeClient.CoreV1().ConfigMaps(tc.testNamespace).Delete(tc.ctx, cmMeta.Name, metav1.DeleteOptions{}) if err != nil { - log.Printf("error creating configmap notebook-controller-culler-config: %v ", err) + log.Printf("error deleting configmap notebook-controller-culler-config: %v ", err) } // Roll out the controller deployment err = tc.rolloutDeployment(depMeta) if err != nil { log.Printf("error rolling out the deployment %v: %v ", depMeta.Name, err) } + + testNotebook := &nbv1.Notebook{ + ObjectMeta: *nbMeta, + } + // The NBC added the annotation to stop the idle workbench: kubeflow-resource-stopped: '2024-11-26T17:20:42Z' + // To make the workbench running again, we need to also remove that annotation. + patch := client.RawPatch(types.JSONPatchType, []byte(`[{"op": "remove", "path": "/metadata/annotations/kubeflow-resource-stopped"}]`)) + + if err := tc.customClient.Patch(tc.ctx, testNotebook, patch); err != nil { + log.Printf("failed to patch Notebook CR removing the kubeflow-resource-stopped annotation: %v ", err) + } + // now we should wait for pod to start again + err = tc.waitForStatefulSet(nbMeta, 1, 1) + if err != nil { + log.Printf("notebook StatefulSet: %s isn't ready as expected: %s", nbMeta.Name, err) + } } func (tc *testContext) scaleDeployment(depMeta metav1.ObjectMeta, desiredReplicas int32) error { diff --git a/components/odh-notebook-controller/e2e/notebook_creation_test.go b/components/odh-notebook-controller/e2e/notebook_creation_test.go index e066a36ad3a..c6bea3b0a9c 100644 --- a/components/odh-notebook-controller/e2e/notebook_creation_test.go +++ b/components/odh-notebook-controller/e2e/notebook_creation_test.go @@ -198,24 +198,7 @@ func (tc *testContext) ensureNetworkPolicyAllowingAccessToOnlyNotebookController func (tc *testContext) testNotebookValidation(nbMeta *metav1.ObjectMeta) error { // Verify StatefulSet is running - err := wait.Poll(tc.resourceRetryInterval, tc.resourceCreationTimeout, func() (done bool, err error) { - notebookStatefulSet, err1 := tc.kubeClient.AppsV1().StatefulSets(tc.testNamespace).Get(tc.ctx, - nbMeta.Name, metav1.GetOptions{}) - - if err1 != nil { - if errors.IsNotFound(err1) { - return false, nil - } else { - log.Printf("Failed to get %s statefulset", nbMeta.Name) - return false, err1 - } - } - if notebookStatefulSet.Status.AvailableReplicas == 1 && - notebookStatefulSet.Status.ReadyReplicas == 1 { - return true, nil - } - return false, nil - }) + err := tc.waitForStatefulSet(nbMeta, 1, 1) if err != nil { return fmt.Errorf("error validating notebook StatefulSet: %s", err) } @@ -283,7 +266,7 @@ func (tc *testContext) testNotebookCulling(nbMeta *metav1.ObjectMeta) error { return fmt.Errorf("error getting deployment %v: %v", controllerDeployment.Name, err) } - defer tc.revertCullingConfiguration(cullingConfigMap.ObjectMeta, controllerDeployment.ObjectMeta) + defer tc.revertCullingConfiguration(cullingConfigMap.ObjectMeta, controllerDeployment.ObjectMeta, nbMeta) err = tc.rolloutDeployment(controllerDeployment.ObjectMeta) if err != nil { diff --git a/components/odh-notebook-controller/e2e/notebook_update_test.go b/components/odh-notebook-controller/e2e/notebook_update_test.go index 6bd0a94b9d3..024b0e4ada5 100644 --- a/components/odh-notebook-controller/e2e/notebook_update_test.go +++ b/components/odh-notebook-controller/e2e/notebook_update_test.go @@ -65,7 +65,9 @@ func (tc *testContext) testNotebookUpdate(nbContext notebookContext) error { } // Example update: Change the Notebook image - updatedNotebook.Spec.Template.Spec.Containers[0].Image = "new-image:latest" + // newImage := "new-image:latest" quay.io/thoth-station/s2i-minimal-notebook:v0.2.2 + newImage := "quay.io/opendatahub/workbench-images:jupyter-minimal-ubi9-python-3.11-20241119-3ceb400" + updatedNotebook.Spec.Template.Spec.Containers[0].Image = newImage err = tc.customClient.Update(tc.ctx, updatedNotebook) if err != nil { @@ -79,7 +81,7 @@ func (tc *testContext) testNotebookUpdate(nbContext notebookContext) error { if err != nil { return false, err } - if note.Spec.Template.Spec.Containers[0].Image == "new-image:latest" { + if note.Spec.Template.Spec.Containers[0].Image == newImage { return true, nil } return false, nil