From f9c678a57d030ed2b988d23f5f4ced37e51331e3 Mon Sep 17 00:00:00 2001 From: ericzzzzzzz <102683393+ericzzzzzzz@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:57:50 -0400 Subject: [PATCH] fix: status check list all events --- pkg/diag/validator/deployment.go | 7 ++++ pkg/diag/validator/validator.go | 8 ++-- .../kubernetes/status/resource/deployment.go | 42 ++++--------------- 3 files changed, 19 insertions(+), 38 deletions(-) diff --git a/pkg/diag/validator/deployment.go b/pkg/diag/validator/deployment.go index 2bd3e3265af..3f300958116 100644 --- a/pkg/diag/validator/deployment.go +++ b/pkg/diag/validator/deployment.go @@ -18,6 +18,8 @@ package validator import ( "context" + "fmt" + "strings" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -45,6 +47,11 @@ func (s *deploymentPodsSelector) Select(ctx context.Context, ns string, opts met log.Entry(ctx).Debugf("deployment replica set not created yet.") return nil, nil } + for _, c := range controller.Status.Conditions { + if c.Type == "ReplicaFailure" && c.Reason == "FailedCreate" && c.Status == "True" && strings.Contains(c.Message, "admission webhook") { + return nil, fmt.Errorf("%s: %s", ReplicaFailureAdmissionErr, c.Message) + } + } pods, err := s.k.CoreV1().Pods(ns).List(ctx, opts) if err != nil { diff --git a/pkg/diag/validator/validator.go b/pkg/diag/validator/validator.go index 7c752cab150..db74cd05d7b 100644 --- a/pkg/diag/validator/validator.go +++ b/pkg/diag/validator/validator.go @@ -46,9 +46,11 @@ const ( ImagePullErr = "ErrImagePull" ImagePullBackOff = "ImagePullBackOff" ErrImagePullBackOff = "ErrImagePullBackOff" - containerCreating = "ContainerCreating" - podInitializing = "PodInitializing" - podKind = "pod" + + ReplicaFailureAdmissionErr = "ReplicaFailureAdmissionErr" + containerCreating = "ContainerCreating" + podInitializing = "PodInitializing" + podKind = "pod" failedScheduling = "FailedScheduling" unhealthy = "Unhealthy" diff --git a/pkg/skaffold/kubernetes/status/resource/deployment.go 
b/pkg/skaffold/kubernetes/status/resource/deployment.go index b0912e44784..f2ac4c59b29 100644 --- a/pkg/skaffold/kubernetes/status/resource/deployment.go +++ b/pkg/skaffold/kubernetes/status/resource/deployment.go @@ -24,16 +24,12 @@ import ( "strings" "time" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "github.com/GoogleContainerTools/skaffold/v2/pkg/diag" "github.com/GoogleContainerTools/skaffold/v2/pkg/diag/validator" sErrors "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/errors" "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/event" eventV2 "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/event/v2" "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/kubectl" - kubernetesclient "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/kubernetes/client" "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/output/log" "github.com/GoogleContainerTools/skaffold/v2/proto/v1" protoV2 "github.com/GoogleContainerTools/skaffold/v2/proto/v2" @@ -215,41 +211,10 @@ func (r *Resource) checkRolloutStatus(ctx context.Context, cfg kubectl.Config) * return &proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_USER_CANCELLED} } details := r.cleanupStatus(string(b)) - if err != nil { - return parseKubectlRolloutError(details, r.deadline, r.tolerateFailures, err) - } - - client, cErr := kubernetesclient.Client(cfg.GetKubeContext()) - if cErr != nil { - log.Entry(ctx).Debugf("error attempting to create kubernetes client for k8s event listing: %s", err) - } else { - err = checkK8sEventsForPodFailedCreateEvent(ctx, client, r.namespace, r.name) - } - // additional logic added here which checks kubernetes events to see if skaffold managed pod has a FailedCreatEvent - // this can be raised by an admission controller and if we don't error here, skaffold will wait for the pod to come up - // indefinitely even thought the admission controller has denied it return parseKubectlRolloutError(details, r.deadline, 
r.tolerateFailures, err) } -func checkK8sEventsForPodFailedCreateEvent(ctx context.Context, client kubernetes.Interface, namespace string, deploymentName string) error { - // Create a watcher for events - eventList, err := client.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{}) - if err != nil { - return fmt.Errorf("error attempting to list kubernetes events in namespace: %s, %w", namespace, err) - } - - for _, event := range eventList.Items { - if event.Reason == "FailedCreate" { - if strings.HasPrefix(event.InvolvedObject.Name, deploymentName+"-") { - errMsg := fmt.Sprintf("Failed to create Pod for Deployment %s: %s\n", deploymentName, event.Message) - return fmt.Errorf(errMsg) - } - } - } - return nil -} - func (r *Resource) CheckStatus(ctx context.Context, cfg kubectl.Config) { var ae *proto.ActionableErr switch r.rType { @@ -277,6 +242,13 @@ func (r *Resource) CheckStatus(ctx context.Context, cfg kubectl.Config) { return } if err := r.fetchPods(ctx); err != nil { + // This hack is used to fail the status check if the deployment request is denied by an admission webhook; without this, the status check + // will hang. This hack doesn't honor the tolerate-failure-until-deadline flag, as when running kubectl apply with a standalone pod resource + // the command will fail directly, so we won't even be here. So it probably makes more sense that we just stop here instead of repeating status checks. + if strings.Contains(err.Error(), validator.ReplicaFailureAdmissionErr) { + r.UpdateStatus(&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN, Message: fmt.Sprintf("replica set creation failed: %s", err.Error())}) + return + } log.Entry(ctx).Debugf("pod statuses could not be fetched this time due to %s", err) } }