Skip to content

Commit

Permalink
fix: status check list all events (#9015)
Browse files Browse the repository at this point in the history
  • Loading branch information
ericzzzzzzz authored Aug 15, 2023
1 parent e6c1363 commit c18d6c0
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 38 deletions.
7 changes: 7 additions & 0 deletions pkg/diag/validator/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package validator

import (
"context"
"fmt"
"strings"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -45,6 +47,11 @@ func (s *deploymentPodsSelector) Select(ctx context.Context, ns string, opts met
log.Entry(ctx).Debugf("deployment replica set not created yet.")
return nil, nil
}
for _, c := range controller.Status.Conditions {
if c.Type == "ReplicaFailure" && c.Reason == "FailedCreate" && c.Status == "True" && strings.Contains(c.Message, "admission webhook") {
return nil, fmt.Errorf("%s: %s", ReplicaFailureAdmissionErr, c.Message)
}
}

pods, err := s.k.CoreV1().Pods(ns).List(ctx, opts)
if err != nil {
Expand Down
8 changes: 5 additions & 3 deletions pkg/diag/validator/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@ const (
ImagePullErr = "ErrImagePull"
ImagePullBackOff = "ImagePullBackOff"
ErrImagePullBackOff = "ErrImagePullBackOff"
containerCreating = "ContainerCreating"
podInitializing = "PodInitializing"
podKind = "pod"

ReplicaFailureAdmissionErr = "ReplicaFailureAdmissionErr"
containerCreating = "ContainerCreating"
podInitializing = "PodInitializing"
podKind = "pod"

failedScheduling = "FailedScheduling"
unhealthy = "Unhealthy"
Expand Down
42 changes: 7 additions & 35 deletions pkg/skaffold/kubernetes/status/resource/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,12 @@ import (
"strings"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"

"github.com/GoogleContainerTools/skaffold/v2/pkg/diag"
"github.com/GoogleContainerTools/skaffold/v2/pkg/diag/validator"
sErrors "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/errors"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/event"
eventV2 "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/event/v2"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/kubectl"
kubernetesclient "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/kubernetes/client"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/output/log"
"github.com/GoogleContainerTools/skaffold/v2/proto/v1"
protoV2 "github.com/GoogleContainerTools/skaffold/v2/proto/v2"
Expand Down Expand Up @@ -215,41 +211,10 @@ func (r *Resource) checkRolloutStatus(ctx context.Context, cfg kubectl.Config) *
return &proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_USER_CANCELLED}
}
details := r.cleanupStatus(string(b))
if err != nil {
return parseKubectlRolloutError(details, r.deadline, r.tolerateFailures, err)
}

client, cErr := kubernetesclient.Client(cfg.GetKubeContext())
if cErr != nil {
log.Entry(ctx).Debugf("error attempting to create kubernetes client for k8s event listing: %s", err)
} else {
err = checkK8sEventsForPodFailedCreateEvent(ctx, client, r.namespace, r.name)
}

// additional logic added here which checks kubernetes events to see if skaffold managed pod has a FailedCreatEvent
// this can be raised by an admission controller and if we don't error here, skaffold will wait for the pod to come up
// indefinitely even thought the admission controller has denied it
return parseKubectlRolloutError(details, r.deadline, r.tolerateFailures, err)
}

func checkK8sEventsForPodFailedCreateEvent(ctx context.Context, client kubernetes.Interface, namespace string, deploymentName string) error {
// Create a watcher for events
eventList, err := client.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
return fmt.Errorf("error attempting to list kubernetes events in namespace: %s, %w", namespace, err)
}

for _, event := range eventList.Items {
if event.Reason == "FailedCreate" {
if strings.HasPrefix(event.InvolvedObject.Name, deploymentName+"-") {
errMsg := fmt.Sprintf("Failed to create Pod for Deployment %s: %s\n", deploymentName, event.Message)
return fmt.Errorf(errMsg)
}
}
}
return nil
}

func (r *Resource) CheckStatus(ctx context.Context, cfg kubectl.Config) {
var ae *proto.ActionableErr
switch r.rType {
Expand Down Expand Up @@ -277,6 +242,13 @@ func (r *Resource) CheckStatus(ctx context.Context, cfg kubectl.Config) {
return
}
if err := r.fetchPods(ctx); err != nil {
// This hack is used to fail status check if deployment request is denied by admission webhook, without this status check
// will hang. This hack doesn't honor tolerate-failure-until-deadline flag, as when running kubectl apply with a standalone pod resource
// the command will fail directly, we won't even be here. So it probably makes more sense that we just stop here instead of repeating status checks.
if strings.Contains(err.Error(), validator.ReplicaFailureAdmissionErr) {
r.UpdateStatus(&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN, Message: fmt.Sprintf("replica set creation failed: %s", err.Error())})
return
}
log.Entry(ctx).Debugf("pod statuses could not be fetched this time due to %s", err)
}
}
Expand Down

0 comments on commit c18d6c0

Please sign in to comment.