Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: status check lists all events #9015

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pkg/diag/validator/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package validator

import (
"context"
"fmt"
"strings"

appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -45,6 +47,11 @@ func (s *deploymentPodsSelector) Select(ctx context.Context, ns string, opts met
log.Entry(ctx).Debugf("deployment replica set not created yet.")
return nil, nil
}
for _, c := range controller.Status.Conditions {
if c.Type == "ReplicaFailure" && c.Reason == "FailedCreate" && c.Status == "True" && strings.Contains(c.Message, "admission webhook") {
return nil, fmt.Errorf("%s: %s", ReplicaFailureAdmissionErr, c.Message)
}
}

pods, err := s.k.CoreV1().Pods(ns).List(ctx, opts)
if err != nil {
Expand Down
8 changes: 5 additions & 3 deletions pkg/diag/validator/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@ const (
ImagePullErr = "ErrImagePull"
ImagePullBackOff = "ImagePullBackOff"
ErrImagePullBackOff = "ErrImagePullBackOff"
containerCreating = "ContainerCreating"
podInitializing = "PodInitializing"
podKind = "pod"

ReplicaFailureAdmissionErr = "ReplicaFailureAdmissionErr"
containerCreating = "ContainerCreating"
podInitializing = "PodInitializing"
podKind = "pod"

failedScheduling = "FailedScheduling"
unhealthy = "Unhealthy"
Expand Down
42 changes: 7 additions & 35 deletions pkg/skaffold/kubernetes/status/resource/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,12 @@ import (
"strings"
"time"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"

"github.com/GoogleContainerTools/skaffold/v2/pkg/diag"
"github.com/GoogleContainerTools/skaffold/v2/pkg/diag/validator"
sErrors "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/errors"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/event"
eventV2 "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/event/v2"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/kubectl"
kubernetesclient "github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/kubernetes/client"
"github.com/GoogleContainerTools/skaffold/v2/pkg/skaffold/output/log"
"github.com/GoogleContainerTools/skaffold/v2/proto/v1"
protoV2 "github.com/GoogleContainerTools/skaffold/v2/proto/v2"
Expand Down Expand Up @@ -215,41 +211,10 @@ func (r *Resource) checkRolloutStatus(ctx context.Context, cfg kubectl.Config) *
return &proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_USER_CANCELLED}
}
details := r.cleanupStatus(string(b))
if err != nil {
return parseKubectlRolloutError(details, r.deadline, r.tolerateFailures, err)
}

client, cErr := kubernetesclient.Client(cfg.GetKubeContext())
if cErr != nil {
log.Entry(ctx).Debugf("error attempting to create kubernetes client for k8s event listing: %s", err)
} else {
err = checkK8sEventsForPodFailedCreateEvent(ctx, client, r.namespace, r.name)
}

// additional logic added here which checks kubernetes events to see if skaffold managed pod has a FailedCreatEvent
// this can be raised by an admission controller and if we don't error here, skaffold will wait for the pod to come up
// indefinitely even thought the admission controller has denied it
return parseKubectlRolloutError(details, r.deadline, r.tolerateFailures, err)
}

func checkK8sEventsForPodFailedCreateEvent(ctx context.Context, client kubernetes.Interface, namespace string, deploymentName string) error {
// Create a watcher for events
eventList, err := client.CoreV1().Events(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
return fmt.Errorf("error attempting to list kubernetes events in namespace: %s, %w", namespace, err)
}

for _, event := range eventList.Items {
if event.Reason == "FailedCreate" {
if strings.HasPrefix(event.InvolvedObject.Name, deploymentName+"-") {
errMsg := fmt.Sprintf("Failed to create Pod for Deployment %s: %s\n", deploymentName, event.Message)
return fmt.Errorf(errMsg)
}
}
}
return nil
}

func (r *Resource) CheckStatus(ctx context.Context, cfg kubectl.Config) {
var ae *proto.ActionableErr
switch r.rType {
Expand Down Expand Up @@ -277,6 +242,13 @@ func (r *Resource) CheckStatus(ctx context.Context, cfg kubectl.Config) {
return
}
if err := r.fetchPods(ctx); err != nil {
// This hack is used to fail status check if deployment request is denied by admission webhook, without this status check
// will hang. This hack doesn't honor tolerate-failure-until-deadline flag, as when running kubectl apply with a standalone pod resource
// the command will fail directly, we won't even be here. So it probably makes more sense that we just stop here instead of repeating status checks.
if strings.Contains(err.Error(), validator.ReplicaFailureAdmissionErr) {
r.UpdateStatus(&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_UNKNOWN, Message: fmt.Sprintf("replica set creation failed: %s", err.Error())})
return
}
log.Entry(ctx).Debugf("pod statuses could not be fetched this time due to %s", err)
}
}
Expand Down