Skip to content

Commit

Permalink
feat: add --tolerate-failures-status-check flag and deploy.tolerateFa…
Browse files Browse the repository at this point in the history
…ilures config for improved ci/cd usage
  • Loading branch information
aaron-prindle committed Nov 7, 2022
1 parent dfd6015 commit fba5f93
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 96 deletions.
9 changes: 9 additions & 0 deletions cmd/skaffold/app/cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,15 @@ var flagRegistry = []Flag{
DefinedOn: []string{"dev", "debug", "deploy", "run", "apply"},
IsEnum: true,
},
{
	// Registers the boolean --tolerate-failures-status-check flag, bound to
	// opts.TolerateFailuresStatusCheck and off by default.
	Name:          "tolerate-failures-status-check",
	Usage:         "Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'",
	Value:         &opts.TolerateFailuresStatusCheck,
	DefValue:      false,
	FlagAddMethod: "BoolVar",
	DefinedOn:     []string{"dev", "debug", "deploy", "run", "apply"},
	IsEnum:        true,
},
{
Name: "fast-fail-status-check",
Usage: "Configures `status-check` to fail immediately if any error occurs. Otherwise `status-check` will attempt to check all resources once and only then report errors and possibly exit. Defaults to 'true'",
Expand Down
7 changes: 7 additions & 0 deletions docs-v2/content/en/schemas/v4beta1.json
Original file line number Diff line number Diff line change
Expand Up @@ -1422,6 +1422,12 @@
"type": "integer",
"description": "*beta* deadline for deployments to stabilize in seconds.",
"x-intellij-html-description": "<em>beta</em> deadline for deployments to stabilize in seconds."
},
"tolerateFailures": {
"type": "boolean",
"description": "configures the Skaffold \"status-check\" to tolerate failures (flapping deployments, etc.) until the statusCheckDeadlineSeconds duration or k8s object timeouts such as progressDeadlineSeconds, etc.",
"x-intellij-html-description": "configures the Skaffold &quot;status-check&quot; to tolerate failures (flapping deployments, etc.) until the statusCheckDeadlineSeconds duration or k8s object timeouts such as progressDeadlineSeconds, etc.",
"default": "false"
}
},
"preferredOrder": [
Expand All @@ -1432,6 +1438,7 @@
"cloudrun",
"statusCheck",
"statusCheckDeadlineSeconds",
"tolerateFailures",
"kubeContext",
"logs"
],
Expand Down
10 changes: 10 additions & 0 deletions docs/content/en/docs/references/cli/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ Options:
--status-check=: Wait for deployed resources to stabilize
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
--tail=false: Stream logs from deployed objects
--tolerate-failures-status-check=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
Usage:
Expand Down Expand Up @@ -168,6 +169,7 @@ Env vars:
* `SKAFFOLD_STATUS_CHECK` (same as `--status-check`)
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
* `SKAFFOLD_TAIL` (same as `--tail`)
* `SKAFFOLD_TOLERATE_FAILURES_STATUS_CHECK` (same as `--tolerate-failures-status-check`)
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)

### skaffold build
Expand Down Expand Up @@ -453,6 +455,7 @@ Options:
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
--tail=true: Stream logs from deployed objects
--tolerate-failures-status-check=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
--toot=false: Emit a terminal beep after the deploy is complete
--trigger='notify': How is change detection triggered? (polling, notify, or manual)
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
Expand Down Expand Up @@ -516,6 +519,7 @@ Env vars:
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
* `SKAFFOLD_TAG` (same as `--tag`)
* `SKAFFOLD_TAIL` (same as `--tail`)
* `SKAFFOLD_TOLERATE_FAILURES_STATUS_CHECK` (same as `--tolerate-failures-status-check`)
* `SKAFFOLD_TOOT` (same as `--toot`)
* `SKAFFOLD_TRIGGER` (same as `--trigger`)
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
Expand Down Expand Up @@ -633,6 +637,7 @@ Options:
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
--tail=false: Stream logs from deployed objects
--tolerate-failures-status-check=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
--toot=false: Emit a terminal beep after the deploy is complete
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
--wait-for-deletions=true: Wait for pending deletions to complete before a deployment
Expand Down Expand Up @@ -681,6 +686,7 @@ Env vars:
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
* `SKAFFOLD_TAG` (same as `--tag`)
* `SKAFFOLD_TAIL` (same as `--tail`)
* `SKAFFOLD_TOLERATE_FAILURES_STATUS_CHECK` (same as `--tolerate-failures-status-check`)
* `SKAFFOLD_TOOT` (same as `--toot`)
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
* `SKAFFOLD_WAIT_FOR_DELETIONS` (same as `--wait-for-deletions`)
Expand Down Expand Up @@ -740,6 +746,7 @@ Options:
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
--tail=true: Stream logs from deployed objects
--tolerate-failures-status-check=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
--toot=false: Emit a terminal beep after the deploy is complete
--trigger='notify': How is change detection triggered? (polling, notify, or manual)
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
Expand Down Expand Up @@ -803,6 +810,7 @@ Env vars:
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
* `SKAFFOLD_TAG` (same as `--tag`)
* `SKAFFOLD_TAIL` (same as `--tail`)
* `SKAFFOLD_TOLERATE_FAILURES_STATUS_CHECK` (same as `--tolerate-failures-status-check`)
* `SKAFFOLD_TOOT` (same as `--toot`)
* `SKAFFOLD_TRIGGER` (same as `--trigger`)
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
Expand Down Expand Up @@ -1095,6 +1103,7 @@ Options:
--sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories.
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration
--tail=false: Stream logs from deployed objects
--tolerate-failures-status-check=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployment's progressDeadlineSeconds. Otherwise, deployment failures Skaffold encounters will immediately fail the deployment. Defaults to 'false'
--toot=false: Emit a terminal beep after the deploy is complete
--wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit
--wait-for-deletions=true: Wait for pending deletions to complete before a deployment
Expand Down Expand Up @@ -1153,6 +1162,7 @@ Env vars:
* `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`)
* `SKAFFOLD_TAG` (same as `--tag`)
* `SKAFFOLD_TAIL` (same as `--tail`)
* `SKAFFOLD_TOLERATE_FAILURES_STATUS_CHECK` (same as `--tolerate-failures-status-check`)
* `SKAFFOLD_TOOT` (same as `--toot`)
* `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`)
* `SKAFFOLD_WAIT_FOR_DELETIONS` (same as `--wait-for-deletions`)
Expand Down
149 changes: 75 additions & 74 deletions pkg/skaffold/config/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,80 +32,81 @@ type WaitForDeletions struct {

// SkaffoldOptions are options that are set by command line arguments not included in the config file itself
type SkaffoldOptions struct {
Apply bool
AutoBuild bool
AutoCreateConfig bool
AutoDeploy bool
AutoSync bool
AssumeYes bool
CacheArtifacts bool
ContainerDebugging bool
Cleanup bool
DetectMinikube bool
DryRun bool
EnableRPC bool
Force bool
ForceLoadImages bool
IterativeStatusCheck bool
FastFailStatusCheck bool
Notification bool
NoPrune bool
NoPruneChildren bool
ProfileAutoActivation bool
PropagateProfiles bool
RenderOnly bool
SkipTests bool
SkipConfigDefaults bool
Tail bool
WaitForConnection bool
EnablePlatformNodeAffinity bool
EnableGKEARMNodeToleration bool
DisableMultiPlatformBuild bool
CheckClusterNodePlatforms bool
MakePathsAbsolute *bool
MultiLevelRepo *bool
CloudRunProject string
CloudRunLocation string
ConfigurationFile string
HydrationDir string
InventoryNamespace string
InventoryID string
InventoryName string
GlobalConfig string
EventLogFile string
RenderOutput string
User string
CustomTag string
Namespace string
CacheFile string
Trigger string
KubeContext string
KubeConfig string
LastLogFile string
DigestSource string
Command string
MinikubeProfile string
RepoCacheDir string
TransformRulesFile string
VerifyDockerNetwork string
CustomLabels []string
TargetImages []string
Profiles []string
InsecureRegistries []string
ConfigurationFilter []string
HydratedManifests []string
Platforms []string
BuildConcurrency int
WatchPollInterval int
StatusCheck BoolOrUndefined
PushImages BoolOrUndefined
RPCPort IntOrUndefined
RPCHTTPPort IntOrUndefined
Muted Muted
PortForward PortForwardOptions
DefaultRepo StringOrUndefined
SyncRemoteCache SyncRemoteCacheOption
WaitForDeletions WaitForDeletions
Apply bool
AutoBuild bool
AutoCreateConfig bool
AutoDeploy bool
AutoSync bool
AssumeYes bool
CacheArtifacts bool
ContainerDebugging bool
Cleanup bool
DetectMinikube bool
DryRun bool
EnableRPC bool
Force bool
ForceLoadImages bool
IterativeStatusCheck bool
FastFailStatusCheck bool
TolerateFailuresStatusCheck bool
Notification bool
NoPrune bool
NoPruneChildren bool
ProfileAutoActivation bool
PropagateProfiles bool
RenderOnly bool
SkipTests bool
SkipConfigDefaults bool
Tail bool
WaitForConnection bool
EnablePlatformNodeAffinity bool
EnableGKEARMNodeToleration bool
DisableMultiPlatformBuild bool
CheckClusterNodePlatforms bool
MakePathsAbsolute *bool
MultiLevelRepo *bool
CloudRunProject string
CloudRunLocation string
ConfigurationFile string
HydrationDir string
InventoryNamespace string
InventoryID string
InventoryName string
GlobalConfig string
EventLogFile string
RenderOutput string
User string
CustomTag string
Namespace string
CacheFile string
Trigger string
KubeContext string
KubeConfig string
LastLogFile string
DigestSource string
Command string
MinikubeProfile string
RepoCacheDir string
TransformRulesFile string
VerifyDockerNetwork string
CustomLabels []string
TargetImages []string
Profiles []string
InsecureRegistries []string
ConfigurationFilter []string
HydratedManifests []string
Platforms []string
BuildConcurrency int
WatchPollInterval int
StatusCheck BoolOrUndefined
PushImages BoolOrUndefined
RPCPort IntOrUndefined
RPCHTTPPort IntOrUndefined
Muted Muted
PortForward PortForwardOptions
DefaultRepo StringOrUndefined
SyncRemoteCache SyncRemoteCacheOption
WaitForDeletions WaitForDeletions
}

type RunMode string
Expand Down
2 changes: 2 additions & 0 deletions pkg/skaffold/deploy/component/kubernetes/monitor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ func (m mockStatusConfig) GetKubeContext() string { return "" }

// StatusCheckDeadlineSeconds always returns 0 for the mock configuration.
func (m mockStatusConfig) StatusCheckDeadlineSeconds() int { return 0 }

// StatusCheckTolerateFailures always returns false for the mock configuration.
func (m mockStatusConfig) StatusCheckTolerateFailures() bool { return false }

// FastFailStatusCheck always returns true for the mock configuration.
func (m mockStatusConfig) FastFailStatusCheck() bool { return true }

// Muted returns the zero-value config.Muted for the mock configuration.
func (m mockStatusConfig) Muted() config.Muted { return config.Muted{} }
Expand Down
47 changes: 25 additions & 22 deletions pkg/skaffold/kubernetes/status/status_check.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ type Config interface {

StatusCheckDeadlineSeconds() int
FastFailStatusCheck() bool
StatusCheckTolerateFailures() bool
Muted() config.Muted
StatusCheck() *bool
}
Expand All @@ -89,32 +90,34 @@ type Monitor interface {
}

type monitor struct {
cfg Config
labeller *label.DefaultLabeller
deadlineSeconds int
muteLogs bool
failFast bool
seenResources resource.Group
singleRun singleflight.Group
namespaces *[]string
kubeContext string
manifests manifest.ManifestList
cfg Config
labeller *label.DefaultLabeller
deadlineSeconds int
muteLogs bool
failFast bool
tolerateFailures bool
seenResources resource.Group
singleRun singleflight.Group
namespaces *[]string
kubeContext string
manifests manifest.ManifestList
}

// NewStatusMonitor returns a status monitor which runs checks on selected resource rollouts.
// Currently implemented for deployments and statefulsets.
func NewStatusMonitor(cfg Config, labeller *label.DefaultLabeller, namespaces *[]string) Monitor {
return &monitor{
muteLogs: cfg.Muted().MuteStatusCheck(),
cfg: cfg,
labeller: labeller,
deadlineSeconds: cfg.StatusCheckDeadlineSeconds(),
seenResources: make(resource.Group),
singleRun: singleflight.Group{},
namespaces: namespaces,
kubeContext: cfg.GetKubeContext(),
manifests: make(manifest.ManifestList, 0),
failFast: cfg.FastFailStatusCheck(),
muteLogs: cfg.Muted().MuteStatusCheck(),
cfg: cfg,
labeller: labeller,
deadlineSeconds: cfg.StatusCheckDeadlineSeconds(),
seenResources: make(resource.Group),
singleRun: singleflight.Group{},
namespaces: namespaces,
kubeContext: cfg.GetKubeContext(),
manifests: make(manifest.ManifestList, 0),
failFast: cfg.FastFailStatusCheck(),
tolerateFailures: cfg.StatusCheckTolerateFailures(),
}
}

Expand Down Expand Up @@ -353,7 +356,7 @@ func getStatefulSets(ctx context.Context, client kubernetes.Interface, ns string
return resources, nil
}

func pollResourceStatus(ctx context.Context, cfg kubectl.Config, r *resource.Resource) {
func pollResourceStatus(ctx context.Context, cfg Config, r *resource.Resource) {
pollDuration := time.Duration(defaultPollPeriodInMilliseconds) * time.Millisecond
ticker := time.NewTicker(pollDuration)
defer ticker.Stop()
Expand Down Expand Up @@ -387,7 +390,7 @@ func pollResourceStatus(ctx context.Context, cfg kubectl.Config, r *resource.Res
// As any changes to build or deploy dependencies are not triggered, exit
// immediately rather than waiting for for statusCheckDeadlineSeconds
// TODO: https://github.com/GoogleContainerTools/skaffold/pull/4591
if r.HasEncounteredUnrecoverableError() {
if r.HasEncounteredUnrecoverableError() && !cfg.StatusCheckTolerateFailures() {
r.MarkComplete()
return
}
Expand Down
15 changes: 15 additions & 0 deletions pkg/skaffold/runner/runcontext/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,17 @@ func (ps Pipelines) TransformDenyList() []latest.ResourceFilter {
return denylist
}

// StatusCheckTolerateFailures reports whether any pipeline in the group has
// Deploy.TolerateFailures set. A single pipeline opting in enables failure
// tolerance for the whole group; it returns false when no pipeline does.
func (ps Pipelines) StatusCheckTolerateFailures() bool {
	for _, p := range ps.pipelines {
		if p.Deploy.TolerateFailures {
			// One opt-in is enough, so there is no need to inspect the rest.
			return true
		}
	}
	return false
}

func (ps Pipelines) StatusCheckDeadlineSeconds() int {
c := 0
// set the group status check deadline to maximum of any individually specified value
Expand Down Expand Up @@ -203,6 +214,10 @@ func (rc *RunContext) StatusCheckDeadlineSeconds() int {
return rc.Pipelines.StatusCheckDeadlineSeconds()
}

// StatusCheckTolerateFailures reports whether status-check failures should be
// tolerated. It is true when the TolerateFailuresStatusCheck option is set;
// otherwise it defers to what the pipelines report.
func (rc *RunContext) StatusCheckTolerateFailures() bool {
	if rc.Opts.TolerateFailuresStatusCheck {
		return true
	}
	return rc.Pipelines.StatusCheckTolerateFailures()
}

// SkipTests returns the value of the SkipTests option held in rc.Opts.
func (rc *RunContext) SkipTests() bool {
return rc.Opts.SkipTests
}
Expand Down
Loading

0 comments on commit fba5f93

Please sign in to comment.