From f6daddf067b9bfd4eb55db41ebba480a64da3abc Mon Sep 17 00:00:00 2001 From: Aaron Prindle Date: Fri, 23 Sep 2022 17:29:34 +0000 Subject: [PATCH] feat: add --tolerate-failures-until-deadline flag and deploy.tolerateFailuresUntilDeadline config for improved ci/cd usage --- cmd/skaffold/app/cmd/flags.go | 9 ++ docs-v2/content/en/schemas/v4beta1.json | 7 + docs/content/en/docs/references/cli/_index.md | 10 ++ integration/run_test.go | 39 +++++ .../status-check-tolerance/skaffold.yaml | 14 ++ .../tolerance-check/Dockerfile | 7 + .../kubernetes/deployment.yaml | 19 +++ .../tolerance-check/script.sh | 19 +++ pkg/skaffold/config/options.go | 149 +++++++++--------- .../component/kubernetes/monitor_test.go | 2 + .../kubernetes/status/status_check.go | 51 +++--- pkg/skaffold/runner/runcontext/context.go | 15 ++ pkg/skaffold/schema/latest/config.go | 5 + pkg/skaffold/schema/profiles.go | 9 ++ 14 files changed, 260 insertions(+), 95 deletions(-) create mode 100644 integration/testdata/status-check-tolerance/skaffold.yaml create mode 100644 integration/testdata/status-check-tolerance/tolerance-check/Dockerfile create mode 100644 integration/testdata/status-check-tolerance/tolerance-check/kubernetes/deployment.yaml create mode 100755 integration/testdata/status-check-tolerance/tolerance-check/script.sh diff --git a/cmd/skaffold/app/cmd/flags.go b/cmd/skaffold/app/cmd/flags.go index e2120a1d9f4..4685242d84f 100644 --- a/cmd/skaffold/app/cmd/flags.go +++ b/cmd/skaffold/app/cmd/flags.go @@ -332,6 +332,15 @@ var flagRegistry = []Flag{ DefinedOn: []string{"dev", "debug", "deploy", "run", "apply"}, IsEnum: true, }, + { + Name: "tolerate-failures-until-deadline", + Usage: "Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployments progressDeadlineSeconds Otherwise deployment failures skaffold encounters will immediately fail the deployment. 
Defaults to 'false'", + Value: &opts.TolerateFailuresStatusCheck, + DefValue: false, + FlagAddMethod: "BoolVar", + DefinedOn: []string{"dev", "debug", "deploy", "run", "apply"}, + IsEnum: true, + }, { Name: "fast-fail-status-check", Usage: "Configures `status-check` to fail immediately if any error occurs. Otherwise `status-check` will attempt to check all resources once and only then report errors and possibly exit. Defaults to 'true'", diff --git a/docs-v2/content/en/schemas/v4beta1.json b/docs-v2/content/en/schemas/v4beta1.json index 69ac63d6890..926eb860df2 100755 --- a/docs-v2/content/en/schemas/v4beta1.json +++ b/docs-v2/content/en/schemas/v4beta1.json @@ -1422,6 +1422,12 @@ "type": "integer", "description": "*beta* deadline for deployments to stabilize in seconds.", "x-intellij-html-description": "beta deadline for deployments to stabilize in seconds." + }, + "tolerateFailuresUntilDeadline": { + "type": "boolean", + "description": "configures the Skaffold \"status-check\" to tolerate failures (flapping deployments, etc.) until the statusCheckDeadlineSeconds duration or k8s object timeouts such as progressDeadlineSeconds, etc.", + "x-intellij-html-description": "configures the Skaffold "status-check" to tolerate failures (flapping deployments, etc.) 
until the statusCheckDeadlineSeconds duration or k8s object timeouts such as progressDeadlineSeconds, etc.", + "default": "false" } }, "preferredOrder": [ @@ -1432,6 +1438,7 @@ "cloudrun", "statusCheck", "statusCheckDeadlineSeconds", + "tolerateFailuresUntilDeadline", "kubeContext", "logs" ], diff --git a/docs/content/en/docs/references/cli/_index.md b/docs/content/en/docs/references/cli/_index.md index 24119124625..ce95ad380e5 100644 --- a/docs/content/en/docs/references/cli/_index.md +++ b/docs/content/en/docs/references/cli/_index.md @@ -139,6 +139,7 @@ Options: --status-check=: Wait for deployed resources to stabilize --sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories. --tail=false: Stream logs from deployed objects + --tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployments progressDeadlineSeconds Otherwise deployment failures skaffold encounters will immediately fail the deployment. Defaults to 'false' --wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit Usage: @@ -168,6 +169,7 @@ Env vars: * `SKAFFOLD_STATUS_CHECK` (same as `--status-check`) * `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`) * `SKAFFOLD_TAIL` (same as `--tail`) +* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`) * `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`) ### skaffold build @@ -453,6 +455,7 @@ Options: --sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). 
One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories. -t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration --tail=true: Stream logs from deployed objects + --tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployments progressDeadlineSeconds Otherwise deployment failures skaffold encounters will immediately fail the deployment. Defaults to 'false' --toot=false: Emit a terminal beep after the deploy is complete --trigger='notify': How is change detection triggered? (polling, notify, or manual) --wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit @@ -516,6 +519,7 @@ Env vars: * `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`) * `SKAFFOLD_TAG` (same as `--tag`) * `SKAFFOLD_TAIL` (same as `--tail`) +* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`) * `SKAFFOLD_TOOT` (same as `--toot`) * `SKAFFOLD_TRIGGER` (same as `--trigger`) * `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`) @@ -633,6 +637,7 @@ Options: --sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories. 
-t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration --tail=false: Stream logs from deployed objects + --tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployments progressDeadlineSeconds Otherwise deployment failures skaffold encounters will immediately fail the deployment. Defaults to 'false' --toot=false: Emit a terminal beep after the deploy is complete --wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit --wait-for-deletions=true: Wait for pending deletions to complete before a deployment @@ -681,6 +686,7 @@ Env vars: * `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`) * `SKAFFOLD_TAG` (same as `--tag`) * `SKAFFOLD_TAIL` (same as `--tail`) +* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`) * `SKAFFOLD_TOOT` (same as `--toot`) * `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`) * `SKAFFOLD_WAIT_FOR_DELETIONS` (same as `--wait-for-deletions`) @@ -740,6 +746,7 @@ Options: --sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories. -t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration --tail=true: Stream logs from deployed objects + --tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployments progressDeadlineSeconds Otherwise deployment failures skaffold encounters will immediately fail the deployment. 
Defaults to 'false' --toot=false: Emit a terminal beep after the deploy is complete --trigger='notify': How is change detection triggered? (polling, notify, or manual) --wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit @@ -803,6 +810,7 @@ Env vars: * `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`) * `SKAFFOLD_TAG` (same as `--tag`) * `SKAFFOLD_TAIL` (same as `--tail`) +* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`) * `SKAFFOLD_TOOT` (same as `--toot`) * `SKAFFOLD_TRIGGER` (same as `--trigger`) * `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`) @@ -1095,6 +1103,7 @@ Options: --sync-remote-cache='always': Controls how Skaffold manages the remote config cache (see `remote-cache-dir`). One of `always` (default), `missing`, or `never`. `always` syncs remote repositories to latest on access. `missing` only clones remote repositories if they do not exist locally. `never` means the user takes responsibility for updating remote repositories. -t, --tag='': The optional custom tag to use for images which overrides the current Tagger configuration --tail=false: Stream logs from deployed objects + --tolerate-failures-until-deadline=false: Configures `status-check` to tolerate failures until Skaffold's statusCheckDeadline duration or the deployments progressDeadlineSeconds Otherwise deployment failures skaffold encounters will immediately fail the deployment. 
Defaults to 'false' --toot=false: Emit a terminal beep after the deploy is complete --wait-for-connection=false: Blocks ending execution of skaffold until the /v2/events gRPC/HTTP endpoint is hit --wait-for-deletions=true: Wait for pending deletions to complete before a deployment @@ -1153,6 +1162,7 @@ Env vars: * `SKAFFOLD_SYNC_REMOTE_CACHE` (same as `--sync-remote-cache`) * `SKAFFOLD_TAG` (same as `--tag`) * `SKAFFOLD_TAIL` (same as `--tail`) +* `SKAFFOLD_TOLERATE_FAILURES_UNTIL_DEADLINE` (same as `--tolerate-failures-until-deadline`) * `SKAFFOLD_TOOT` (same as `--toot`) * `SKAFFOLD_WAIT_FOR_CONNECTION` (same as `--wait-for-connection`) * `SKAFFOLD_WAIT_FOR_DELETIONS` (same as `--wait-for-deletions`) diff --git a/integration/run_test.go b/integration/run_test.go index 731c546fc0c..d26ef7c02fa 100644 --- a/integration/run_test.go +++ b/integration/run_test.go @@ -17,6 +17,7 @@ limitations under the License. package integration import ( + "fmt" "os" "path/filepath" "strings" @@ -247,6 +248,44 @@ func TestRunTailDefaultNamespace(t *testing.T) { } } +func TestRunTailTolerateFailuresUntilDeadline(t *testing.T) { + MarkIntegrationTest(t, CanRunWithoutGcp) + var tsts = []struct { + description string + dir string + args []string + deployments []string + env []string + targetLogOne string + targetLogTwo string + }{ + { + description: "status-check-tolerance", + dir: "testdata/status-check-tolerance", + args: []string{"--tolerate-failures-until-deadline"}, + deployments: []string{"tolerance-check"}, + targetLogOne: "container will exit with error", + targetLogTwo: "Hello world!", + env: []string{fmt.Sprintf("STOP_FAILING_TIME=%d", time.Now().Unix()+10)}, + }, + } + + for _, test := range tsts { + t.Run(test.description, func(t *testing.T) { + if test.targetLogOne == "" || test.targetLogTwo == "" { + t.SkipNow() + } + ns, _ := SetupNamespace(t) + + args := append(test.args, "--tail") + out := skaffold.Run(args...).InDir(test.dir).InNs(ns.Name).WithEnv(test.env).RunLive(t) 
+ defer skaffold.Delete().InDir(test.dir).WithEnv(test.env).RunOrFail(t) + WaitForLogs(t, out, test.targetLogOne) + WaitForLogs(t, out, test.targetLogTwo) + }) + } +} + func TestRunRenderOnly(t *testing.T) { MarkIntegrationTest(t, CanRunWithoutGcp) diff --git a/integration/testdata/status-check-tolerance/skaffold.yaml b/integration/testdata/status-check-tolerance/skaffold.yaml new file mode 100644 index 00000000000..e6726f0a3e1 --- /dev/null +++ b/integration/testdata/status-check-tolerance/skaffold.yaml @@ -0,0 +1,14 @@ +apiVersion: skaffold/v2beta29 +kind: Config +build: + artifacts: + - image: tolerance-check + context: tolerance-check + docker: + buildArgs: + STOP_FAILING_TIME: '{{.STOP_FAILING_TIME}}' +deploy: + statusCheckDeadlineSeconds: 120 + kubectl: + manifests: + - tolerance-check/kubernetes/* diff --git a/integration/testdata/status-check-tolerance/tolerance-check/Dockerfile b/integration/testdata/status-check-tolerance/tolerance-check/Dockerfile new file mode 100644 index 00000000000..5c8c7b64cee --- /dev/null +++ b/integration/testdata/status-check-tolerance/tolerance-check/Dockerfile @@ -0,0 +1,7 @@ +FROM alpine:latest +ARG STOP_FAILING_TIME +ENV STOP_FAILING_TIME=${STOP_FAILING_TIME:-not_found} + + +COPY script.sh /script.sh +ENTRYPOINT [ "/script.sh" ] \ No newline at end of file diff --git a/integration/testdata/status-check-tolerance/tolerance-check/kubernetes/deployment.yaml b/integration/testdata/status-check-tolerance/tolerance-check/kubernetes/deployment.yaml new file mode 100644 index 00000000000..e650e810e31 --- /dev/null +++ b/integration/testdata/status-check-tolerance/tolerance-check/kubernetes/deployment.yaml @@ -0,0 +1,19 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: tolerance-check + labels: + app: tolerance-check +spec: + replicas: 1 + selector: + matchLabels: + app: tolerance-check + template: + metadata: + labels: + app: tolerance-check + spec: + containers: + - name: tolerance-check + image: tolerance-check diff 
--git a/integration/testdata/status-check-tolerance/tolerance-check/script.sh b/integration/testdata/status-check-tolerance/tolerance-check/script.sh new file mode 100755 index 00000000000..54de665436a --- /dev/null +++ b/integration/testdata/status-check-tolerance/tolerance-check/script.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +current_time=$(date +%s) +stop_failing_time=$STOP_FAILING_TIME + +echo $current_time +echo "========" +echo $stop_failing_time +echo "========" + +if [[ $current_time -le $stop_failing_time ]]; then + echo "current time less than stop failing time, container will exit with error" + exit 1 +fi +while : +do + echo "Hello world!!!! - current time greater than stop failing time!" + sleep 2 +done \ No newline at end of file diff --git a/pkg/skaffold/config/options.go b/pkg/skaffold/config/options.go index 3d7bc05b4a0..b5a0427dc66 100644 --- a/pkg/skaffold/config/options.go +++ b/pkg/skaffold/config/options.go @@ -32,80 +32,81 @@ type WaitForDeletions struct { // SkaffoldOptions are options that are set by command line arguments not included in the config file itself type SkaffoldOptions struct { - Apply bool - AutoBuild bool - AutoCreateConfig bool - AutoDeploy bool - AutoSync bool - AssumeYes bool - CacheArtifacts bool - ContainerDebugging bool - Cleanup bool - DetectMinikube bool - DryRun bool - EnableRPC bool - Force bool - ForceLoadImages bool - IterativeStatusCheck bool - FastFailStatusCheck bool - Notification bool - NoPrune bool - NoPruneChildren bool - ProfileAutoActivation bool - PropagateProfiles bool - RenderOnly bool - SkipTests bool - SkipConfigDefaults bool - Tail bool - WaitForConnection bool - EnablePlatformNodeAffinity bool - EnableGKEARMNodeToleration bool - DisableMultiPlatformBuild bool - CheckClusterNodePlatforms bool - MakePathsAbsolute *bool - MultiLevelRepo *bool - CloudRunProject string - CloudRunLocation string - ConfigurationFile string - HydrationDir string - InventoryNamespace string - InventoryID string - InventoryName 
string - GlobalConfig string - EventLogFile string - RenderOutput string - User string - CustomTag string - Namespace string - CacheFile string - Trigger string - KubeContext string - KubeConfig string - LastLogFile string - DigestSource string - Command string - MinikubeProfile string - RepoCacheDir string - TransformRulesFile string - VerifyDockerNetwork string - CustomLabels []string - TargetImages []string - Profiles []string - InsecureRegistries []string - ConfigurationFilter []string - HydratedManifests []string - Platforms []string - BuildConcurrency int - WatchPollInterval int - StatusCheck BoolOrUndefined - PushImages BoolOrUndefined - RPCPort IntOrUndefined - RPCHTTPPort IntOrUndefined - Muted Muted - PortForward PortForwardOptions - DefaultRepo StringOrUndefined - SyncRemoteCache SyncRemoteCacheOption - WaitForDeletions WaitForDeletions + Apply bool + AutoBuild bool + AutoCreateConfig bool + AutoDeploy bool + AutoSync bool + AssumeYes bool + CacheArtifacts bool + ContainerDebugging bool + Cleanup bool + DetectMinikube bool + DryRun bool + EnableRPC bool + Force bool + ForceLoadImages bool + IterativeStatusCheck bool + FastFailStatusCheck bool + TolerateFailuresStatusCheck bool + Notification bool + NoPrune bool + NoPruneChildren bool + ProfileAutoActivation bool + PropagateProfiles bool + RenderOnly bool + SkipTests bool + SkipConfigDefaults bool + Tail bool + WaitForConnection bool + EnablePlatformNodeAffinity bool + EnableGKEARMNodeToleration bool + DisableMultiPlatformBuild bool + CheckClusterNodePlatforms bool + MakePathsAbsolute *bool + MultiLevelRepo *bool + CloudRunProject string + CloudRunLocation string + ConfigurationFile string + HydrationDir string + InventoryNamespace string + InventoryID string + InventoryName string + GlobalConfig string + EventLogFile string + RenderOutput string + User string + CustomTag string + Namespace string + CacheFile string + Trigger string + KubeContext string + KubeConfig string + LastLogFile string + 
DigestSource string + Command string + MinikubeProfile string + RepoCacheDir string + TransformRulesFile string + VerifyDockerNetwork string + CustomLabels []string + TargetImages []string + Profiles []string + InsecureRegistries []string + ConfigurationFilter []string + HydratedManifests []string + Platforms []string + BuildConcurrency int + WatchPollInterval int + StatusCheck BoolOrUndefined + PushImages BoolOrUndefined + RPCPort IntOrUndefined + RPCHTTPPort IntOrUndefined + Muted Muted + PortForward PortForwardOptions + DefaultRepo StringOrUndefined + SyncRemoteCache SyncRemoteCacheOption + WaitForDeletions WaitForDeletions } type RunMode string diff --git a/pkg/skaffold/deploy/component/kubernetes/monitor_test.go b/pkg/skaffold/deploy/component/kubernetes/monitor_test.go index bae226e7c56..a3f872b0fbf 100644 --- a/pkg/skaffold/deploy/component/kubernetes/monitor_test.go +++ b/pkg/skaffold/deploy/component/kubernetes/monitor_test.go @@ -38,6 +38,8 @@ func (m mockStatusConfig) GetKubeContext() string { return "" } func (m mockStatusConfig) StatusCheckDeadlineSeconds() int { return 0 } +func (m mockStatusConfig) StatusCheckTolerateFailures() bool { return false } + func (m mockStatusConfig) FastFailStatusCheck() bool { return true } func (m mockStatusConfig) Muted() config.Muted { return config.Muted{} } diff --git a/pkg/skaffold/kubernetes/status/status_check.go b/pkg/skaffold/kubernetes/status/status_check.go index 44e62c545d1..bd8140e5451 100644 --- a/pkg/skaffold/kubernetes/status/status_check.go +++ b/pkg/skaffold/kubernetes/status/status_check.go @@ -78,6 +78,7 @@ type Config interface { StatusCheckDeadlineSeconds() int FastFailStatusCheck() bool + StatusCheckTolerateFailures() bool Muted() config.Muted StatusCheck() *bool } @@ -89,32 +90,34 @@ type Monitor interface { } type monitor struct { - cfg Config - labeller *label.DefaultLabeller - deadlineSeconds int - muteLogs bool - failFast bool - seenResources resource.Group - singleRun singleflight.Group - 
namespaces *[]string - kubeContext string - manifests manifest.ManifestList + cfg Config + labeller *label.DefaultLabeller + deadlineSeconds int + muteLogs bool + failFast bool + tolerateFailures bool + seenResources resource.Group + singleRun singleflight.Group + namespaces *[]string + kubeContext string + manifests manifest.ManifestList } // NewStatusMonitor returns a status monitor which runs checks on selected resource rollouts. // Currently implemented for deployments and statefulsets. func NewStatusMonitor(cfg Config, labeller *label.DefaultLabeller, namespaces *[]string) Monitor { return &monitor{ - muteLogs: cfg.Muted().MuteStatusCheck(), - cfg: cfg, - labeller: labeller, - deadlineSeconds: cfg.StatusCheckDeadlineSeconds(), - seenResources: make(resource.Group), - singleRun: singleflight.Group{}, - namespaces: namespaces, - kubeContext: cfg.GetKubeContext(), - manifests: make(manifest.ManifestList, 0), - failFast: cfg.FastFailStatusCheck(), + muteLogs: cfg.Muted().MuteStatusCheck(), + cfg: cfg, + labeller: labeller, + deadlineSeconds: cfg.StatusCheckDeadlineSeconds(), + seenResources: make(resource.Group), + singleRun: singleflight.Group{}, + namespaces: namespaces, + kubeContext: cfg.GetKubeContext(), + manifests: make(manifest.ManifestList, 0), + failFast: cfg.FastFailStatusCheck(), + tolerateFailures: cfg.StatusCheckTolerateFailures(), } } @@ -353,7 +356,7 @@ func getStatefulSets(ctx context.Context, client kubernetes.Interface, ns string return resources, nil } -func pollResourceStatus(ctx context.Context, cfg kubectl.Config, r *resource.Resource) { +func pollResourceStatus(ctx context.Context, cfg Config, r *resource.Resource) { pollDuration := time.Duration(defaultPollPeriodInMilliseconds) * time.Millisecond ticker := time.NewTicker(pollDuration) defer ticker.Stop() @@ -388,6 +391,12 @@ func pollResourceStatus(ctx context.Context, cfg kubectl.Config, r *resource.Res // immediately rather than waiting for for statusCheckDeadlineSeconds // TODO: 
https://github.com/GoogleContainerTools/skaffold/pull/4591 if r.HasEncounteredUnrecoverableError() { + if cfg.StatusCheckTolerateFailures() { + // slow polling down 10x to reduce issues seen with kubectl/cluster becoming unresponsive with frequent requests (exponential backoff was considered but seemed less effective in testing). + // Reset the existing ticker instead of allocating a new one so the deferred ticker.Stop() above still releases it and no ticker is leaked per poll. + ticker.Reset(pollDuration * 10) + continue + } r.MarkComplete() return } diff --git a/pkg/skaffold/runner/runcontext/context.go b/pkg/skaffold/runner/runcontext/context.go index a207b60d905..a2d3a55a666 100644 --- a/pkg/skaffold/runner/runcontext/context.go +++ b/pkg/skaffold/runner/runcontext/context.go @@ -155,6 +155,17 @@ func (ps Pipelines) TransformDenyList() []latest.ResourceFilter { return denylist } +func (ps Pipelines) StatusCheckTolerateFailures() bool { + failureTolerance := false + // failures are tolerated for the whole group if any individual pipeline sets deploy.tolerateFailuresUntilDeadline + for _, p := range ps.pipelines { + if p.Deploy.TolerateFailuresUntilDeadline { + failureTolerance = true + } + } + return failureTolerance +} + func (ps Pipelines) StatusCheckDeadlineSeconds() int { c := 0 // set the group status check deadline to maximum of any individually specified value @@ -203,6 +214,10 @@ func (rc *RunContext) StatusCheckDeadlineSeconds() int { return rc.Pipelines.StatusCheckDeadlineSeconds() } +func (rc *RunContext) StatusCheckTolerateFailures() bool { + return rc.Opts.TolerateFailuresStatusCheck || rc.Pipelines.StatusCheckTolerateFailures() +} + func (rc *RunContext) SkipTests() bool { return rc.Opts.SkipTests } diff --git a/pkg/skaffold/schema/latest/config.go b/pkg/skaffold/schema/latest/config.go index ae749ee39a3..704e28387d9 100644 --- a/pkg/skaffold/schema/latest/config.go +++ b/pkg/skaffold/schema/latest/config.go @@ -703,6 +703,11 @@ type DeployConfig struct { // StatusCheckDeadlineSeconds *beta* is the deadline for deployments to stabilize in seconds.
StatusCheckDeadlineSeconds int `yaml:"statusCheckDeadlineSeconds,omitempty"` + // TolerateFailuresUntilDeadline configures the Skaffold "status-check" to tolerate failures + // (flapping deployments, etc.) until the statusCheckDeadlineSeconds duration or k8s object + // timeouts such as progressDeadlineSeconds, etc. + TolerateFailuresUntilDeadline bool `yaml:"tolerateFailuresUntilDeadline,omitempty"` + // KubeContext is the Kubernetes context that Skaffold should deploy to. // For example: `minikube`. KubeContext string `yaml:"kubeContext,omitempty"` diff --git a/pkg/skaffold/schema/profiles.go b/pkg/skaffold/schema/profiles.go index b8326f3a45c..c3407677b2c 100644 --- a/pkg/skaffold/schema/profiles.go +++ b/pkg/skaffold/schema/profiles.go @@ -443,6 +443,15 @@ func overlayProfileField(profileName, fieldName string, yamlFieldName string, fi PatchIndex: -1, } return v.Interface() + case reflect.Bool: + if v.Interface() == reflect.Zero(v.Type()).Interface() { + return config + } + fieldsOverrodeByProfile["/"+path.Join(fieldPath...)] = configlocations.YAMLOverrideInfo{ + ProfileName: profileName, + PatchIndex: -1, + } + return v.Interface() case reflect.String: if reflect.DeepEqual("", v.Interface()) { return config