From b1e77fe5e4af4cd58ca1e002cc982e6f2e3f3507 Mon Sep 17 00:00:00 2001 From: Jesse Suen Date: Tue, 2 Apr 2019 19:07:45 -0700 Subject: [PATCH] Implement support for PNS (Process Namespace Sharing) executor --- CHANGELOG.md | 8 + Gopkg.lock | 27 +- cmd/argoexec/commands/init.go | 25 +- cmd/argoexec/commands/resource.go | 33 +- cmd/argoexec/commands/root.go | 109 ++--- cmd/argoexec/commands/wait.go | 34 +- cmd/argoexec/main.go | 2 +- cmd/workflow-controller/main.go | 16 +- demo.md | 9 +- docs/workflow-controller-configmap.yaml | 37 +- examples/artifact-disable-archive.yaml | 51 +++ examples/artifact-passing.yaml | 2 +- examples/ci-output-artifact.yaml | 7 +- examples/continue-on-fail.yaml | 36 ++ ...On-fail.yaml => dag-continue-on-fail.yaml} | 4 +- examples/global-outputs.yaml | 2 +- examples/output-parameter.yaml | 2 +- examples/parameter-aggregation-dag.yaml | 1 + examples/parameter-aggregation.yaml | 1 + examples/sidecar-dind.yaml | 2 +- examples/workflow-continueOn-fail.yaml | 67 --- pkg/apis/workflow/v1alpha1/types.go | 31 +- .../expectedfailures/disallow-unknown.json | 25 -- .../pns/pns-output-artifacts.yaml | 39 ++ .../pns/pns-quick-exit-output-art.yaml | 30 ++ .../functional/artifact-disable-archive.yaml | 50 +-- test/e2e/functional/continue-on-fail.yaml | 1 + test/e2e/functional/dag-argument-passing.yaml | 4 +- test/e2e/functional/global-outputs-dag.yaml | 2 +- .../functional/global-outputs-variable.yaml | 2 +- ...g-outputs.yaml => nested-dag-outputs.yaml} | 1 + .../functional/output-artifact-optional.yaml | 2 +- .../output-input-artifact-optional.yaml | 2 +- .../output-param-different-uid.yaml | 27 ++ test/e2e/functional/pns-output-params.yaml | 71 ++++ test/e2e/functional/retry-with-artifacts.yaml | 2 +- test/e2e/lintfail/disallow-unknown.yaml | 15 + .../invalid-spec.yaml | 0 .../malformed-spec.yaml} | 0 test/e2e/ui/ui-nested-steps.yaml | 12 +- util/archive/archive.go | 131 ++++++ util/archive/archive_test.go | 60 +++ workflow/artifacts/s3/s3.go | 24 +- 
workflow/common/common.go | 15 +- workflow/common/util.go | 162 +++++++- workflow/controller/config.go | 14 +- workflow/controller/exec_control.go | 8 +- workflow/controller/operator.go | 29 +- workflow/controller/operator_test.go | 22 +- workflow/controller/workflowpod.go | 187 +++++---- workflow/controller/workflowpod_test.go | 44 +- workflow/executor/common/common.go | 6 +- workflow/executor/docker/docker.go | 57 ++- workflow/executor/executor.go | 318 ++++++++++----- workflow/executor/k8sapi/k8sapi.go | 20 +- workflow/executor/kubelet/client.go | 86 +--- workflow/executor/kubelet/kubelet.go | 19 +- .../mocks/ContainerRuntimeExecutor.go | 44 +- workflow/executor/pns/pns.go | 385 ++++++++++++++++++ workflow/metrics/collector.go | 16 +- workflow/util/util.go | 6 +- workflow/validate/lint.go | 5 +- workflow/validate/validate.go | 78 +++- workflow/validate/validate_test.go | 148 ++++++- 64 files changed, 1946 insertions(+), 729 deletions(-) create mode 100644 examples/artifact-disable-archive.yaml create mode 100644 examples/continue-on-fail.yaml rename examples/{dag-continueOn-fail.yaml => dag-continue-on-fail.yaml} (93%) delete mode 100644 examples/workflow-continueOn-fail.yaml delete mode 100644 test/e2e/expectedfailures/disallow-unknown.json create mode 100644 test/e2e/expectedfailures/pns/pns-output-artifacts.yaml create mode 100644 test/e2e/expectedfailures/pns/pns-quick-exit-output-art.yaml mode change 100644 => 120000 test/e2e/functional/artifact-disable-archive.yaml create mode 120000 test/e2e/functional/continue-on-fail.yaml rename test/e2e/functional/{dag-outputs.yaml => nested-dag-outputs.yaml} (99%) create mode 100644 test/e2e/functional/output-param-different-uid.yaml create mode 100644 test/e2e/functional/pns-output-params.yaml create mode 100644 test/e2e/lintfail/disallow-unknown.yaml rename test/e2e/{expectedfailures => lintfail}/invalid-spec.yaml (100%) rename test/e2e/{expectedfailures/maformed-spec.yaml => lintfail/malformed-spec.yaml} (100%) 
create mode 100644 util/archive/archive.go create mode 100644 util/archive/archive_test.go create mode 100644 workflow/executor/pns/pns.go diff --git a/CHANGELOG.md b/CHANGELOG.md index db4a421e05e1..6ecd34f6c15b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## 2.3.0 (Not Yet Released) + +### Deprecation Notice +The workflow-controller-configmap introduces a new config field, `executor`, which is a container +spec and provides controls over the executor sidecar container (i.e. `init`/`wait`). The fields +`executorImage`, `executorResources`, and `executorImagePullPolicy` are deprecated and will be +removed in a future release. + ## 2.2.1 (2018-10-18) ### Changelog since v2.2.0 diff --git a/Gopkg.lock b/Gopkg.lock index ec3e23ccef9e..8c9207807811 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -46,10 +46,12 @@ [[projects]] branch = "master" - digest = "1:c3b7ed058146643b16d3a9827550fba317dbff9f55249dfafac7eb6c3652ad23" + digest = "1:72347a6143ccb58245c6f8055662ae6cb2d5dd655699f0fc479c25cc610fc582" name = "github.com/argoproj/pkg" packages = [ + "cli", "errors", + "exec", "file", "humanize", "json", @@ -61,7 +63,7 @@ "time", ] pruneopts = "" - revision = "a581a48d63014312c4f2762787f669e46bdb1fd9" + revision = "7e3ef65c8d44303738c7e815bd9b1b297b39f5c8" [[projects]] branch = "master" @@ -392,6 +394,14 @@ pruneopts = "" revision = "58046073cbffe2f25d425fe1331102f55cf719de" +[[projects]] + branch = "master" + digest = "1:1dee6133ab829c8559a39031ad1e0e3538e4a7b34d3e0509d1fc247737e928c1" + name = "github.com/mitchellh/go-ps" + packages = ["."] + pruneopts = "" + revision = "4fdf99ab29366514c69ccccddab5dc58b8d84062" + [[projects]] digest = "1:0c0ff2a89c1bb0d01887e1dac043ad7efbf3ec77482ef058ac423d13497e16fd" name = "github.com/modern-go/concurrent" @@ -1143,12 +1153,22 @@ pruneopts = "" revision = "e3762e86a74c878ffed47484592986685639c2cd" +[[projects]] + branch = "master" + digest = 
"1:f6c19347011ba9a072aa55f5c7fa630c0b88303ac4ca83008454aef95b0c2078" + name = "k8s.io/utils" + packages = ["pointer"] + pruneopts = "" + revision = "21c4ce38f2a793ec01e925ddc31216500183b773" + [solve-meta] analyzer-name = "dep" analyzer-version = 1 input-imports = [ "github.com/Knetic/govaluate", + "github.com/argoproj/pkg/cli", "github.com/argoproj/pkg/errors", + "github.com/argoproj/pkg/exec", "github.com/argoproj/pkg/file", "github.com/argoproj/pkg/humanize", "github.com/argoproj/pkg/json", @@ -1162,8 +1182,8 @@ "github.com/evanphx/json-patch", "github.com/ghodss/yaml", "github.com/go-openapi/spec", - "github.com/golang/glog", "github.com/gorilla/websocket", + "github.com/mitchellh/go-ps", "github.com/pkg/errors", "github.com/prometheus/client_golang/prometheus", "github.com/prometheus/client_golang/prometheus/promhttp", @@ -1222,6 +1242,7 @@ "k8s.io/code-generator/cmd/informer-gen", "k8s.io/code-generator/cmd/lister-gen", "k8s.io/kube-openapi/pkg/common", + "k8s.io/utils/pointer", ] solver-name = "gps-cdcl" solver-version = 1 diff --git a/cmd/argoexec/commands/init.go b/cmd/argoexec/commands/init.go index c581a240496c..270cada7e52f 100644 --- a/cmd/argoexec/commands/init.go +++ b/cmd/argoexec/commands/init.go @@ -6,19 +6,18 @@ import ( "github.com/spf13/cobra" ) -func init() { - RootCmd.AddCommand(initCmd) -} - -var initCmd = &cobra.Command{ - Use: "init", - Short: "Load artifacts", - Run: func(cmd *cobra.Command, args []string) { - err := loadArtifacts() - if err != nil { - log.Fatalf("%+v", err) - } - }, +func NewInitCommand() *cobra.Command { + var command = cobra.Command{ + Use: "init", + Short: "Load artifacts", + Run: func(cmd *cobra.Command, args []string) { + err := loadArtifacts() + if err != nil { + log.Fatalf("%+v", err) + } + }, + } + return &command } func loadArtifacts() error { diff --git a/cmd/argoexec/commands/resource.go b/cmd/argoexec/commands/resource.go index d83c767ab18a..d024e1683ae9 100644 --- a/cmd/argoexec/commands/resource.go +++ 
b/cmd/argoexec/commands/resource.go @@ -9,23 +9,22 @@ import ( "github.com/spf13/cobra" ) -func init() { - RootCmd.AddCommand(resourceCmd) -} - -var resourceCmd = &cobra.Command{ - Use: "resource (get|create|apply|delete) MANIFEST", - Short: "update a resource and wait for resource conditions", - Run: func(cmd *cobra.Command, args []string) { - if len(args) != 1 { - cmd.HelpFunc()(cmd, args) - os.Exit(1) - } - err := execResource(args[0]) - if err != nil { - log.Fatalf("%+v", err) - } - }, +func NewResourceCommand() *cobra.Command { + var command = cobra.Command{ + Use: "resource (get|create|apply|delete) MANIFEST", + Short: "update a resource and wait for resource conditions", + Run: func(cmd *cobra.Command, args []string) { + if len(args) != 1 { + cmd.HelpFunc()(cmd, args) + os.Exit(1) + } + err := execResource(args[0]) + if err != nil { + log.Fatalf("%+v", err) + } + }, + } + return &command } func execResource(action string) error { diff --git a/cmd/argoexec/commands/root.go b/cmd/argoexec/commands/root.go index c53dadcd2e1d..60405a8eb312 100644 --- a/cmd/argoexec/commands/root.go +++ b/cmd/argoexec/commands/root.go @@ -1,10 +1,11 @@ package commands import ( + "encoding/json" "os" - "github.com/argoproj/pkg/kube/cli" - "github.com/ghodss/yaml" + "github.com/argoproj/pkg/cli" + kubecli "github.com/argoproj/pkg/kube/cli" log "github.com/sirupsen/logrus" "github.com/spf13/cobra" "k8s.io/client-go/kubernetes" @@ -17,6 +18,7 @@ import ( "github.com/argoproj/argo/workflow/executor/docker" "github.com/argoproj/argo/workflow/executor/k8sapi" "github.com/argoproj/argo/workflow/executor/kubelet" + "github.com/argoproj/argo/workflow/executor/pns" ) const ( @@ -25,83 +27,82 @@ const ( ) var ( - // GlobalArgs hold global CLI flags - GlobalArgs globalFlags - - clientConfig clientcmd.ClientConfig -) - -type globalFlags struct { + clientConfig clientcmd.ClientConfig + logLevel string // --loglevel + glogLevel int // --gloglevel podAnnotationsPath string // --pod-annotations 
-} +) func init() { - clientConfig = cli.AddKubectlFlagsToCmd(RootCmd) - RootCmd.PersistentFlags().StringVar(&GlobalArgs.podAnnotationsPath, "pod-annotations", common.PodMetadataAnnotationsPath, "Pod annotations file from k8s downward API") - RootCmd.AddCommand(cmd.NewVersionCmd(CLIName)) + cobra.OnInitialize(initConfig) } -// RootCmd is the argo root level command -var RootCmd = &cobra.Command{ - Use: CLIName, - Short: "argoexec is the executor sidecar to workflow containers", - Run: func(cmd *cobra.Command, args []string) { - cmd.HelpFunc()(cmd, args) - }, +func initConfig() { + cli.SetLogLevel(logLevel) + cli.SetGLogLevel(glogLevel) } -func initExecutor() *executor.WorkflowExecutor { - podAnnotationsPath := common.PodMetadataAnnotationsPath - - // Use the path specified from the flag - if GlobalArgs.podAnnotationsPath != "" { - podAnnotationsPath = GlobalArgs.podAnnotationsPath +func NewRootCommand() *cobra.Command { + var command = cobra.Command{ + Use: CLIName, + Short: "argoexec is the executor sidecar to workflow containers", + Run: func(cmd *cobra.Command, args []string) { + cmd.HelpFunc()(cmd, args) + }, } + command.AddCommand(NewInitCommand()) + command.AddCommand(NewResourceCommand()) + command.AddCommand(NewWaitCommand()) + command.AddCommand(cmd.NewVersionCmd(CLIName)) + + clientConfig = kubecli.AddKubectlFlagsToCmd(&command) + command.PersistentFlags().StringVar(&podAnnotationsPath, "pod-annotations", common.PodMetadataAnnotationsPath, "Pod annotations file from k8s downward API") + command.PersistentFlags().StringVar(&logLevel, "loglevel", "info", "Set the logging level. 
One of: debug|info|warn|error") + command.PersistentFlags().IntVar(&glogLevel, "gloglevel", 0, "Set the glog logging level") + + return &command +} + +func initExecutor() *executor.WorkflowExecutor { config, err := clientConfig.ClientConfig() - if err != nil { - panic(err.Error()) - } + checkErr(err) + namespace, _, err := clientConfig.Namespace() - if err != nil { - panic(err.Error()) - } + checkErr(err) clientset, err := kubernetes.NewForConfig(config) - if err != nil { - panic(err.Error()) - } - podName, ok := os.LookupEnv(common.EnvVarPodName) - if !ok { - log.Fatalf("Unable to determine pod name from environment variable %s", common.EnvVarPodName) - } + checkErr(err) + + podName, err := os.Hostname() + checkErr(err) + + tmpl, err := executor.LoadTemplate(podAnnotationsPath) + checkErr(err) var cre executor.ContainerRuntimeExecutor switch os.Getenv(common.EnvVarContainerRuntimeExecutor) { case common.ContainerRuntimeExecutorK8sAPI: cre, err = k8sapi.NewK8sAPIExecutor(clientset, config, podName, namespace) - if err != nil { - panic(err.Error()) - } case common.ContainerRuntimeExecutorKubelet: cre, err = kubelet.NewKubeletExecutor() - if err != nil { - panic(err.Error()) - } + case common.ContainerRuntimeExecutorPNS: + cre, err = pns.NewPNSExecutor(clientset, podName, namespace, tmpl.Outputs.HasOutputs()) default: cre, err = docker.NewDockerExecutor() - if err != nil { - panic(err.Error()) - } - } - wfExecutor := executor.NewExecutor(clientset, podName, namespace, podAnnotationsPath, cre) - err = wfExecutor.LoadTemplate() - if err != nil { - panic(err.Error()) } + checkErr(err) - yamlBytes, _ := yaml.Marshal(&wfExecutor.Template) + wfExecutor := executor.NewExecutor(clientset, podName, namespace, podAnnotationsPath, cre, *tmpl) + yamlBytes, _ := json.Marshal(&wfExecutor.Template) vers := argo.GetVersion() log.Infof("Executor (version: %s, build_date: %s) initialized with template:\n%s", vers, vers.BuildDate, string(yamlBytes)) return &wfExecutor } + +// checkErr 
is a convenience function to panic upon error +func checkErr(err error) { + if err != nil { + panic(err.Error()) + } +} diff --git a/cmd/argoexec/commands/wait.go b/cmd/argoexec/commands/wait.go index 32d9b1114579..1e933f29fdcf 100644 --- a/cmd/argoexec/commands/wait.go +++ b/cmd/argoexec/commands/wait.go @@ -8,19 +8,18 @@ import ( "github.com/spf13/cobra" ) -func init() { - RootCmd.AddCommand(waitCmd) -} - -var waitCmd = &cobra.Command{ - Use: "wait", - Short: "wait for main container to finish and save artifacts", - Run: func(cmd *cobra.Command, args []string) { - err := waitContainer() - if err != nil { - log.Fatalf("%+v", err) - } - }, +func NewWaitCommand() *cobra.Command { + var command = cobra.Command{ + Use: "wait", + Short: "wait for main container to finish and save artifacts", + Run: func(cmd *cobra.Command, args []string) { + err := waitContainer() + if err != nil { + log.Fatalf("%+v", err) + } + }, + } + return &command } func waitContainer() error { @@ -29,11 +28,16 @@ func waitContainer() error { defer stats.LogStats() stats.StartStatsTicker(5 * time.Minute) - // Wait for main container to complete and kill sidecars + // Wait for main container to complete err := wfExecutor.Wait() if err != nil { wfExecutor.AddError(err) - // do not return here so we can still try to save outputs + // do not return here so we can still try to kill sidecars & save outputs + } + err = wfExecutor.KillSidecars() + if err != nil { + wfExecutor.AddError(err) + // do not return here so we can still try save outputs } logArt, err := wfExecutor.SaveLogs() if err != nil { diff --git a/cmd/argoexec/main.go b/cmd/argoexec/main.go index 629e1b0806fd..99c6b4d30694 100644 --- a/cmd/argoexec/main.go +++ b/cmd/argoexec/main.go @@ -14,7 +14,7 @@ import ( ) func main() { - if err := commands.RootCmd.Execute(); err != nil { + if err := commands.NewRootCommand().Execute(); err != nil { fmt.Println(err) os.Exit(1) } diff --git a/cmd/workflow-controller/main.go 
b/cmd/workflow-controller/main.go index f881739dbf23..141615c9fcd5 100644 --- a/cmd/workflow-controller/main.go +++ b/cmd/workflow-controller/main.go @@ -2,13 +2,12 @@ package main import ( "context" - "flag" "fmt" "os" - "strconv" "time" - "github.com/argoproj/pkg/kube/cli" + "github.com/argoproj/pkg/cli" + kubecli "github.com/argoproj/pkg/kube/cli" "github.com/argoproj/pkg/stats" "github.com/spf13/cobra" "k8s.io/client-go/kubernetes" @@ -44,16 +43,11 @@ func NewRootCommand() *cobra.Command { Use: CLIName, Short: "workflow-controller is the controller to operate on workflows", RunE: func(c *cobra.Command, args []string) error { - - cmdutil.SetLogLevel(logLevel) + cli.SetLogLevel(logLevel) + cli.SetGLogLevel(glogLevel) stats.RegisterStackDumper() stats.StartStatsTicker(5 * time.Minute) - // Set the glog level for the k8s go-client - _ = flag.CommandLine.Parse([]string{}) - _ = flag.Lookup("logtostderr").Value.Set("true") - _ = flag.Lookup("v").Value.Set(strconv.Itoa(glogLevel)) - config, err := clientConfig.ClientConfig() if err != nil { return err @@ -89,7 +83,7 @@ func NewRootCommand() *cobra.Command { }, } - clientConfig = cli.AddKubectlFlagsToCmd(&command) + clientConfig = kubecli.AddKubectlFlagsToCmd(&command) command.AddCommand(cmdutil.NewVersionCmd(CLIName)) command.Flags().StringVar(&configMap, "configmap", "workflow-controller-configmap", "Name of K8s configmap to retrieve workflow controller configuration") command.Flags().StringVar(&executorImage, "executor-image", "", "Executor image to use (overrides value in configmap)") diff --git a/demo.md b/demo.md index 63555f63f7ae..20d614797336 100644 --- a/demo.md +++ b/demo.md @@ -75,9 +75,12 @@ Argo supports S3 (AWS, GCS, Minio) as well as Artifactory as artifact repositori uses Minio for the sake of portability. Instructions on how to configure other artifact repositories are [here](https://github.com/argoproj/argo/blob/master/ARTIFACT_REPO.md). 
``` -brew install kubernetes-helm # mac -helm init -helm install stable/minio --name argo-artifacts --set service.type=LoadBalancer --set persistence.enabled=false +helm install stable/minio \ + --name argo-artifacts \ + --set service.type=LoadBalancer \ + --set defaultBucket.enabled=true \ + --set defaultBucket.name=my-bucket \ + --set persistence.enabled=false ``` Login to the Minio UI using a web browser (port 9000) after exposing obtaining the external IP using `kubectl`. diff --git a/docs/workflow-controller-configmap.yaml b/docs/workflow-controller-configmap.yaml index 3e27ea2c1c70..d82b8c42a62a 100644 --- a/docs/workflow-controller-configmap.yaml +++ b/docs/workflow-controller-configmap.yaml @@ -49,6 +49,8 @@ data: endpoint: s3.amazonaws.com bucket: my-bucket region: us-west-2 + # insecure will disable TLS. Primarily used for minio installs not configured with TLS + insecure: false # keyFormat is a format pattern to define how artifacts will be organized in a bucket. # It can reference workflow metadata variables such as workflow.namespace, workflow.name, # pod.name. Can also use strftime formating of workflow.creationTimestamp so that workflow @@ -63,10 +65,8 @@ data: /{{workflow.creationTimestamp.d}}\ /{{workflow.name}}\ /{{pod.name}}" - # insecure will disable TLS. used for minio installs not configured with TLS - insecure: false # The actual secret object (in this example my-s3-credentials), should be created in every - # namespace which a workflow wants which wants to store its artifacts to S3. If omitted, + # namespace where a workflow needs to store its artifacts to S3. If omitted, # attempts to use IAM role to access the bucket (instead of accessKey/secretKey). accessKeySecret: name: my-s3-credentials @@ -84,16 +84,27 @@ data: # disable the TLS verification of the kubelet executor (default: false) kubeletInsecure: false - # executorResources specifies the resource requirements that will be used for the executor - # sidecar/init container. 
This is useful in clusters which require resources to be specified as - # part of admission control. - executorResources: - requests: - cpu: 0.1 - memory: 64Mi - limits: - cpu: 0.5 - memory: 512Mi + # executor controls how the init and wait container should be customized + # (available since Argo v2.3) + executor: + imagePullPolicy: IfNotPresent + resources: + requests: + cpu: 0.1 + memory: 64Mi + limits: + cpu: 0.5 + memory: 512Mi + # args & env allows command line arguments and environment variables to be appended to the + # executor container and is mainly used for development/debugging purposes. + args: + - --loglevel + - debug + - --gloglevel + - "6" + env: + - name: DEBUG_FLAG + value: "1" # metricsConfig controls the path and port for prometheus metrics metricsConfig: diff --git a/examples/artifact-disable-archive.yaml b/examples/artifact-disable-archive.yaml new file mode 100644 index 000000000000..444b01ac5b53 --- /dev/null +++ b/examples/artifact-disable-archive.yaml @@ -0,0 +1,51 @@ +# This example demonstrates the ability to disable the default behavior of archiving (tar.gz) +# when saving output artifacts. For directories, when archive is set to none, files in directory +# will be copied recursively in the case of S3. 
+apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: artifact-disable-archive- +spec: + entrypoint: artifact-disable-archive + templates: + - name: artifact-disable-archive + steps: + - - name: generate-artifact + template: whalesay + - - name: consume-artifact + template: print-message + arguments: + artifacts: + - name: etc + from: "{{steps.generate-artifact.outputs.artifacts.etc}}" + - name: hello-txt + from: "{{steps.generate-artifact.outputs.artifacts.hello-txt}}" + + - name: whalesay + container: + image: docker/whalesay:latest + command: [sh, -c] + args: ["cowsay hello world | tee /tmp/hello_world.txt ; sleep 1"] + outputs: + artifacts: + - name: etc + path: /etc + archive: + none: {} + - name: hello-txt + path: /tmp/hello_world.txt + archive: + none: {} + + - name: print-message + inputs: + artifacts: + - name: etc + path: /tmp/etc + - name: hello-txt + path: /tmp/hello.txt + container: + image: alpine:latest + command: [sh, -c] + args: + - cat /tmp/hello.txt && cd /tmp/etc && find . 
diff --git a/examples/artifact-passing.yaml b/examples/artifact-passing.yaml index dd301b9ac116..90fdeacd3728 100644 --- a/examples/artifact-passing.yaml +++ b/examples/artifact-passing.yaml @@ -22,7 +22,7 @@ spec: container: image: docker/whalesay:latest command: [sh, -c] - args: ["cowsay hello world | tee /tmp/hello_world.txt"] + args: ["sleep 1; cowsay hello world | tee /tmp/hello_world.txt"] outputs: artifacts: - name: hello-art diff --git a/examples/ci-output-artifact.yaml b/examples/ci-output-artifact.yaml index fababef1bb13..591fec3cea39 100644 --- a/examples/ci-output-artifact.yaml +++ b/examples/ci-output-artifact.yaml @@ -1,7 +1,7 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: - generateName: ci-example- + generateName: ci-output-artifact- spec: entrypoint: ci-example # a temporary volume, named workdir, will be used as a working @@ -75,7 +75,10 @@ spec: - name: release-artifact container: - image: debian:9.4 + image: alpine:3.8 + volumeMounts: + - name: workdir + mountPath: /go outputs: artifacts: - name: release diff --git a/examples/continue-on-fail.yaml b/examples/continue-on-fail.yaml new file mode 100644 index 000000000000..7681e99c597f --- /dev/null +++ b/examples/continue-on-fail.yaml @@ -0,0 +1,36 @@ +# This example demonstrates the use of continueOn: failed, which allows a workflow to +# proceed to subsequent steps even when an intermediate step fails. +# +# Step C fails intentionally, but because it sets continueOn: failed, step D still runs.
+ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: continue-on-fail- +spec: + entrypoint: workflow-ignore + templates: + - name: workflow-ignore + steps: + - - name: A + template: whalesay + - - name: B + template: whalesay + - name: C + template: intentional-fail + continueOn: + failed: true + - - name: D + template: whalesay + + - name: whalesay + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["hello world"] + + - name: intentional-fail + container: + image: alpine:latest + command: [sh, -c] + args: ["echo intentional failure; exit 1"] diff --git a/examples/dag-continueOn-fail.yaml b/examples/dag-continue-on-fail.yaml similarity index 93% rename from examples/dag-continueOn-fail.yaml rename to examples/dag-continue-on-fail.yaml index 2bb2f78b893b..dc9600babb52 100644 --- a/examples/dag-continueOn-fail.yaml +++ b/examples/dag-continue-on-fail.yaml @@ -1,7 +1,7 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: - generateName: dag-contiueOn-fail- + generateName: dag-continue-on-fail- spec: entrypoint: workflow templates: @@ -14,7 +14,7 @@ spec: dependencies: [A] template: intentional-fail continueOn: - failed: true + failed: true - name: C dependencies: [A] template: whalesay diff --git a/examples/global-outputs.yaml b/examples/global-outputs.yaml index e621b7c1fb5f..f2a270aa6141 100644 --- a/examples/global-outputs.yaml +++ b/examples/global-outputs.yaml @@ -19,7 +19,7 @@ spec: container: image: alpine:3.7 command: [sh, -c] - args: ["echo -n hello world > /tmp/hello_world.txt"] + args: ["sleep 1; echo -n hello world > /tmp/hello_world.txt"] outputs: parameters: # export a global parameter.
The parameter will be programatically available in the completed diff --git a/examples/output-parameter.yaml b/examples/output-parameter.yaml index c9ccf686955f..d15f30f466ce 100644 --- a/examples/output-parameter.yaml +++ b/examples/output-parameter.yaml @@ -32,7 +32,7 @@ spec: container: image: docker/whalesay:latest command: [sh, -c] - args: ["echo -n hello world > /tmp/hello_world.txt"] + args: ["sleep 1; echo -n hello world > /tmp/hello_world.txt"] outputs: parameters: - name: hello-param diff --git a/examples/parameter-aggregation-dag.yaml b/examples/parameter-aggregation-dag.yaml index 49bc3bc6a24f..3a534e153bf0 100644 --- a/examples/parameter-aggregation-dag.yaml +++ b/examples/parameter-aggregation-dag.yaml @@ -49,6 +49,7 @@ spec: command: [sh, -xc] args: - | + sleep 1 && echo {{inputs.parameters.num}} > /tmp/num && if [ $(({{inputs.parameters.num}}%2)) -eq 0 ]; then echo "even" > /tmp/even; diff --git a/examples/parameter-aggregation.yaml b/examples/parameter-aggregation.yaml index f7df1f7f053a..4baec52e8af1 100644 --- a/examples/parameter-aggregation.yaml +++ b/examples/parameter-aggregation.yaml @@ -46,6 +46,7 @@ spec: command: [sh, -xc] args: - | + sleep 1 && echo {{inputs.parameters.num}} > /tmp/num && if [ $(({{inputs.parameters.num}}%2)) -eq 0 ]; then echo "even" > /tmp/even; diff --git a/examples/sidecar-dind.yaml b/examples/sidecar-dind.yaml index 467bb101dade..7bf8b67998c6 100644 --- a/examples/sidecar-dind.yaml +++ b/examples/sidecar-dind.yaml @@ -19,7 +19,7 @@ spec: value: 127.0.0.1 sidecars: - name: dind - image: docker:17.10-dind + image: docker:18.09.4-dind securityContext: privileged: true # mirrorVolumeMounts will mount the same volumes specified in the main container diff --git a/examples/workflow-continueOn-fail.yaml b/examples/workflow-continueOn-fail.yaml deleted file mode 100644 index 83a1442a617d..000000000000 --- a/examples/workflow-continueOn-fail.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# Example on specifying parallelism on the outer 
workflow and limiting the number of its -# children workflowss to be run at the same time. -# -# As the parallelism of A is 1, the four steps of seq-step will run sequentially. - -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: workflow-continueOn-fail- -spec: - entrypoint: workflow - templates: - - name: workflow - steps: - - - name: wf-ignore - template: workflow-ignore - - name: wf-not-ignore - template: workflow-not-ignore - - - name: workflow-ignore - steps: - - - name: A - template: whalesay - - - name: B - template: whalesay - - name: C - template: intentional-fail - continueOn: - failed: true - - - name: D - template: whalesay - - - name: workflow-not-ignore - steps: - - - name: E - template: whalesay - - - name: F - template: whalesay - - name: G - template: intentional-fail - - - name: H - template: whalesay - - # - name: B - # inputs: - # parameters: - # - name: seq-id - # steps: - # - - name: jobs - # template: one-job - # arguments: - # parameters: - # - name: seq-id - # value: "{{inputs.parameters.seq-id}}" - # withParam: "[1, 2]" - - - name: whalesay - container: - image: docker/whalesay:latest - command: [cowsay] - args: ["hello world"] - - - name: intentional-fail - container: - image: alpine:latest - command: [sh, -c] - args: ["echo intentional failure; exit 1"] diff --git a/pkg/apis/workflow/v1alpha1/types.go b/pkg/apis/workflow/v1alpha1/types.go index 3bfc57f8eb80..855b3ad51473 100644 --- a/pkg/apis/workflow/v1alpha1/types.go +++ b/pkg/apis/workflow/v1alpha1/types.go @@ -665,6 +665,10 @@ func (s *S3Artifact) String() string { return fmt.Sprintf("%s://%s/%s/%s", protocol, s.Endpoint, s.Bucket, s.Key) } +func (s *S3Artifact) HasLocation() bool { + return s != nil && s.Bucket != "" +} + // GitArtifact is the location of an git artifact type GitArtifact struct { // Repo is the git repository @@ -686,6 +690,10 @@ type GitArtifact struct { InsecureIgnoreHostKey bool `json:"insecureIgnoreHostKey,omitempty"` } +func (g 
*GitArtifact) HasLocation() bool { + return g != nil && g.Repo != "" +} + // ArtifactoryAuth describes the secret selectors required for authenticating to artifactory type ArtifactoryAuth struct { // UsernameSecret is the secret selector to the repository username @@ -706,6 +714,10 @@ func (a *ArtifactoryArtifact) String() string { return a.URL } +func (a *ArtifactoryArtifact) HasLocation() bool { + return a != nil && a.URL != "" +} + // HDFSArtifact is the location of an HDFS artifact type HDFSArtifact struct { HDFSConfig `json:",inline"` @@ -717,6 +729,10 @@ type HDFSArtifact struct { Force bool `json:"force,omitempty"` } +func (h *HDFSArtifact) HasLocation() bool { + return h != nil && len(h.Addresses) > 0 +} + // HDFSConfig is configurations for HDFS type HDFSConfig struct { HDFSKrbConfig `json:",inline"` @@ -774,12 +790,20 @@ type RawArtifact struct { Data string `json:"data"` } +func (r *RawArtifact) HasLocation() bool { + return r != nil +} + // HTTPArtifact allows an file served on HTTP to be placed as an input artifact in a container type HTTPArtifact struct { // URL of the artifact URL string `json:"url"` } +func (h *HTTPArtifact) HasLocation() bool { + return h != nil && h.URL != "" +} + // ScriptTemplate is a template subtype to enable scripting through code steps type ScriptTemplate struct { apiv1.Container `json:",inline"` @@ -963,7 +987,12 @@ func (args *Arguments) GetParameterByName(name string) *Parameter { // HasLocation whether or not an artifact has a location defined func (a *Artifact) HasLocation() bool { - return a.S3 != nil || a.Git != nil || a.HTTP != nil || a.Artifactory != nil || a.Raw != nil || a.HDFS != nil + return a.S3.HasLocation() || + a.Git.HasLocation() || + a.HTTP.HasLocation() || + a.Artifactory.HasLocation() || + a.Raw.HasLocation() || + a.HDFS.HasLocation() } // GetTemplate retrieves a defined template by its name diff --git a/test/e2e/expectedfailures/disallow-unknown.json b/test/e2e/expectedfailures/disallow-unknown.json 
deleted file mode 100644 index 659d97d24dec..000000000000 --- a/test/e2e/expectedfailures/disallow-unknown.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "apiVersion": "argoproj.io/v1alpha1", - "kind": "Workflow", - "metadata": { - "generateName": "hello-world-" - }, - "spec": { - "entrypoint": "whalesay", - "templates": [ - { - "name": "whalesay", - "container": { - "image": "docker/whalesay:latest", - "command": [ - "cowsay" - ], - "args": [ - "hello world" - ], - "someExtraField": "foo" - } - } - ] - } -} diff --git a/test/e2e/expectedfailures/pns/pns-output-artifacts.yaml b/test/e2e/expectedfailures/pns/pns-output-artifacts.yaml new file mode 100644 index 000000000000..9680ef096507 --- /dev/null +++ b/test/e2e/expectedfailures/pns/pns-output-artifacts.yaml @@ -0,0 +1,39 @@ +# Workflow specifically designed for testing process namespace sharing with output artifacts +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: pns-output-artifacts- +spec: + entrypoint: pns-output-artifacts + templates: + - name: pns-output-artifacts + archiveLocation: + archiveLogs: true + container: + image: debian:9.2 + command: [sh, -c] + args: [" + echo hello world > /mnt/workdir/foo && + echo stdout && + echo '' && + echo stderr >&2 && + sleep 1 + "] + volumeMounts: + - name: workdir + mountPath: /mnt/workdir + outputs: + artifacts: + - name: etc + path: /etc + - name: mnt + path: /mnt + - name: workdir + path: /mnt/workdir + sidecars: + - name: nginx + image: nginx:latest + + volumes: + - name: workdir + emptyDir: {} diff --git a/test/e2e/expectedfailures/pns/pns-quick-exit-output-art.yaml b/test/e2e/expectedfailures/pns/pns-quick-exit-output-art.yaml new file mode 100644 index 000000000000..286a82846e26 --- /dev/null +++ b/test/e2e/expectedfailures/pns/pns-quick-exit-output-art.yaml @@ -0,0 +1,30 @@ +# Workflow specifically designed for testing process namespace sharing with output artifacts +# This fails because the main container exits before the wait sidecar 
is able to establish the file +# handle of the main container's root filesystem. +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: pns-quick-exit-output-art- +spec: + entrypoint: pns-quick-exit-output-art + templates: + - name: pns-quick-exit-output-art + archiveLocation: + archiveLogs: true + container: + image: debian:9.2 + command: [sh, -x, -c] + args: [" + touch /mnt/workdir/foo + "] + volumeMounts: + - name: workdir + mountPath: /mnt/workdir + outputs: + artifacts: + - name: mnt + path: /mnt + + volumes: + - name: workdir + emptyDir: {} diff --git a/test/e2e/functional/artifact-disable-archive.yaml b/test/e2e/functional/artifact-disable-archive.yaml deleted file mode 100644 index f1f41e1bad39..000000000000 --- a/test/e2e/functional/artifact-disable-archive.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# This tests the disabling of archive, and ability to recursively copy a directory -apiVersion: argoproj.io/v1alpha1 -kind: Workflow -metadata: - generateName: artifact-disable-archive- -spec: - entrypoint: artifact-example - templates: - - name: artifact-example - steps: - - - name: generate-artifact - template: whalesay - - - name: consume-artifact - template: print-message - arguments: - artifacts: - - name: etc - from: "{{steps.generate-artifact.outputs.artifacts.etc}}" - - name: hello-txt - from: "{{steps.generate-artifact.outputs.artifacts.hello-txt}}" - - - name: whalesay - container: - image: docker/whalesay:latest - command: [sh, -c] - args: ["cowsay hello world | tee /tmp/hello_world.txt"] - outputs: - artifacts: - - name: etc - path: /etc - archive: - none: {} - - name: hello-txt - path: /tmp/hello_world.txt - archive: - none: {} - - - name: print-message - inputs: - artifacts: - - name: etc - path: /tmp/etc - - name: hello-txt - path: /tmp/hello.txt - container: - image: alpine:latest - command: [sh, -c] - args: - - cat /tmp/hello.txt && cd /tmp/etc && find . 
diff --git a/test/e2e/functional/artifact-disable-archive.yaml b/test/e2e/functional/artifact-disable-archive.yaml new file mode 120000 index 000000000000..109a8c619867 --- /dev/null +++ b/test/e2e/functional/artifact-disable-archive.yaml @@ -0,0 +1 @@ +../../../examples/artifact-disable-archive.yaml \ No newline at end of file diff --git a/test/e2e/functional/continue-on-fail.yaml b/test/e2e/functional/continue-on-fail.yaml new file mode 120000 index 000000000000..3bb5bfc75322 --- /dev/null +++ b/test/e2e/functional/continue-on-fail.yaml @@ -0,0 +1 @@ +../../../examples/continue-on-fail.yaml \ No newline at end of file diff --git a/test/e2e/functional/dag-argument-passing.yaml b/test/e2e/functional/dag-argument-passing.yaml index c1e51a6bb61c..24f5c7aa8c8b 100644 --- a/test/e2e/functional/dag-argument-passing.yaml +++ b/test/e2e/functional/dag-argument-passing.yaml @@ -2,7 +2,7 @@ apiVersion: argoproj.io/v1alpha1 kind: Workflow metadata: - generateName: dag-arg-passing- + generateName: dag-argument-passing- spec: entrypoint: dag-arg-passing templates: @@ -16,7 +16,7 @@ spec: container: image: alpine:3.7 command: [sh, -c, -x] - args: ['echo "{{inputs.parameters.message}}"; cat /tmp/passthrough'] + args: ['sleep 1; echo "{{inputs.parameters.message}}"; cat /tmp/passthrough'] outputs: parameters: - name: hosts diff --git a/test/e2e/functional/global-outputs-dag.yaml b/test/e2e/functional/global-outputs-dag.yaml index fa7eeb449847..cea147513f7b 100644 --- a/test/e2e/functional/global-outputs-dag.yaml +++ b/test/e2e/functional/global-outputs-dag.yaml @@ -21,7 +21,7 @@ spec: container: image: alpine:3.7 command: [sh, -c] - args: ["echo -n hello world > /tmp/hello_world.txt"] + args: ["sleep 1; echo -n hello world > /tmp/hello_world.txt"] outputs: parameters: # export a global parameter. 
The parameter will be programatically available in the completed diff --git a/test/e2e/functional/global-outputs-variable.yaml b/test/e2e/functional/global-outputs-variable.yaml index eed27afd1cc0..ca2222e6f61a 100644 --- a/test/e2e/functional/global-outputs-variable.yaml +++ b/test/e2e/functional/global-outputs-variable.yaml @@ -23,7 +23,7 @@ spec: container: image: alpine:3.7 command: [sh, -c] - args: ["echo -n hello world > /tmp/hello_world.txt"] + args: ["sleep 1; echo -n hello world > /tmp/hello_world.txt"] outputs: parameters: - name: hello-param diff --git a/test/e2e/functional/dag-outputs.yaml b/test/e2e/functional/nested-dag-outputs.yaml similarity index 99% rename from test/e2e/functional/dag-outputs.yaml rename to test/e2e/functional/nested-dag-outputs.yaml index 89ecc41130cc..8cc92c5003da 100644 --- a/test/e2e/functional/dag-outputs.yaml +++ b/test/e2e/functional/nested-dag-outputs.yaml @@ -38,6 +38,7 @@ spec: image: docker/whalesay:latest command: [sh, -c] args: [" + sleep 1; cowsay hello world | tee /tmp/my-output-artifact.txt && echo 'my-output-parameter' > /tmp/my-output-parameter.txt "] diff --git a/test/e2e/functional/output-artifact-optional.yaml b/test/e2e/functional/output-artifact-optional.yaml index 3713b45450de..803289d6ca85 100644 --- a/test/e2e/functional/output-artifact-optional.yaml +++ b/test/e2e/functional/output-artifact-optional.yaml @@ -16,7 +16,7 @@ spec: container: image: docker/whalesay:latest command: [sh, -c] - args: ["cowsay hello world | tee /tmp/hello_world12.txt"] + args: ["sleep 1; cowsay hello world | tee /tmp/hello_world12.txt"] outputs: artifacts: - name: hello-art diff --git a/test/e2e/functional/output-input-artifact-optional.yaml b/test/e2e/functional/output-input-artifact-optional.yaml index a29959fed04d..f1519df74d4e 100644 --- a/test/e2e/functional/output-input-artifact-optional.yaml +++ b/test/e2e/functional/output-input-artifact-optional.yaml @@ -21,7 +21,7 @@ spec: container: image: docker/whalesay:latest 
command: [sh, -c] - args: ["cowsay hello world | tee /tmp/hello_world123.txt"] + args: ["sleep 1; cowsay hello world | tee /tmp/hello_world123.txt"] outputs: artifacts: - name: hello-art diff --git a/test/e2e/functional/output-param-different-uid.yaml b/test/e2e/functional/output-param-different-uid.yaml new file mode 100644 index 000000000000..dbb7942fc945 --- /dev/null +++ b/test/e2e/functional/output-param-different-uid.yaml @@ -0,0 +1,27 @@ +# Tests PNS ability to capture output artifact when user id is different +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: pns-output-parameter-different-user- +spec: + entrypoint: multi-whalesay + templates: + - name: multi-whalesay + steps: + - - name: whalesay + template: whalesay + withSequence: + count: "10" + + - name: whalesay + container: + image: docker/whalesay:latest + command: [sh, -c] + args: ["sleep 1; cowsay hello world | tee /tmp/hello_world.txt"] + securityContext: + runAsUser: 1234 + outputs: + parameters: + - name: hello-art + valueFrom: + path: /tmp/hello_world.txt \ No newline at end of file diff --git a/test/e2e/functional/pns-output-params.yaml b/test/e2e/functional/pns-output-params.yaml new file mode 100644 index 000000000000..fe0001d38322 --- /dev/null +++ b/test/e2e/functional/pns-output-params.yaml @@ -0,0 +1,71 @@ +# Workflow specifically designed for testing process namespace sharing with output parameters +# This exercises the copy out regular files from volume mounted paths, or base image layer paths, +# including overlaps between the two. 
+apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: pns-outputs-params- +spec: + entrypoint: output-parameter + templates: + - name: output-parameter + steps: + - - name: generate-parameter + template: whalesay + - - name: consume-parameter + template: print-message + arguments: + parameters: + - { name: A, value: "{{steps.generate-parameter.outputs.parameters.A}}" } + - { name: B, value: "{{steps.generate-parameter.outputs.parameters.B}}" } + - { name: C, value: "{{steps.generate-parameter.outputs.parameters.C}}" } + - { name: D, value: "{{steps.generate-parameter.outputs.parameters.D}}" } + + - name: whalesay + container: + image: docker/whalesay:latest + command: [sh, -x, -c] + args: [" + sleep 1; + echo -n A > /tmp/A && + echo -n B > /mnt/outer/inner/B && + echo -n C > /tmp/C && + echo -n D > /mnt/outer/D + "] + volumeMounts: + - name: outer + mountPath: /mnt/outer + - name: inner + mountPath: /mnt/outer/inner + outputs: + parameters: + - name: A + valueFrom: + path: /tmp/A + - name: B + valueFrom: + path: /mnt/outer/inner/B + - name: C + valueFrom: + path: /tmp/C + - name: D + valueFrom: + path: /mnt/outer/D + + - name: print-message + inputs: + parameters: + - name: A + - name: B + - name: C + - name: D + container: + image: docker/whalesay:latest + command: [cowsay] + args: ["{{inputs.parameters.A}} {{inputs.parameters.B}} {{inputs.parameters.C}} {{inputs.parameters.D}}"] + + volumes: + - name: outer + emptyDir: {} + - name: inner + emptyDir: {} diff --git a/test/e2e/functional/retry-with-artifacts.yaml b/test/e2e/functional/retry-with-artifacts.yaml index 7aa5dcd37421..4a509d568504 100644 --- a/test/e2e/functional/retry-with-artifacts.yaml +++ b/test/e2e/functional/retry-with-artifacts.yaml @@ -23,7 +23,7 @@ spec: container: image: docker/whalesay:latest command: [sh, -c] - args: ["cowsay hello world | tee /tmp/hello_world.txt"] + args: ["sleep 1; cowsay hello world | tee /tmp/hello_world.txt"] outputs: artifacts: - name: hello-art 
diff --git a/test/e2e/lintfail/disallow-unknown.yaml b/test/e2e/lintfail/disallow-unknown.yaml new file mode 100644 index 000000000000..4d7c349cbf7c --- /dev/null +++ b/test/e2e/lintfail/disallow-unknown.yaml @@ -0,0 +1,15 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: disallow-unknown- +spec: + entrypoint: whalesay + templates: + - name: whalesay + container: + image: docker/whalesay:latest + command: + - cowsay + args: + - hello world + someExtraField: foo diff --git a/test/e2e/expectedfailures/invalid-spec.yaml b/test/e2e/lintfail/invalid-spec.yaml similarity index 100% rename from test/e2e/expectedfailures/invalid-spec.yaml rename to test/e2e/lintfail/invalid-spec.yaml diff --git a/test/e2e/expectedfailures/maformed-spec.yaml b/test/e2e/lintfail/malformed-spec.yaml similarity index 100% rename from test/e2e/expectedfailures/maformed-spec.yaml rename to test/e2e/lintfail/malformed-spec.yaml diff --git a/test/e2e/ui/ui-nested-steps.yaml b/test/e2e/ui/ui-nested-steps.yaml index aeb03da41e1f..c091c6827a24 100644 --- a/test/e2e/ui/ui-nested-steps.yaml +++ b/test/e2e/ui/ui-nested-steps.yaml @@ -5,6 +5,9 @@ metadata: generateName: ui-nested-steps- spec: entrypoint: ui-nested-steps + volumes: + - name: workdir + emptyDir: {} templates: - name: ui-nested-steps steps: @@ -24,14 +27,17 @@ spec: - name: locate-faces container: image: alpine:latest - command: ["sh", "-c"] + command: [sh, -c] args: - - echo '[1, 2, 3]' > /result.json + - echo '[1, 2, 3]' > /workdir/result.json + volumeMounts: + - name: workdir + mountPath: /workdir outputs: parameters: - name: imagemagick-commands valueFrom: - path: /result.json + path: /workdir/result.json - name: handle-individual-faces steps: diff --git a/util/archive/archive.go b/util/archive/archive.go new file mode 100644 index 000000000000..400b12667fb8 --- /dev/null +++ b/util/archive/archive.go @@ -0,0 +1,131 @@ +package archive + +import ( + "archive/tar" + "compress/gzip" + "io" + "os" + 
"path/filepath" + + "github.com/argoproj/argo/errors" + "github.com/argoproj/argo/util" + log "github.com/sirupsen/logrus" +) + +type flusher interface { + Flush() error +} + +// TarGzToWriter tar.gz's the source path to the supplied writer +func TarGzToWriter(sourcePath string, w io.Writer) error { + sourcePath, err := filepath.Abs(sourcePath) + if err != nil { + return errors.InternalErrorf("getting absolute path: %v", err) + } + log.Infof("Taring %s", sourcePath) + sourceFi, err := os.Stat(sourcePath) + if err != nil { + if os.IsNotExist(err) { + return errors.New(errors.CodeNotFound, err.Error()) + } + return errors.InternalWrapError(err) + } + if !sourceFi.Mode().IsRegular() && !sourceFi.IsDir() { + return errors.InternalErrorf("%s is not a regular file or directory", sourcePath) + } + if flush, ok := w.(flusher); ok { + defer func() { _ = flush.Flush() }() + } + gzw := gzip.NewWriter(w) + defer util.Close(gzw) + tw := tar.NewWriter(gzw) + defer util.Close(tw) + + if sourceFi.IsDir() { + return tarDir(sourcePath, tw) + } + return tarFile(sourcePath, tw) +} + +func tarDir(sourcePath string, tw *tar.Writer) error { + baseName := filepath.Base(sourcePath) + return filepath.Walk(sourcePath, func(fpath string, info os.FileInfo, err error) error { + if err != nil { + return errors.InternalWrapError(err) + } + // build the name to be used in the archive + nameInArchive, err := filepath.Rel(sourcePath, fpath) + if err != nil { + return errors.InternalWrapError(err) + } + nameInArchive = filepath.Join(baseName, nameInArchive) + log.Infof("writing %s", nameInArchive) + + var header *tar.Header + if (info.Mode() & os.ModeSymlink) != 0 { + linkTarget, err := os.Readlink(fpath) + if err != nil { + return errors.InternalWrapError(err) + } + header, err = tar.FileInfoHeader(info, filepath.ToSlash(linkTarget)) + if err != nil { + return errors.InternalWrapError(err) + } + } else { + header, err = tar.FileInfoHeader(info, info.Name()) + if err != nil { + return 
errors.InternalWrapError(err) + } + } + header.Name = nameInArchive + + err = tw.WriteHeader(header) + if err != nil { + return errors.InternalWrapError(err) + } + if !info.Mode().IsRegular() { + return nil + } + f, err := os.Open(fpath) + if err != nil { + return errors.InternalWrapError(err) + } + + // copy file data into tar writer + _, err = io.Copy(tw, f) + closeErr := f.Close() + if err != nil { + return err + } + if closeErr != nil { + return closeErr + } + return nil + }) +} + +func tarFile(sourcePath string, tw *tar.Writer) error { + f, err := os.Open(sourcePath) + if err != nil { + return errors.InternalWrapError(err) + } + defer util.Close(f) + info, err := f.Stat() + if err != nil { + return errors.InternalWrapError(err) + } + header, err := tar.FileInfoHeader(info, f.Name()) + if err != nil { + return errors.InternalWrapError(err) + } + header.Name = filepath.Base(sourcePath) + err = tw.WriteHeader(header) + if err != nil { + return errors.InternalWrapError(err) + } + _, err = io.Copy(tw, f) + if err != nil { + return err + } + return nil +} diff --git a/util/archive/archive_test.go b/util/archive/archive_test.go new file mode 100644 index 000000000000..2b4766b01fe0 --- /dev/null +++ b/util/archive/archive_test.go @@ -0,0 +1,60 @@ +package archive + +import ( + "bufio" + "crypto/rand" + "encoding/hex" + "os" + "path/filepath" + "testing" + + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" +) + +func tempFile(dir, prefix, suffix string) (*os.File, error) { + if dir == "" { + dir = os.TempDir() + } else { + os.MkdirAll(dir, 0700) + } + randBytes := make([]byte, 16) + rand.Read(randBytes) + filePath := filepath.Join(dir, prefix+hex.EncodeToString(randBytes)+suffix) + return os.Create(filePath) +} + +func TestTarDirectory(t *testing.T) { + f, err := tempFile(os.TempDir()+"/argo-test", "dir-", ".tgz") + assert.Nil(t, err) + log.Infof("Taring to %s", f.Name()) + w := bufio.NewWriter(f) + + err = TarGzToWriter("../../test/e2e", w) + 
assert.Nil(t, err) + + err = f.Close() + assert.Nil(t, err) +} + +func TestTarFile(t *testing.T) { + data, err := tempFile(os.TempDir()+"/argo-test", "file-", "") + assert.Nil(t, err) + _, err = data.WriteString("hello world") + assert.Nil(t, err) + data.Close() + + dataTarPath := data.Name() + ".tgz" + f, err := os.Create(dataTarPath) + assert.Nil(t, err) + log.Infof("Taring to %s", f.Name()) + w := bufio.NewWriter(f) + + err = TarGzToWriter(data.Name(), w) + assert.Nil(t, err) + err = os.Remove(data.Name()) + assert.Nil(t, err) + + err = f.Close() + assert.Nil(t, err) +} diff --git a/workflow/artifacts/s3/s3.go b/workflow/artifacts/s3/s3.go index e58f2f404e16..38f22d5a9d14 100644 --- a/workflow/artifacts/s3/s3.go +++ b/workflow/artifacts/s3/s3.go @@ -1,13 +1,14 @@ package s3 import ( - "github.com/argoproj/pkg/file" - argos3 "github.com/argoproj/pkg/s3" + "time" + log "github.com/sirupsen/logrus" + "k8s.io/apimachinery/pkg/util/wait" wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" - "k8s.io/apimachinery/pkg/util/wait" - "time" + "github.com/argoproj/pkg/file" + argos3 "github.com/argoproj/pkg/s3" ) // S3ArtifactDriver is a driver for AWS S3 @@ -35,7 +36,7 @@ func (s3Driver *S3ArtifactDriver) newS3Client() (argos3.S3Client, error) { func (s3Driver *S3ArtifactDriver) Load(inputArtifact *wfv1.Artifact, path string) error { err := wait.ExponentialBackoff(wait.Backoff{Duration: time.Second * 2, Factor: 2.0, Steps: 5, Jitter: 0.1}, func() (bool, error) { - log.Infof("ExponentialBackoff in S3 Load for path: %s", path) + log.Infof("S3 Load path: %s, key: %s", path, inputArtifact.S3.Key) s3cli, err := s3Driver.newS3Client() if err != nil { log.Warnf("Failed to create new S3 client: %v", err) @@ -46,6 +47,7 @@ func (s3Driver *S3ArtifactDriver) Load(inputArtifact *wfv1.Artifact, path string return true, nil } if !argos3.IsS3ErrCode(origErr, "NoSuchKey") { + log.Warnf("Failed get file: %v", origErr) return false, nil } // If we get here, the error was a NoSuchKey. 
The key might be a s3 "directory" @@ -60,6 +62,7 @@ func (s3Driver *S3ArtifactDriver) Load(inputArtifact *wfv1.Artifact, path string } if err = s3cli.GetDirectory(inputArtifact.S3.Bucket, inputArtifact.S3.Key, path); err != nil { + log.Warnf("Failed get directory: %v", err) return false, nil } return true, nil @@ -72,7 +75,7 @@ func (s3Driver *S3ArtifactDriver) Load(inputArtifact *wfv1.Artifact, path string func (s3Driver *S3ArtifactDriver) Save(path string, outputArtifact *wfv1.Artifact) error { err := wait.ExponentialBackoff(wait.Backoff{Duration: time.Second * 2, Factor: 2.0, Steps: 5, Jitter: 0.1}, func() (bool, error) { - log.Infof("ExponentialBackoff in S3 Save for path: %s", path) + log.Infof("S3 Save path: %s, key: %s", path, outputArtifact.S3.Key) s3cli, err := s3Driver.newS3Client() if err != nil { log.Warnf("Failed to create new S3 client: %v", err) @@ -85,11 +88,14 @@ func (s3Driver *S3ArtifactDriver) Save(path string, outputArtifact *wfv1.Artifac } if isDir { if err = s3cli.PutDirectory(outputArtifact.S3.Bucket, outputArtifact.S3.Key, path); err != nil { + log.Warnf("Failed to put directory: %v", err) + return false, nil + } + } else { + if err = s3cli.PutFile(outputArtifact.S3.Bucket, outputArtifact.S3.Key, path); err != nil { + log.Warnf("Failed to put file: %v", err) return false, nil } - } - if err = s3cli.PutFile(outputArtifact.S3.Bucket, outputArtifact.S3.Key, path); err != nil { - return false, nil } return true, nil }) diff --git a/workflow/common/common.go b/workflow/common/common.go index 96b25bd5a7c5..5b68158d5d4b 100644 --- a/workflow/common/common.go +++ b/workflow/common/common.go @@ -61,10 +61,11 @@ const ( // Each artifact will be named according to its input name (e.g: /argo/inputs/artifacts/CODE) ExecutorArtifactBaseDir = "/argo/inputs/artifacts" - // InitContainerMainFilesystemDir is a path made available to the init container such that the init container - // can access the same volume mounts used in the main container. 
This is used for the purposes of artifact loading - // (when there is overlapping paths between artifacts and volume mounts) - InitContainerMainFilesystemDir = "/mainctrfs" + // ExecutorMainFilesystemDir is a path made available to the init/wait containers such that they + // can access the same volume mounts used in the main container. This is used for the purposes + // of artifact loading (when there is overlapping paths between artifacts and volume mounts), + // as well as artifact collection by the wait container. + ExecutorMainFilesystemDir = "/mainctrfs" // ExecutorStagingEmptyDir is the path of the emptydir which is used as a staging area to transfer a file between init/main container for script/resource templates ExecutorStagingEmptyDir = "/argo/staging" @@ -75,9 +76,6 @@ const ( // Various environment variables containing pod information exposed to the executor container(s) - // EnvVarPodName contains the name of the pod (currently unused) - EnvVarPodName = "ARGO_POD_NAME" - // EnvVarContainerRuntimeExecutor contains the name of the container runtime executor to use, empty is equal to "docker" EnvVarContainerRuntimeExecutor = "ARGO_CONTAINER_RUNTIME_EXECUTOR" // EnvVarDownwardAPINodeIP is the envvar used to get the `status.hostIP` @@ -96,6 +94,9 @@ const ( // ContainerRuntimeExecutorK8sAPI to use the Kubernetes API server as container runtime executor ContainerRuntimeExecutorK8sAPI = "k8sapi" + // ContainerRuntimeExecutorPNS indicates to use process namespace sharing as the container runtime executor + ContainerRuntimeExecutorPNS = "pns" + // Variables that are added to the scope during template execution and can be referenced using {{}} syntax // GlobalVarWorkflowName is a global workflow variable referencing the workflow's metadata.name field diff --git a/workflow/common/util.go b/workflow/common/util.go index 72432016c08f..4c1dd2514a50 100644 --- a/workflow/common/util.go +++ b/workflow/common/util.go @@ -5,16 +5,16 @@ import ( "encoding/json" "fmt" 
"io" + "net/http" "os/exec" "regexp" "strconv" "strings" "time" - "github.com/argoproj/argo/errors" "github.com/argoproj/argo/pkg/apis/workflow" - wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" "github.com/ghodss/yaml" + "github.com/gorilla/websocket" log "github.com/sirupsen/logrus" "github.com/valyala/fasttemplate" apiv1 "k8s.io/api/core/v1" @@ -23,11 +23,15 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/remotecommand" + + "github.com/argoproj/argo/errors" + wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo/util" ) // FindOverlappingVolume looks an artifact path, checks if it overlaps with any // user specified volumeMounts in the template, and returns the deepest volumeMount -// (if any). +// (if any). A return value of nil indicates the path is not under any volumeMount. func FindOverlappingVolume(tmpl *wfv1.Template, path string) *apiv1.VolumeMount { if tmpl.Container == nil { return nil @@ -66,6 +70,126 @@ func KillPodContainer(restConfig *rest.Config, namespace string, pod string, con return nil } +// ContainerLogStream returns an io.ReadCloser for a container's log stream using the websocket +// interface. This was implemented in the hopes that we could selectively choose stdout from stderr, +// but due to https://github.com/kubernetes/kubernetes/issues/28167, it is not possible to discern +// stdout from stderr using the K8s API server, so this function is unused, instead preferring the +// pod logs interface from client-go. It's left as a reference for when issue #28167 is eventually +// resolved. +func ContainerLogStream(config *rest.Config, namespace string, pod string, container string) (io.ReadCloser, error) { + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, errors.InternalWrapError(err) + } + logRequest := clientset.CoreV1().RESTClient().Get(). + Resource("pods"). + Name(pod). + Namespace(namespace). + SubResource("log"). 
+ Param("container", container) + u := logRequest.URL() + switch u.Scheme { + case "https": + u.Scheme = "wss" + case "http": + u.Scheme = "ws" + default: + return nil, errors.Errorf("Malformed URL %s", u.String()) + } + + log.Info(u.String()) + wsrc := websocketReadCloser{ + &bytes.Buffer{}, + } + + wrappedRoundTripper, err := roundTripperFromConfig(config, wsrc.WebsocketCallback) + if err != nil { + return nil, errors.InternalWrapError(err) + } + + // Send the request and let the callback do its work + req := &http.Request{ + Method: http.MethodGet, + URL: u, + } + _, err = wrappedRoundTripper.RoundTrip(req) + if err != nil && !websocket.IsCloseError(err, websocket.CloseNormalClosure) { + return nil, errors.InternalWrapError(err) + } + return &wsrc, nil +} + +type RoundTripCallback func(conn *websocket.Conn, resp *http.Response, err error) error + +type WebsocketRoundTripper struct { + Dialer *websocket.Dialer + Do RoundTripCallback +} + +func (d *WebsocketRoundTripper) RoundTrip(r *http.Request) (*http.Response, error) { + conn, resp, err := d.Dialer.Dial(r.URL.String(), r.Header) + if err == nil { + defer util.Close(conn) + } + return resp, d.Do(conn, resp, err) +} + +func (w *websocketReadCloser) WebsocketCallback(ws *websocket.Conn, resp *http.Response, err error) error { + if err != nil { + if resp != nil && resp.StatusCode != http.StatusOK { + buf := new(bytes.Buffer) + _, _ = buf.ReadFrom(resp.Body) + return errors.InternalErrorf("Can't connect to log endpoint (%d): %s", resp.StatusCode, buf.String()) + } + return errors.InternalErrorf("Can't connect to log endpoint: %s", err.Error()) + } + + for { + _, body, err := ws.ReadMessage() + if len(body) > 0 { + //log.Debugf("%d: %s", msgType, string(body)) + _, writeErr := w.Write(body) + if writeErr != nil { + return writeErr + } + } + if err != nil { + if err == io.EOF { + log.Infof("websocket closed: %v", err) + return nil + } + log.Warnf("websocket error: %v", err) + return err + } + } +} + +func 
roundTripperFromConfig(config *rest.Config, callback RoundTripCallback) (http.RoundTripper, error) { + tlsConfig, err := rest.TLSConfigFor(config) + if err != nil { + return nil, err + } + // Create a roundtripper which will pass in the final underlying websocket connection to a callback + wsrt := &WebsocketRoundTripper{ + Do: callback, + Dialer: &websocket.Dialer{ + Proxy: http.ProxyFromEnvironment, + TLSClientConfig: tlsConfig, + }, + } + // Make sure we inherit all relevant security headers + return rest.HTTPWrappersForConfig(config, wsrt) +} + +type websocketReadCloser struct { + *bytes.Buffer +} + +func (w *websocketReadCloser) Close() error { + //return w.conn.Close() + return nil +} + // ExecPodContainer runs a command in a container in a pod and returns the remotecommand.Executor func ExecPodContainer(restConfig *rest.Config, namespace string, pod string, container string, stdout bool, stderr bool, command ...string) (remotecommand.Executor, error) { clientset, err := kubernetes.NewForConfig(restConfig) @@ -146,17 +270,23 @@ func ProcessArgs(tmpl *wfv1.Template, args wfv1.Arguments, globalParams, localPa newInputArtifacts[i] = inArt continue } - // artifact must be supplied argArt := args.GetArtifactByName(inArt.Name) - if !inArt.Optional && argArt == nil { - return nil, errors.Errorf(errors.CodeBadRequest, "inputs.artifacts.%s was not supplied", inArt.Name) + if !inArt.Optional { + // artifact must be supplied + if argArt == nil { + return nil, errors.Errorf(errors.CodeBadRequest, "inputs.artifacts.%s was not supplied", inArt.Name) + } + if !argArt.HasLocation() && !validateOnly { + return nil, errors.Errorf(errors.CodeBadRequest, "inputs.artifacts.%s missing location information", inArt.Name) + } } - if !inArt.Optional && !argArt.HasLocation() && !validateOnly { - return nil, errors.Errorf(errors.CodeBadRequest, "inputs.artifacts.%s missing location information", inArt.Name) + if argArt != nil { + argArt.Path = inArt.Path + argArt.Mode = inArt.Mode + 
newInputArtifacts[i] = *argArt + } else { + newInputArtifacts[i] = inArt } - argArt.Path = inArt.Path - argArt.Mode = inArt.Mode - newInputArtifacts[i] = *argArt } tmpl.Inputs.Artifacts = newInputArtifacts @@ -259,10 +389,12 @@ func RunCommand(name string, arg ...string) error { log.Info(cmdStr) _, err := cmd.Output() if err != nil { - exErr := err.(*exec.ExitError) - errOutput := string(exErr.Stderr) - log.Errorf("`%s` failed: %s", cmdStr, errOutput) - return errors.InternalError(strings.TrimSpace(errOutput)) + if exErr, ok := err.(*exec.ExitError); ok { + errOutput := string(exErr.Stderr) + log.Errorf("`%s` failed: %s", cmdStr, errOutput) + return errors.InternalError(strings.TrimSpace(errOutput)) + } + return errors.InternalWrapError(err) } return nil } diff --git a/workflow/controller/config.go b/workflow/controller/config.go index 7da737d145bf..80077087034d 100644 --- a/workflow/controller/config.go +++ b/workflow/controller/config.go @@ -22,12 +22,18 @@ import ( // WorkflowControllerConfig contain the configuration settings for the workflow controller type WorkflowControllerConfig struct { // ExecutorImage is the image name of the executor to use when running pods + // DEPRECATED: use --executor-image flag to workflow-controller instead ExecutorImage string `json:"executorImage,omitempty"` // ExecutorImagePullPolicy is the imagePullPolicy of the executor to use when running pods + // DEPRECATED: use `executor.imagePullPolicy` in configmap instead ExecutorImagePullPolicy string `json:"executorImagePullPolicy,omitempty"` + // Executor holds container customizations for the executor to use when running pods + Executor *apiv1.Container `json:"executor,omitempty"` + // ExecutorResources specifies the resource requirements that will be used for the executor sidecar + // DEPRECATED: use `executor.resources` in configmap instead ExecutorResources *apiv1.ResourceRequirements `json:"executorResources,omitempty"` // KubeConfig specifies a kube config file for the wait & 
init containers @@ -162,13 +168,13 @@ func (wfc *WorkflowController) executorImage() string { // executorImagePullPolicy returns the imagePullPolicy to use for the workflow executor func (wfc *WorkflowController) executorImagePullPolicy() apiv1.PullPolicy { - var policy string if wfc.cliExecutorImagePullPolicy != "" { - policy = wfc.cliExecutorImagePullPolicy + return apiv1.PullPolicy(wfc.cliExecutorImagePullPolicy) + } else if wfc.Config.Executor != nil && wfc.Config.Executor.ImagePullPolicy != "" { + return wfc.Config.Executor.ImagePullPolicy } else { - policy = wfc.Config.ExecutorImagePullPolicy + return apiv1.PullPolicy(wfc.Config.ExecutorImagePullPolicy) } - return apiv1.PullPolicy(policy) } func (wfc *WorkflowController) watchControllerConfigMap(ctx context.Context) (cache.Controller, error) { diff --git a/workflow/controller/exec_control.go b/workflow/controller/exec_control.go index b1ad7f12ddf0..b9ab95e4603f 100644 --- a/workflow/controller/exec_control.go +++ b/workflow/controller/exec_control.go @@ -66,6 +66,12 @@ func (woc *wfOperationCtx) applyExecutionControl(pod *apiv1.Pod, wfNodesLock *sy return nil } } + if podExecCtl.Deadline != nil && podExecCtl.Deadline.IsZero() { + // If the pod has already been explicitly signaled to terminate, then do nothing. + // This can happen when daemon steps are terminated. + woc.log.Infof("Skipping sync of execution control of pod %s. 
pod has been signaled to terminate", pod.Name) + return nil + } woc.log.Infof("Execution control for pod %s out-of-sync desired: %v, actual: %v", pod.Name, desiredExecCtl.Deadline, podExecCtl.Deadline) return woc.updateExecutionControl(pod.Name, desiredExecCtl) } @@ -122,7 +128,7 @@ func (woc *wfOperationCtx) updateExecutionControl(podName string, execCtl common woc.log.Infof("Signalling %s of updates", podName) exec, err := common.ExecPodContainer( woc.controller.restConfig, woc.wf.ObjectMeta.Namespace, podName, - common.WaitContainerName, true, true, "sh", "-c", "kill -s USR2 1", + common.WaitContainerName, true, true, "sh", "-c", "kill -s USR2 $(pidof argoexec)", ) if err != nil { return err diff --git a/workflow/controller/operator.go b/workflow/controller/operator.go index 8cc5429fb069..60ba3c4379c4 100644 --- a/workflow/controller/operator.go +++ b/workflow/controller/operator.go @@ -21,6 +21,7 @@ import ( apierr "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/cache" + "k8s.io/utils/pointer" "github.com/argoproj/argo/errors" wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" @@ -130,7 +131,8 @@ func (woc *wfOperationCtx) operate() { // Perform one-time workflow validation if woc.wf.Status.Phase == "" { woc.markWorkflowRunning() - err := validate.ValidateWorkflow(woc.wf) + validateOpts := validate.ValidateOpts{ContainerRuntimeExecutor: woc.controller.Config.ContainerRuntimeExecutor} + err := validate.ValidateWorkflow(woc.wf, validateOpts) if err != nil { woc.markWorkflowFailed(fmt.Sprintf("invalid spec: %s", err.Error())) return @@ -596,15 +598,14 @@ func assessNodeStatus(pod *apiv1.Pod, node *wfv1.NodeStatus) *wfv1.NodeStatus { var newDaemonStatus *bool var message string updated := false - f := false switch pod.Status.Phase { case apiv1.PodPending: newPhase = wfv1.NodePending - newDaemonStatus = &f + newDaemonStatus = pointer.BoolPtr(false) message = getPendingReason(pod) case 
apiv1.PodSucceeded: newPhase = wfv1.NodeSucceeded - newDaemonStatus = &f + newDaemonStatus = pointer.BoolPtr(false) case apiv1.PodFailed: // ignore pod failure for daemoned steps if node.IsDaemoned() { @@ -612,7 +613,7 @@ func assessNodeStatus(pod *apiv1.Pod, node *wfv1.NodeStatus) *wfv1.NodeStatus { } else { newPhase, message = inferFailedReason(pod) } - newDaemonStatus = &f + newDaemonStatus = pointer.BoolPtr(false) case apiv1.PodRunning: newPhase = wfv1.NodeRunning tmplStr, ok := pod.Annotations[common.AnnotationKeyTemplate] @@ -635,8 +636,7 @@ func assessNodeStatus(pod *apiv1.Pod, node *wfv1.NodeStatus) *wfv1.NodeStatus { } // proceed to mark node status as running (and daemoned) newPhase = wfv1.NodeRunning - t := true - newDaemonStatus = &t + newDaemonStatus = pointer.BoolPtr(true) log.Infof("Processing ready daemon pod: %v", pod.ObjectMeta.SelfLink) } default: @@ -1534,17 +1534,12 @@ func (woc *wfOperationCtx) executeResource(nodeName string, tmpl *wfv1.Template, if node != nil { return node } - mainCtr := apiv1.Container{ - Image: woc.controller.executorImage(), - ImagePullPolicy: woc.controller.executorImagePullPolicy(), - Command: []string{"argoexec"}, - Args: []string{"resource", tmpl.Resource.Action}, - VolumeMounts: []apiv1.VolumeMount{ - volumeMountPodMetadata, - }, - Env: execEnvVars, + mainCtr := woc.newExecContainer(common.MainContainerName) + mainCtr.Command = []string{"argoexec", "resource", tmpl.Resource.Action} + mainCtr.VolumeMounts = []apiv1.VolumeMount{ + volumeMountPodMetadata, } - _, err := woc.createWorkflowPod(nodeName, mainCtr, tmpl) + _, err := woc.createWorkflowPod(nodeName, *mainCtr, tmpl) if err != nil { return woc.initializeNode(nodeName, wfv1.NodeTypePod, tmpl.Name, boundaryID, wfv1.NodeError, err.Error()) } diff --git a/workflow/controller/operator_test.go b/workflow/controller/operator_test.go index cf5d95b348b0..b30b88fd2d9c 100644 --- a/workflow/controller/operator_test.go +++ b/workflow/controller/operator_test.go @@ -435,14 
+435,16 @@ func TestNestedTemplateParallelismLimit(t *testing.T) { // TestSidecarResourceLimits verifies resource limits on the sidecar can be set in the controller config func TestSidecarResourceLimits(t *testing.T) { controller := newController() - controller.Config.ExecutorResources = &apiv1.ResourceRequirements{ - Limits: apiv1.ResourceList{ - apiv1.ResourceCPU: resource.MustParse("0.5"), - apiv1.ResourceMemory: resource.MustParse("512Mi"), - }, - Requests: apiv1.ResourceList{ - apiv1.ResourceCPU: resource.MustParse("0.1"), - apiv1.ResourceMemory: resource.MustParse("64Mi"), + controller.Config.Executor = &apiv1.Container{ + Resources: apiv1.ResourceRequirements{ + Limits: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("0.5"), + apiv1.ResourceMemory: resource.MustParse("512Mi"), + }, + Requests: apiv1.ResourceList{ + apiv1.ResourceCPU: resource.MustParse("0.1"), + apiv1.ResourceMemory: resource.MustParse("64Mi"), + }, }, } wf := unmarshalWF(helloWorldWf) @@ -929,7 +931,7 @@ func TestMetadataPassing(t *testing.T) { var ( pod = pods.Items[0] - container = pod.Spec.Containers[0] + container = pod.Spec.Containers[1] foundRepo = false foundRev = false ) @@ -999,5 +1001,5 @@ func TestResolveIOPathPlaceholders(t *testing.T) { assert.Nil(t, err) assert.True(t, len(pods.Items) > 0, "pod was not created successfully") - assert.Equal(t, []string{"sh", "-c", "head -n 3 <\"/inputs/text/data\" | tee \"/outputs/text/data\" | wc -l > \"/outputs/actual-lines-count/data\""}, pods.Items[0].Spec.Containers[0].Command) + assert.Equal(t, []string{"sh", "-c", "head -n 3 <\"/inputs/text/data\" | tee \"/outputs/text/data\" | wc -l > \"/outputs/actual-lines-count/data\""}, pods.Items[0].Spec.Containers[1].Command) } diff --git a/workflow/controller/workflowpod.go b/workflow/controller/workflowpod.go index 46f6f13543e0..e582a281d2c5 100644 --- a/workflow/controller/workflowpod.go +++ b/workflow/controller/workflowpod.go @@ -5,16 +5,19 @@ import ( "fmt" "io" "path" + 
"path/filepath" "strconv" - "github.com/argoproj/argo/errors" - wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" - "github.com/argoproj/argo/workflow/common" log "github.com/sirupsen/logrus" "github.com/valyala/fasttemplate" apiv1 "k8s.io/api/core/v1" apierr "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" + + "github.com/argoproj/argo/errors" + wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo/workflow/common" ) // Reusable k8s pod spec portions used in workflow pods @@ -63,26 +66,8 @@ var ( MountPath: "/var/run/docker.sock", ReadOnly: true, } - - // execEnvVars exposes various pod information as environment variables to the exec container - execEnvVars = []apiv1.EnvVar{ - envFromField(common.EnvVarPodName, "metadata.name"), - } ) -// envFromField is a helper to return a EnvVar with the name and field -func envFromField(envVarName, fieldPath string) apiv1.EnvVar { - return apiv1.EnvVar{ - Name: envVarName, - ValueFrom: &apiv1.EnvVarSource{ - FieldRef: &apiv1.ObjectFieldSelector{ - APIVersion: "v1", - FieldPath: fieldPath, - }, - }, - } -} - func (woc *wfOperationCtx) createWorkflowPod(nodeName string, mainCtr apiv1.Container, tmpl *wfv1.Template) (*apiv1.Pod, error) { nodeID := woc.wf.NodeID(nodeName) woc.log.Debugf("Creating Pod: %s (%s)", nodeName, nodeID) @@ -105,10 +90,7 @@ func (woc *wfOperationCtx) createWorkflowPod(nodeName string, mainCtr apiv1.Cont }, }, Spec: apiv1.PodSpec{ - RestartPolicy: apiv1.RestartPolicyNever, - Containers: []apiv1.Container{ - mainCtr, - }, + RestartPolicy: apiv1.RestartPolicyNever, Volumes: woc.createVolumes(), ActiveDeadlineSeconds: tmpl.ActiveDeadlineSeconds, ServiceAccountName: woc.wf.Spec.ServiceAccountName, @@ -131,6 +113,9 @@ func (woc *wfOperationCtx) createWorkflowPod(nodeName string, mainCtr apiv1.Cont if woc.controller.Config.InstanceID != "" { pod.ObjectMeta.Labels[common.LabelKeyControllerInstanceID] = 
woc.controller.Config.InstanceID } + if woc.controller.Config.ContainerRuntimeExecutor == common.ContainerRuntimeExecutorPNS { + pod.Spec.ShareProcessNamespace = pointer.BoolPtr(true) + } err := woc.addArchiveLocation(pod, tmpl) if err != nil { @@ -147,6 +132,11 @@ func (woc *wfOperationCtx) createWorkflowPod(nodeName string, mainCtr apiv1.Cont } pod.Spec.Containers = append(pod.Spec.Containers, *waitCtr) } + // NOTE: the order of the container list is significant. kubelet will pull, create, and start + // each container sequentially in the order that they appear in this list. For PNS we want the + // wait container to start before the main, so that it always has the chance to see the main + // container's PID and root filesystem. + pod.Spec.Containers = append(pod.Spec.Containers, mainCtr) // Add init container only if it needs input artifacts. This is also true for // script templates (which needs to populate the script) @@ -169,22 +159,20 @@ func (woc *wfOperationCtx) createWorkflowPod(nodeName string, mainCtr apiv1.Cont } if tmpl.GetType() == wfv1.TemplateTypeScript { - addExecutorStagingVolume(pod) + addScriptStagingVolume(pod) } - // addInitContainers should be called after all volumes have been manipulated - // in the main container (in case sidecar requires volume mount mirroring) + // addInitContainers, addSidecars and addOutputArtifactsVolumes should be called after all + // volumes have been manipulated in the main container since volumeMounts are mirrored err = addInitContainers(pod, tmpl) if err != nil { return nil, err } - - // addSidecars should be called after all volumes have been manipulated - // in the main container (in case sidecar requires volume mount mirroring) err = addSidecars(pod, tmpl) if err != nil { return nil, err } + addOutputArtifactsVolumes(pod, tmpl) // Set the container template JSON in pod annotations, which executor examines for things like // artifact location/path. 
@@ -258,28 +246,45 @@ func substituteGlobals(pod *apiv1.Pod, globalParams map[string]string) (*apiv1.P } func (woc *wfOperationCtx) newInitContainer(tmpl *wfv1.Template) apiv1.Container { - ctr := woc.newExecContainer(common.InitContainerName, false, "init") - ctr.VolumeMounts = append([]apiv1.VolumeMount{volumeMountPodMetadata}, ctr.VolumeMounts...) + ctr := woc.newExecContainer(common.InitContainerName) + ctr.Command = []string{"argoexec", "init"} return *ctr } func (woc *wfOperationCtx) newWaitContainer(tmpl *wfv1.Template) (*apiv1.Container, error) { - ctr := woc.newExecContainer(common.WaitContainerName, false, "wait") - ctr.VolumeMounts = append(woc.createVolumeMounts(), ctr.VolumeMounts...) + ctr := woc.newExecContainer(common.WaitContainerName) + ctr.Command = []string{"argoexec", "wait"} + switch woc.controller.Config.ContainerRuntimeExecutor { + case common.ContainerRuntimeExecutorPNS: + ctr.SecurityContext = &apiv1.SecurityContext{ + Capabilities: &apiv1.Capabilities{ + Add: []apiv1.Capability{ + // necessary to access main's root filesystem when run with a different user id + apiv1.Capability("SYS_PTRACE"), + }, + }, + } + case "", common.ContainerRuntimeExecutorDocker: + ctr.VolumeMounts = append(ctr.VolumeMounts, volumeMountDockerSock) + } return ctr, nil } func (woc *wfOperationCtx) createEnvVars() []apiv1.EnvVar { + var execEnvVars []apiv1.EnvVar + if woc.controller.Config.Executor != nil { + execEnvVars = woc.controller.Config.Executor.Env + } switch woc.controller.Config.ContainerRuntimeExecutor { case common.ContainerRuntimeExecutorK8sAPI: - return append(execEnvVars, + execEnvVars = append(execEnvVars, apiv1.EnvVar{ Name: common.EnvVarContainerRuntimeExecutor, Value: woc.controller.Config.ContainerRuntimeExecutor, }, ) case common.ContainerRuntimeExecutorKubelet: - return append(execEnvVars, + execEnvVars = append(execEnvVars, apiv1.EnvVar{ Name: common.EnvVarContainerRuntimeExecutor, Value: woc.controller.Config.ContainerRuntimeExecutor, @@ 
-301,21 +306,15 @@ func (woc *wfOperationCtx) createEnvVars() []apiv1.EnvVar { Value: strconv.FormatBool(woc.controller.Config.KubeletInsecure), }, ) - default: - return execEnvVars - } -} - -func (woc *wfOperationCtx) createVolumeMounts() []apiv1.VolumeMount { - volumeMounts := []apiv1.VolumeMount{ - volumeMountPodMetadata, - } - switch woc.controller.Config.ContainerRuntimeExecutor { - case common.ContainerRuntimeExecutorKubelet, common.ContainerRuntimeExecutorK8sAPI: - return volumeMounts - default: - return append(volumeMounts, volumeMountDockerSock) + case common.ContainerRuntimeExecutorPNS: + execEnvVars = append(execEnvVars, + apiv1.EnvVar{ + Name: common.EnvVarContainerRuntimeExecutor, + Value: woc.controller.Config.ContainerRuntimeExecutor, + }, + ) } + return execEnvVars } func (woc *wfOperationCtx) createVolumes() []apiv1.Volume { @@ -337,26 +336,29 @@ func (woc *wfOperationCtx) createVolumes() []apiv1.Volume { }) } switch woc.controller.Config.ContainerRuntimeExecutor { - case common.ContainerRuntimeExecutorKubelet, common.ContainerRuntimeExecutorK8sAPI: + case common.ContainerRuntimeExecutorKubelet, common.ContainerRuntimeExecutorK8sAPI, common.ContainerRuntimeExecutorPNS: return volumes default: return append(volumes, volumeDockerSock) } } -func (woc *wfOperationCtx) newExecContainer(name string, privileged bool, subCommand string) *apiv1.Container { +func (woc *wfOperationCtx) newExecContainer(name string) *apiv1.Container { exec := apiv1.Container{ Name: name, Image: woc.controller.executorImage(), ImagePullPolicy: woc.controller.executorImagePullPolicy(), Env: woc.createEnvVars(), - SecurityContext: &apiv1.SecurityContext{ - Privileged: &privileged, + VolumeMounts: []apiv1.VolumeMount{ + volumeMountPodMetadata, }, - Command: []string{"argoexec"}, - Args: []string{subCommand}, } - if woc.controller.Config.ExecutorResources != nil { + if woc.controller.Config.Executor != nil { + exec.Args = woc.controller.Config.Executor.Args + } + if 
isResourcesSpecified(woc.controller.Config.Executor) { + exec.Resources = woc.controller.Config.Executor.Resources + } else if woc.controller.Config.ExecutorResources != nil { exec.Resources = *woc.controller.Config.ExecutorResources } if woc.controller.Config.KubeConfig != nil { @@ -380,6 +382,10 @@ func (woc *wfOperationCtx) newExecContainer(name string, privileged bool, subCom return &exec } +func isResourcesSpecified(ctr *apiv1.Container) bool { + return ctr != nil && (ctr.Resources.Limits.Cpu() != nil || ctr.Resources.Limits.Memory() != nil) +} + // addMetadata applies metadata specified in the template func (woc *wfOperationCtx) addMetadata(pod *apiv1.Pod, tmpl *wfv1.Template) { for k, v := range tmpl.Metadata.Annotations { @@ -573,7 +579,7 @@ func (woc *wfOperationCtx) addInputArtifactsVolumes(pod *apiv1.Pod, tmpl *wfv1.T // instead of the artifacts volume if tmpl.Container != nil { for _, mnt := range tmpl.Container.VolumeMounts { - mnt.MountPath = path.Join(common.InitContainerMainFilesystemDir, mnt.MountPath) + mnt.MountPath = filepath.Join(common.ExecutorMainFilesystemDir, mnt.MountPath) initCtr.VolumeMounts = append(initCtr.VolumeMounts, mnt) } } @@ -582,19 +588,19 @@ func (woc *wfOperationCtx) addInputArtifactsVolumes(pod *apiv1.Pod, tmpl *wfv1.T } } - mainCtrIndex := 0 - var mainCtr *apiv1.Container + mainCtrIndex := -1 for i, ctr := range pod.Spec.Containers { - if ctr.Name == common.MainContainerName { + switch ctr.Name { + case common.MainContainerName: mainCtrIndex = i - mainCtr = &pod.Spec.Containers[i] + break } } - if mainCtr == nil { - panic("Could not find main container in pod spec") + if mainCtrIndex == -1 { + panic("Could not find main or wait container in pod spec") } - // TODO: the order in which we construct the volume mounts may matter, - // especially if they are overlapping. 
+ mainCtr := &pod.Spec.Containers[mainCtrIndex] + for _, art := range tmpl.Inputs.Artifacts { if art.Path == "" { return errors.Errorf(errors.CodeBadRequest, "inputs.artifacts.%s did not specify a path", art.Name) @@ -622,6 +628,41 @@ func (woc *wfOperationCtx) addInputArtifactsVolumes(pod *apiv1.Pod, tmpl *wfv1.T return nil } +// addOutputArtifactsVolumes mirrors any volume mounts in the main container to the wait sidecar. +// For any output artifacts that were produced in mounted volumes (e.g. PVCs, emptyDirs), the +// wait container will collect the artifacts directly from volumeMount instead of `docker cp`-ing +// them to the wait sidecar. In order for this to work, we mirror all volume mounts in the main +// container under a well-known path. +func addOutputArtifactsVolumes(pod *apiv1.Pod, tmpl *wfv1.Template) { + if tmpl.GetType() == wfv1.TemplateTypeResource { + return + } + mainCtrIndex := -1 + waitCtrIndex := -1 + var mainCtr *apiv1.Container + for i, ctr := range pod.Spec.Containers { + switch ctr.Name { + case common.MainContainerName: + mainCtrIndex = i + case common.WaitContainerName: + waitCtrIndex = i + } + } + if mainCtrIndex == -1 || waitCtrIndex == -1 { + panic("Could not find main or wait container in pod spec") + } + mainCtr = &pod.Spec.Containers[mainCtrIndex] + waitCtr := &pod.Spec.Containers[waitCtrIndex] + + for _, mnt := range mainCtr.VolumeMounts { + mnt.MountPath = filepath.Join(common.ExecutorMainFilesystemDir, mnt.MountPath) + // ReadOnly is needed to be false for overlapping volume mounts + mnt.ReadOnly = false + waitCtr.VolumeMounts = append(waitCtr.VolumeMounts, mnt) + } + pod.Spec.Containers[waitCtrIndex] = *waitCtr +} + // addArchiveLocation conditionally updates the template with the default artifact repository // information configured in the controller, for the purposes of archiving outputs. 
This is skipped // for templates which do not need to archive anything, or have explicitly set an archive location @@ -647,7 +688,7 @@ func (woc *wfOperationCtx) addArchiveLocation(pod *apiv1.Pod, tmpl *wfv1.Templat } } if !needLocation { - woc.log.Debugf("archive location unecessary") + woc.log.Debugf("archive location unnecessary") return nil } tmpl.ArchiveLocation = &wfv1.ArtifactLocation{ @@ -691,9 +732,9 @@ func (woc *wfOperationCtx) addArchiveLocation(pod *apiv1.Pod, tmpl *wfv1.Templat return nil } -// addExecutorStagingVolume sets up a shared staging volume between the init container +// addScriptStagingVolume sets up a shared staging volume between the init container // and main container for the purpose of holding the script source code for script templates -func addExecutorStagingVolume(pod *apiv1.Pod) { +func addScriptStagingVolume(pod *apiv1.Pod) { volName := "argo-staging" stagingVol := apiv1.Volume{ Name: volName, @@ -721,11 +762,7 @@ func addExecutorStagingVolume(pod *apiv1.Pod) { Name: volName, MountPath: common.ExecutorStagingEmptyDir, } - if ctr.VolumeMounts == nil { - ctr.VolumeMounts = []apiv1.VolumeMount{volMount} - } else { - ctr.VolumeMounts = append(ctr.VolumeMounts, volMount) - } + ctr.VolumeMounts = append(ctr.VolumeMounts, volMount) pod.Spec.Containers[i] = ctr found = true break diff --git a/workflow/controller/workflowpod_test.go b/workflow/controller/workflowpod_test.go index 41f5bd12d495..190c48d42c1c 100644 --- a/workflow/controller/workflowpod_test.go +++ b/workflow/controller/workflowpod_test.go @@ -2,6 +2,7 @@ package controller import ( "encoding/json" + "fmt" "testing" wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" @@ -283,8 +284,8 @@ func TestVolumeAndVolumeMounts(t *testing.T) { assert.Equal(t, "podmetadata", pod.Spec.Volumes[0].Name) assert.Equal(t, "docker-sock", pod.Spec.Volumes[1].Name) assert.Equal(t, "volume-name", pod.Spec.Volumes[2].Name) - assert.Equal(t, 1, len(pod.Spec.Containers[0].VolumeMounts)) - 
assert.Equal(t, "volume-name", pod.Spec.Containers[0].VolumeMounts[0].Name) + assert.Equal(t, 1, len(pod.Spec.Containers[1].VolumeMounts)) + assert.Equal(t, "volume-name", pod.Spec.Containers[1].VolumeMounts[0].Name) } // For Kubelet executor @@ -301,8 +302,8 @@ func TestVolumeAndVolumeMounts(t *testing.T) { assert.Equal(t, 2, len(pod.Spec.Volumes)) assert.Equal(t, "podmetadata", pod.Spec.Volumes[0].Name) assert.Equal(t, "volume-name", pod.Spec.Volumes[1].Name) - assert.Equal(t, 1, len(pod.Spec.Containers[0].VolumeMounts)) - assert.Equal(t, "volume-name", pod.Spec.Containers[0].VolumeMounts[0].Name) + assert.Equal(t, 1, len(pod.Spec.Containers[1].VolumeMounts)) + assert.Equal(t, "volume-name", pod.Spec.Containers[1].VolumeMounts[0].Name) } // For K8sAPI executor @@ -319,12 +320,26 @@ func TestVolumeAndVolumeMounts(t *testing.T) { assert.Equal(t, 2, len(pod.Spec.Volumes)) assert.Equal(t, "podmetadata", pod.Spec.Volumes[0].Name) assert.Equal(t, "volume-name", pod.Spec.Volumes[1].Name) - assert.Equal(t, 1, len(pod.Spec.Containers[0].VolumeMounts)) - assert.Equal(t, "volume-name", pod.Spec.Containers[0].VolumeMounts[0].Name) + assert.Equal(t, 1, len(pod.Spec.Containers[1].VolumeMounts)) + assert.Equal(t, "volume-name", pod.Spec.Containers[1].VolumeMounts[0].Name) } } func TestOutOfCluster(t *testing.T) { + + verifyKubeConfigVolume := func(ctr apiv1.Container, volName, mountPath string) { + for _, vol := range ctr.VolumeMounts { + if vol.Name == volName && vol.MountPath == mountPath { + for _, arg := range ctr.Args { + if arg == fmt.Sprintf("--kubeconfig=%s", mountPath) { + return + } + } + } + } + t.Fatalf("%v does not have kubeconfig mounted properly (name: %s, mountPath: %s)", ctr, volName, mountPath) + } + // default mount path & volume name { woc := newWoc() @@ -341,11 +356,8 @@ func TestOutOfCluster(t *testing.T) { assert.Equal(t, "kubeconfig", pod.Spec.Volumes[1].Name) assert.Equal(t, "foo", pod.Spec.Volumes[1].VolumeSource.Secret.SecretName) - // kubeconfig 
volume is the last one - idx := len(pod.Spec.Containers[1].VolumeMounts) - 1 - assert.Equal(t, "kubeconfig", pod.Spec.Containers[1].VolumeMounts[idx].Name) - assert.Equal(t, "/kube/config", pod.Spec.Containers[1].VolumeMounts[idx].MountPath) - assert.Equal(t, "--kubeconfig=/kube/config", pod.Spec.Containers[1].Args[1]) + waitCtr := pod.Spec.Containers[0] + verifyKubeConfigVolume(waitCtr, "kubeconfig", "/kube/config") } // custom mount path & volume name, in case name collision @@ -367,10 +379,8 @@ func TestOutOfCluster(t *testing.T) { assert.Equal(t, "foo", pod.Spec.Volumes[1].VolumeSource.Secret.SecretName) // kubeconfig volume is the last one - idx := len(pod.Spec.Containers[1].VolumeMounts) - 1 - assert.Equal(t, "kube-config-secret", pod.Spec.Containers[1].VolumeMounts[idx].Name) - assert.Equal(t, "/some/path/config", pod.Spec.Containers[1].VolumeMounts[idx].MountPath) - assert.Equal(t, "--kubeconfig=/some/path/config", pod.Spec.Containers[1].Args[1]) + waitCtr := pod.Spec.Containers[0] + verifyKubeConfigVolume(waitCtr, "kube-config-secret", "/some/path/config") } } @@ -472,7 +482,7 @@ func TestSidecars(t *testing.T) { pod, err := woc.controller.kubeclientset.CoreV1().Pods("").Get(podName, metav1.GetOptions{}) assert.Nil(t, err) assert.Equal(t, 3, len(pod.Spec.Containers)) - assert.Equal(t, "main", pod.Spec.Containers[0].Name) - assert.Equal(t, "wait", pod.Spec.Containers[1].Name) + assert.Equal(t, "wait", pod.Spec.Containers[0].Name) + assert.Equal(t, "main", pod.Spec.Containers[1].Name) assert.Equal(t, "side-foo", pod.Spec.Containers[2].Name) } diff --git a/workflow/executor/common/common.go b/workflow/executor/common/common.go index e5b94cc38f4b..0ce8d251532f 100644 --- a/workflow/executor/common/common.go +++ b/workflow/executor/common/common.go @@ -19,7 +19,7 @@ const ( // killGracePeriod is the time in seconds after sending SIGTERM before // forcefully killing the sidecar with SIGKILL (value matches k8s) -const killGracePeriod = 10 +const KillGracePeriod = 
10 // GetContainerID returns container ID of a ContainerStatus resource func GetContainerID(container *v1.ContainerStatus) string { @@ -94,7 +94,7 @@ func KillGracefully(c KubernetesClientInterface, containerID string) error { if err != nil { return err } - err = WaitForTermination(c, containerID, time.Second*killGracePeriod) + err = WaitForTermination(c, containerID, time.Second*KillGracePeriod) if err == nil { log.Infof("ContainerID %q successfully killed", containerID) return nil @@ -104,7 +104,7 @@ func KillGracefully(c KubernetesClientInterface, containerID string) error { if err != nil { return err } - err = WaitForTermination(c, containerID, time.Second*killGracePeriod) + err = WaitForTermination(c, containerID, time.Second*KillGracePeriod) if err != nil { return err } diff --git a/workflow/executor/docker/docker.go b/workflow/executor/docker/docker.go index 9b0c7d9266bb..2c6b3893c7a7 100644 --- a/workflow/executor/docker/docker.go +++ b/workflow/executor/docker/docker.go @@ -4,24 +4,20 @@ import ( "archive/tar" "compress/gzip" "fmt" + "io" "os" "os/exec" - "strings" "time" - "github.com/argoproj/argo/util/file" - - "github.com/argoproj/argo/util" + log "github.com/sirupsen/logrus" "github.com/argoproj/argo/errors" + "github.com/argoproj/argo/util" + "github.com/argoproj/argo/util/file" "github.com/argoproj/argo/workflow/common" - log "github.com/sirupsen/logrus" + execcommon "github.com/argoproj/argo/workflow/executor/common" ) -// killGracePeriod is the time in seconds after sending SIGTERM before -// forcefully killing the sidecar with SIGKILL (value matches k8s) -const killGracePeriod = 10 - type DockerExecutor struct{} func NewDockerExecutor() (*DockerExecutor, error) { @@ -73,35 +69,30 @@ func (d *DockerExecutor) CopyFile(containerID string, sourcePath string, destPat return nil } -// GetOutput returns the entirety of the container output as a string -// Used to capturing script results as an output parameter -func (d *DockerExecutor) 
GetOutput(containerID string) (string, error) { +func (d *DockerExecutor) GetOutputStream(containerID string, combinedOutput bool) (io.ReadCloser, error) { cmd := exec.Command("docker", "logs", containerID) log.Info(cmd.Args) - outBytes, _ := cmd.Output() - return strings.TrimSpace(string(outBytes)), nil -} - -// Wait for the container to complete -func (d *DockerExecutor) Wait(containerID string) error { - return common.RunCommand("docker", "wait", containerID) -} - -// Logs captures the logs of a container to a file -func (d *DockerExecutor) Logs(containerID string, path string) error { - cmd := exec.Command("docker", "logs", containerID) - outfile, err := os.Create(path) + if combinedOutput { + cmd.Stderr = cmd.Stdout + } + reader, err := cmd.StdoutPipe() if err != nil { - return errors.InternalWrapError(err) + return nil, errors.InternalWrapError(err) } - defer util.Close(outfile) - cmd.Stdout = outfile - cmd.Stderr = outfile err = cmd.Start() if err != nil { - return errors.InternalWrapError(err) + return nil, errors.InternalWrapError(err) } - return cmd.Wait() + return reader, nil +} + +func (d *DockerExecutor) WaitInit() error { + return nil +} + +// Wait for the container to complete +func (d *DockerExecutor) Wait(containerID string) error { + return common.RunCommand("docker", "wait", containerID) } // killContainers kills a list of containerIDs first with a SIGTERM then with a SIGKILL after a grace period @@ -120,8 +111,8 @@ func (d *DockerExecutor) Kill(containerIDs []string) error { // waitCmd.Wait() might return error "signal: killed" when we SIGKILL the process // We ignore errors in this case //ignoreWaitError := false - timer := time.AfterFunc(killGracePeriod*time.Second, func() { - log.Infof("Timed out (%ds) for containers to terminate gracefully. Killing forcefully", killGracePeriod) + timer := time.AfterFunc(execcommon.KillGracePeriod*time.Second, func() { + log.Infof("Timed out (%ds) for containers to terminate gracefully. 
Killing forcefully", execcommon.KillGracePeriod) forceKillArgs := append([]string{"kill", "--signal", "KILL"}, containerIDs...) forceKillCmd := exec.Command("docker", forceKillArgs...) log.Info(forceKillCmd.Args) diff --git a/workflow/executor/executor.go b/workflow/executor/executor.go index 17561a2b0600..ad15c5439a86 100644 --- a/workflow/executor/executor.go +++ b/workflow/executor/executor.go @@ -19,8 +19,18 @@ import ( "syscall" "time" + argofile "github.com/argoproj/pkg/file" + log "github.com/sirupsen/logrus" + apiv1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/apimachinery/pkg/watch" + "k8s.io/client-go/kubernetes" + "github.com/argoproj/argo/errors" wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo/util/archive" "github.com/argoproj/argo/util/retry" artifact "github.com/argoproj/argo/workflow/artifacts" "github.com/argoproj/argo/workflow/artifacts/artifactory" @@ -30,12 +40,11 @@ import ( "github.com/argoproj/argo/workflow/artifacts/raw" "github.com/argoproj/argo/workflow/artifacts/s3" "github.com/argoproj/argo/workflow/common" - argofile "github.com/argoproj/pkg/file" - log "github.com/sirupsen/logrus" - apiv1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" - "k8s.io/client-go/kubernetes" +) + +const ( + // This directory temporarily stores the tarballs of the artifacts before uploading + tempOutArtDir = "/argo/outputs/artifacts" ) // WorkflowExecutor is program which runs as the init/wait container @@ -69,28 +78,30 @@ type ContainerRuntimeExecutor interface { // CopyFile copies a source file in a container to a local path CopyFile(containerID string, sourcePath string, destPath string) error - // GetOutput returns the entirety of the container output as a string - // Used to capturing script results as an output parameter - GetOutput(containerID string) 
(string, error) + // GetOutputStream returns the entirety of the container output as an io.Reader + // Used to capture script results as an output parameter, and to archive container logs + GetOutputStream(containerID string, combinedOutput bool) (io.ReadCloser, error) - // Wait for the container to complete - Wait(containerID string) error + // WaitInit is called before Wait() to signal the executor about an impending Wait call. + // For most executors this is a noop, and is only used by the PNS executor + WaitInit() error - // Copy logs to a given path - Logs(containerID string, path string) error + // Wait waits for the container to complete + Wait(containerID string) error // Kill a list of containerIDs first with a SIGTERM then with a SIGKILL after a grace period Kill(containerIDs []string) error } // NewExecutor instantiates a new workflow executor -func NewExecutor(clientset kubernetes.Interface, podName, namespace, podAnnotationsPath string, cre ContainerRuntimeExecutor) WorkflowExecutor { +func NewExecutor(clientset kubernetes.Interface, podName, namespace, podAnnotationsPath string, cre ContainerRuntimeExecutor, template wfv1.Template) WorkflowExecutor { return WorkflowExecutor{ PodName: podName, ClientSet: clientset, Namespace: namespace, PodAnnotationsPath: podAnnotationsPath, RuntimeExecutor: cre, + Template: template, memoizedConfigMaps: map[string]string{}, memoizedSecrets: map[string][]byte{}, errors: []error{}, @@ -109,7 +120,7 @@ func (we *WorkflowExecutor) HandleError() { } } -// LoadArtifacts loads aftifacts from location to a container path +// LoadArtifacts loads artifacts from location to a container path func (we *WorkflowExecutor) LoadArtifacts() error { log.Infof("Start loading input artifacts...") @@ -119,7 +130,7 @@ func (we *WorkflowExecutor) LoadArtifacts() error { if !art.HasLocation() { if art.Optional { - log.Warnf("Artifact %s is not supplied.
Artifact configured as an optional so, Artifact will be ignored", art.Name) + log.Warnf("Ignoring optional artifact '%s' which was not supplied", art.Name) continue } else { return errors.New("required artifact %s not supplied", art.Name) @@ -144,7 +155,7 @@ func (we *WorkflowExecutor) LoadArtifacts() error { // as opposed to the `input-artifacts` volume that is an implementation detail // unbeknownst to the user. log.Infof("Specified artifact path %s overlaps with volume mount at %s. Extracting to volume mount", art.Path, mnt.MountPath) - artPath = path.Join(common.InitContainerMainFilesystemDir, art.Path) + artPath = path.Join(common.ExecutorMainFilesystemDir, art.Path) } // The artifact is downloaded to a temporary location, after which we determine if @@ -211,15 +222,13 @@ func (we *WorkflowExecutor) SaveArtifacts() error { return err } - // This directory temporarily stores the tarballs of the artifacts before uploading - tempOutArtDir := "/argo/outputs/artifacts" err = os.MkdirAll(tempOutArtDir, os.ModePerm) if err != nil { return errors.InternalWrapError(err) } for i, art := range we.Template.Outputs.Artifacts { - err := we.saveArtifact(tempOutArtDir, mainCtrID, &art) + err := we.saveArtifact(mainCtrID, &art) if err != nil { return err } @@ -228,31 +237,19 @@ func (we *WorkflowExecutor) SaveArtifacts() error { return nil } -func (we *WorkflowExecutor) saveArtifact(tempOutArtDir string, mainCtrID string, art *wfv1.Artifact) error { - log.Infof("Saving artifact: %s", art.Name) +func (we *WorkflowExecutor) saveArtifact(mainCtrID string, art *wfv1.Artifact) error { // Determine the file path of where to find the artifact if art.Path == "" { return errors.InternalErrorf("Artifact %s did not specify a path", art.Name) } - - // fileName is incorporated into the final path when uploading it to the artifact repo - fileName := fmt.Sprintf("%s.tgz", art.Name) - // localArtPath is the final staging location of the file (or directory) which we will pass - // to the 
SaveArtifacts call - localArtPath := path.Join(tempOutArtDir, fileName) - err := we.RuntimeExecutor.CopyFile(mainCtrID, art.Path, localArtPath) + fileName, localArtPath, err := we.stageArchiveFile(mainCtrID, art) if err != nil { if art.Optional && errors.IsCode(errors.CodeNotFound, err) { - log.Warnf("Error in saving Artifact. Artifact configured as an optional so, Error will be ignored. Error= %v", err) + log.Warnf("Ignoring optional artifact '%s' which does not exist in path '%s': %v", art.Name, art.Path, err) return nil } return err } - fileName, localArtPath, err = stageArchiveFile(fileName, localArtPath, art) - if err != nil { - return err - } - if !art.HasLocation() { // If user did not explicitly set an artifact destination location in the template, // use the default archive location (appended with the filename). @@ -299,7 +296,13 @@ func (we *WorkflowExecutor) saveArtifact(tempOutArtDir string, mainCtrID string, return nil } -func stageArchiveFile(fileName, localArtPath string, art *wfv1.Artifact) (string, string, error) { +// stageArchiveFile stages a path in a container for archiving from the wait sidecar. +// Returns a filename and a local path for the upload. +// The filename is incorporated into the final path when uploading it to the artifact repo. +// The local path is the final staging location of the file (or directory) which we will pass +// to the SaveArtifacts call and may be a directory or file. 
+func (we *WorkflowExecutor) stageArchiveFile(mainCtrID string, art *wfv1.Artifact) (string, string, error) { + log.Infof("Staging artifact: %s", art.Name) strategy := art.Archive if strategy == nil { // If no strategy is specified, default to the tar strategy @@ -307,44 +310,83 @@ func stageArchiveFile(fileName, localArtPath string, art *wfv1.Artifact) (string Tar: &wfv1.TarStrategy{}, } } - tempOutArtDir := filepath.Dir(localArtPath) - if strategy.None != nil { - log.Info("Disabling archive before upload") - unarchivedArtPath := path.Join(tempOutArtDir, art.Name) - err := untar(localArtPath, unarchivedArtPath) - if err != nil { - return "", "", err + + if !we.isBaseImagePath(art.Path) { + // If we get here, we are uploading an artifact from a mirrored volume mount which the wait + // sidecar has direct access to. We can upload directly from the shared volume mount, + // instead of copying it from the container. + mountedArtPath := filepath.Join(common.ExecutorMainFilesystemDir, art.Path) + log.Infof("Staging %s from mirrored volume mount %s", art.Path, mountedArtPath) + if strategy.None != nil { + fileName := filepath.Base(art.Path) + log.Infof("No compression strategy needed. Staging skipped") + return fileName, mountedArtPath, nil } - // Delete the tarball - err = os.Remove(localArtPath) + fileName := fmt.Sprintf("%s.tgz", art.Name) + localArtPath := filepath.Join(tempOutArtDir, fileName) + f, err := os.Create(localArtPath) if err != nil { return "", "", errors.InternalWrapError(err) } - isDir, err := argofile.IsDirectory(unarchivedArtPath) + w := bufio.NewWriter(f) + err = archive.TarGzToWriter(mountedArtPath, w) if err != nil { - return "", "", errors.InternalWrapError(err) + return "", "", err } - fileName = filepath.Base(art.Path) - if isDir { - localArtPath = unarchivedArtPath - } else { - // If we are uploading a single file, we need to preserve original filename so that - // 1. minio client can infer its mime-type, based on file extension - // 2. 
the original filename is incorporated into the final path - localArtPath = path.Join(tempOutArtDir, fileName) - err = os.Rename(unarchivedArtPath, localArtPath) - if err != nil { - return "", "", errors.InternalWrapError(err) - } + log.Infof("Successfully staged %s from mirrored volume mount %s", art.Path, mountedArtPath) + return fileName, localArtPath, nil + } + + fileName := fmt.Sprintf("%s.tgz", art.Name) + localArtPath := filepath.Join(tempOutArtDir, fileName) + log.Infof("Copying %s from container base image layer to %s", art.Path, localArtPath) + + err := we.RuntimeExecutor.CopyFile(mainCtrID, art.Path, localArtPath) + if err != nil { + return "", "", err + } + if strategy.Tar != nil { + // NOTE we already tar gzip the file in the executor. So this is a noop. + return fileName, localArtPath, nil + } + // localArtPath now points to a .tgz file, and the archive strategy is *not* tar. We need to untar it + log.Infof("Untaring %s archive before upload", localArtPath) + unarchivedArtPath := path.Join(filepath.Dir(localArtPath), art.Name) + err = untar(localArtPath, unarchivedArtPath) + if err != nil { + return "", "", err + } + // Delete the tarball + err = os.Remove(localArtPath) + if err != nil { + return "", "", errors.InternalWrapError(err) + } + isDir, err := argofile.IsDirectory(unarchivedArtPath) + if err != nil { + return "", "", errors.InternalWrapError(err) + } + fileName = filepath.Base(art.Path) + if isDir { + localArtPath = unarchivedArtPath + } else { + // If we are uploading a single file, we need to preserve original filename so that + // 1. minio client can infer its mime-type, based on file extension + // 2. the original filename is incorporated into the final path + localArtPath = path.Join(tempOutArtDir, fileName) + err = os.Rename(unarchivedArtPath, localArtPath) + if err != nil { + return "", "", errors.InternalWrapError(err) } - } else if strategy.Tar != nil { - // NOTE we already tar gzip the file in the executor. So this is a noop. 
In the future, if - // we were to support other compression formats (e.g. bzip2) or options, the logic would go - // here, and compression would be moved out of the executors. } + // In the future, if we were to support other compression formats (e.g. bzip2) or options + // the logic would go here, and compression would be moved out of the executors return fileName, localArtPath, nil } +func (we *WorkflowExecutor) isBaseImagePath(path string) bool { + return common.FindOverlappingVolume(&we.Template, path) == nil +} + // SaveParameters will save the content in the specified file path as output parameter value func (we *WorkflowExecutor) SaveParameters() error { if len(we.Template.Outputs.Parameters) == 0 { @@ -363,10 +405,24 @@ func (we *WorkflowExecutor) SaveParameters() error { if param.ValueFrom == nil || param.ValueFrom.Path == "" { continue } - output, err := we.RuntimeExecutor.GetFileContents(mainCtrID, param.ValueFrom.Path) - if err != nil { - return err + + var output string + if we.isBaseImagePath(param.ValueFrom.Path) { + log.Infof("Copying %s from base image layer", param.ValueFrom.Path) + output, err = we.RuntimeExecutor.GetFileContents(mainCtrID, param.ValueFrom.Path) + if err != nil { + return err + } + } else { + log.Infof("Copying %s from from volume mount", param.ValueFrom.Path) + mountedPath := filepath.Join(common.ExecutorMainFilesystemDir, param.ValueFrom.Path) + out, err := ioutil.ReadFile(mountedPath) + if err != nil { + return err + } + output = string(out) } + outputLen := len(output) // Trims off a single newline for user convenience if outputLen > 0 && output[outputLen-1] == '\n' { @@ -395,7 +451,7 @@ func (we *WorkflowExecutor) SaveLogs() (*wfv1.Artifact, error) { } fileName := "main.log" mainLog := path.Join(tempLogsDir, fileName) - err = we.RuntimeExecutor.Logs(mainCtrID, mainLog) + err = we.saveLogToFile(mainCtrID, mainLog) if err != nil { return nil, err } @@ -437,9 +493,26 @@ func (we *WorkflowExecutor) SaveLogs() (*wfv1.Artifact, 
error) { // GetSecretFromVolMount will retrive the Secrets from VolumeMount func (we *WorkflowExecutor) GetSecretFromVolMount(accessKeyName string, accessKey string) ([]byte, error) { - return ioutil.ReadFile(filepath.Join(common.SecretVolMountPath, accessKeyName, accessKey)) +} +// saveLogToFile saves the entire log output of a container to a local file +func (we *WorkflowExecutor) saveLogToFile(mainCtrID, path string) error { + outFile, err := os.Create(path) + if err != nil { + return errors.InternalWrapError(err) + } + defer func() { _ = outFile.Close() }() + reader, err := we.RuntimeExecutor.GetOutputStream(mainCtrID, true) + if err != nil { + return err + } + defer func() { _ = reader.Close() }() + _, err = io.Copy(outFile, reader) + if err != nil { + return errors.InternalWrapError(err) + } + return nil } // InitDriver initializes an instance of an artifact driver @@ -664,10 +737,21 @@ func (we *WorkflowExecutor) CaptureScriptResult() error { if err != nil { return err } - out, err := we.RuntimeExecutor.GetOutput(mainContainerID) + reader, err := we.RuntimeExecutor.GetOutputStream(mainContainerID, false) if err != nil { return err } + defer func() { _ = reader.Close() }() + bytes, err := ioutil.ReadAll(reader) + if err != nil { + return errors.InternalWrapError(err) + } + out := string(bytes) + // Trims off a single newline for user convenience + outputLen := len(out) + if outputLen > 0 && out[outputLen-1] == '\n' { + out = out[0 : outputLen-1] + } we.Template.Outputs.Result = &out return nil } @@ -692,6 +776,7 @@ func (we *WorkflowExecutor) AnnotateOutputs(logArt *wfv1.Artifact) error { // AddError adds an error to the list of encountered errors durign execution func (we *WorkflowExecutor) AddError(err error) { + log.Errorf("executor error: %+v", err) we.errors = append(we.errors, err) } @@ -762,20 +847,13 @@ func containerID(ctrID string) string { // Wait is the sidecar container logic which waits for the main container to complete. 
// Also monitors for updates in the pod annotations which may change (e.g. terminate) // Upon completion, kills any sidecars after it finishes. -func (we *WorkflowExecutor) Wait() (err error) { - defer func() { - killSidecarsErr := we.killSidecars() - if killSidecarsErr != nil { - log.Errorf("Failed to kill sidecars: %v", killSidecarsErr) - if err == nil { - // set error only if not already set - err = killSidecarsErr - } - } - }() +func (we *WorkflowExecutor) Wait() error { + err := we.RuntimeExecutor.WaitInit() + if err != nil { + return err + } log.Infof("Waiting on main container") - var mainContainerID string - mainContainerID, err = we.waitMainContainerStart() + mainContainerID, err := we.waitMainContainerStart() if err != nil { return err } @@ -787,33 +865,52 @@ func (we *WorkflowExecutor) Wait() (err error) { go we.monitorDeadline(ctx, annotationUpdatesCh) err = we.RuntimeExecutor.Wait(mainContainerID) + if err != nil { + return err + } log.Infof("Main container completed") - return + return nil } // waitMainContainerStart waits for the main container to start and returns its container ID. 
func (we *WorkflowExecutor) waitMainContainerStart() (string, error) { for { - ctrStatus, err := we.GetMainContainerStatus() + podsIf := we.ClientSet.CoreV1().Pods(we.Namespace) + fieldSelector := fields.ParseSelectorOrDie(fmt.Sprintf("metadata.name=%s", we.PodName)) + opts := metav1.ListOptions{ + FieldSelector: fieldSelector.String(), + } + watchIf, err := podsIf.Watch(opts) if err != nil { - return "", err + return "", errors.InternalWrapErrorf(err, "Failed to establish pod watch: %v", err) } - if ctrStatus != nil { - log.Debug(ctrStatus) - if ctrStatus.ContainerID != "" { - we.mainContainerID = containerID(ctrStatus.ContainerID) - return containerID(ctrStatus.ContainerID), nil - } else if ctrStatus.State.Waiting == nil && ctrStatus.State.Running == nil && ctrStatus.State.Terminated == nil { - // status still not ready, wait - time.Sleep(1 * time.Second) - } else if ctrStatus.State.Waiting != nil { - // main container is still in waiting status - time.Sleep(1 * time.Second) - } else { - // main container in running or terminated state but missing container ID - return "", errors.InternalError("Main container ID cannot be found") + for watchEv := range watchIf.ResultChan() { + if watchEv.Type == watch.Error { + return "", errors.InternalErrorf("Pod watch error waiting for main to start: %v", watchEv.Object) + } + pod, ok := watchEv.Object.(*apiv1.Pod) + if !ok { + log.Warnf("Pod watch returned non pod object: %v", watchEv.Object) + continue + } + for _, ctrStatus := range pod.Status.ContainerStatuses { + if ctrStatus.Name == common.MainContainerName { + log.Debug(ctrStatus) + if ctrStatus.ContainerID != "" { + we.mainContainerID = containerID(ctrStatus.ContainerID) + return containerID(ctrStatus.ContainerID), nil + } else if ctrStatus.State.Waiting == nil && ctrStatus.State.Running == nil && ctrStatus.State.Terminated == nil { + // status still not ready, wait + } else if ctrStatus.State.Waiting != nil { + // main container is still in waiting status + } else { + 
// main container in running or terminated state but missing container ID + return "", errors.InternalError("Main container ID cannot be found") + } + } } } + log.Warnf("Pod watch closed unexpectedly") } } @@ -954,8 +1051,8 @@ func (we *WorkflowExecutor) monitorDeadline(ctx context.Context, annotationsUpda } } -// killSidecars kills any sidecars to the main container -func (we *WorkflowExecutor) killSidecars() error { +// KillSidecars kills any sidecars to the main container +func (we *WorkflowExecutor) KillSidecars() error { if len(we.Template.Sidecars) == 0 { log.Infof("No sidecars") return nil @@ -983,15 +1080,6 @@ func (we *WorkflowExecutor) killSidecars() error { return we.RuntimeExecutor.Kill(sidecarIDs) } -// LoadTemplate reads the template definition from the the Kubernetes downward api annotations volume file -func (we *WorkflowExecutor) LoadTemplate() error { - err := unmarshalAnnotationField(we.PodAnnotationsPath, common.AnnotationKeyTemplate, &we.Template) - if err != nil { - return err - } - return nil -} - // LoadExecutionControl reads the execution control definition from the the Kubernetes downward api annotations volume file func (we *WorkflowExecutor) LoadExecutionControl() error { err := unmarshalAnnotationField(we.PodAnnotationsPath, common.AnnotationKeyExecutionControl, &we.ExecutionControl) @@ -1004,6 +1092,16 @@ func (we *WorkflowExecutor) LoadExecutionControl() error { return nil } +// LoadTemplate reads the template definition from the the Kubernetes downward api annotations volume file +func LoadTemplate(path string) (*wfv1.Template, error) { + var tmpl wfv1.Template + err := unmarshalAnnotationField(path, common.AnnotationKeyTemplate, &tmpl) + if err != nil { + return nil, err + } + return &tmpl, nil +} + // unmarshalAnnotationField unmarshals the value of an annotation key into the supplied interface // from the downward api annotation volume file func unmarshalAnnotationField(filePath string, key string, into interface{}) error { diff 
--git a/workflow/executor/k8sapi/k8sapi.go b/workflow/executor/k8sapi/k8sapi.go index 6f3fd932f705..8c44ef06e548 100644 --- a/workflow/executor/k8sapi/k8sapi.go +++ b/workflow/executor/k8sapi/k8sapi.go @@ -1,10 +1,13 @@ package k8sapi import ( - "github.com/argoproj/argo/errors" + "io" + log "github.com/sirupsen/logrus" "k8s.io/client-go/kubernetes" restclient "k8s.io/client-go/rest" + + "github.com/argoproj/argo/errors" ) type K8sAPIExecutor struct { @@ -30,17 +33,16 @@ func (k *K8sAPIExecutor) CopyFile(containerID string, sourcePath string, destPat return errors.Errorf(errors.CodeNotImplemented, "CopyFile() is not implemented in the k8sapi executor.") } -// GetOutput returns the entirety of the container output as a string -// Used to capturing script results as an output parameter -func (k *K8sAPIExecutor) GetOutput(containerID string) (string, error) { +func (k *K8sAPIExecutor) GetOutputStream(containerID string, combinedOutput bool) (io.ReadCloser, error) { log.Infof("Getting output of %s", containerID) - return k.client.getLogs(containerID) + if !combinedOutput { + log.Warn("non combined output unsupported") + } + return k.client.getLogsAsStream(containerID) } -// Logs copies logs to a given path -func (k *K8sAPIExecutor) Logs(containerID, path string) error { - log.Infof("Saving output of %s to %s", containerID, path) - return k.client.saveLogs(containerID, path) +func (k *K8sAPIExecutor) WaitInit() error { + return nil } // Wait for the container to complete diff --git a/workflow/executor/kubelet/client.go b/workflow/executor/kubelet/client.go index 49730c187535..4af47c6836b6 100644 --- a/workflow/executor/kubelet/client.go +++ b/workflow/executor/kubelet/client.go @@ -15,8 +15,6 @@ import ( "syscall" "time" - "github.com/argoproj/argo/util" - "github.com/argoproj/argo/errors" "github.com/argoproj/argo/workflow/common" execcommon "github.com/argoproj/argo/workflow/executor/common" @@ -127,6 +125,26 @@ func (k *kubeletClient) getPodList() (*v1.PodList, 
error) { return podList, resp.Body.Close() } +func (k *kubeletClient) GetLogStream(containerID string) (io.ReadCloser, error) { + podList, err := k.getPodList() + if err != nil { + return nil, err + } + for _, pod := range podList.Items { + for _, container := range pod.Status.ContainerStatuses { + if execcommon.GetContainerID(&container) != containerID { + continue + } + resp, err := k.doRequestLogs(pod.Namespace, pod.Name, container.Name) + if err != nil { + return nil, err + } + return resp.Body, nil + } + } + return nil, errors.New(errors.CodeNotFound, fmt.Sprintf("containerID %q is not found in the pod list", containerID)) +} + func (k *kubeletClient) doRequestLogs(namespace, podName, containerName string) (*http.Response, error) { u, err := url.ParseRequestURI(fmt.Sprintf("https://%s/containerLogs/%s/%s/%s", k.kubeletEndpoint, namespace, podName, containerName)) if err != nil { @@ -147,38 +165,6 @@ func (k *kubeletClient) doRequestLogs(namespace, podName, containerName string) return resp, nil } -func (k *kubeletClient) getLogs(namespace, podName, containerName string) (string, error) { - resp, err := k.doRequestLogs(namespace, podName, containerName) - if resp != nil { - defer func() { _ = resp.Body.Close() }() - } - if err != nil { - return "", err - } - b, err := ioutil.ReadAll(resp.Body) - if err != nil { - return "", errors.InternalWrapError(err) - } - return string(b), resp.Body.Close() -} - -func (k *kubeletClient) saveLogsToFile(namespace, podName, containerName, path string) error { - resp, err := k.doRequestLogs(namespace, podName, containerName) - if resp != nil { - defer func() { _ = resp.Body.Close() }() - } - if err != nil { - return err - } - outFile, err := os.Create(path) - if err != nil { - return errors.InternalWrapError(err) - } - defer util.Close(outFile) - _, err = io.Copy(outFile, resp.Body) - return err -} - func (k *kubeletClient) getContainerStatus(containerID string) (*v1.Pod, *v1.ContainerStatus, error) { podList, err := 
k.getPodList() if err != nil { @@ -195,38 +181,6 @@ func (k *kubeletClient) getContainerStatus(containerID string) (*v1.Pod, *v1.Con return nil, nil, errors.New(errors.CodeNotFound, fmt.Sprintf("containerID %q is not found in the pod list", containerID)) } -func (k *kubeletClient) GetContainerLogs(containerID string) (string, error) { - podList, err := k.getPodList() - if err != nil { - return "", errors.InternalWrapError(err) - } - for _, pod := range podList.Items { - for _, container := range pod.Status.ContainerStatuses { - if execcommon.GetContainerID(&container) != containerID { - continue - } - return k.getLogs(pod.Namespace, pod.Name, container.Name) - } - } - return "", errors.New(errors.CodeNotFound, fmt.Sprintf("containerID %q is not found in the pod list", containerID)) -} - -func (k *kubeletClient) SaveLogsToFile(containerID, path string) error { - podList, err := k.getPodList() - if err != nil { - return errors.InternalWrapError(err) - } - for _, pod := range podList.Items { - for _, container := range pod.Status.ContainerStatuses { - if execcommon.GetContainerID(&container) != containerID { - continue - } - return k.saveLogsToFile(pod.Namespace, pod.Name, container.Name, path) - } - } - return errors.New(errors.CodeNotFound, fmt.Sprintf("containerID %q is not found in the pod list", containerID)) -} - func (k *kubeletClient) exec(u *url.URL) (*url.URL, error) { _, resp, err := k.websocketDialer.Dial(u.String(), k.httpHeader) if resp == nil { diff --git a/workflow/executor/kubelet/kubelet.go b/workflow/executor/kubelet/kubelet.go index 6cd8f9a482f0..cf5115e1b274 100644 --- a/workflow/executor/kubelet/kubelet.go +++ b/workflow/executor/kubelet/kubelet.go @@ -1,8 +1,11 @@ package kubelet import ( - "github.com/argoproj/argo/errors" + "io" + log "github.com/sirupsen/logrus" + + "github.com/argoproj/argo/errors" ) type KubeletExecutor struct { @@ -28,15 +31,15 @@ func (k *KubeletExecutor) CopyFile(containerID string, sourcePath string, destPa return 
errors.Errorf(errors.CodeNotImplemented, "CopyFile() is not implemented in the kubelet executor.") } -// GetOutput returns the entirety of the container output as a string -// Used to capturing script results as an output parameter -func (k *KubeletExecutor) GetOutput(containerID string) (string, error) { - return k.cli.GetContainerLogs(containerID) +func (k *KubeletExecutor) GetOutputStream(containerID string, combinedOutput bool) (io.ReadCloser, error) { + if !combinedOutput { + log.Warn("non combined output unsupported") + } + return k.cli.GetLogStream(containerID) } -// Logs copies logs to a given path -func (k *KubeletExecutor) Logs(containerID, path string) error { - return k.cli.SaveLogsToFile(containerID, path) +func (k *KubeletExecutor) WaitInit() error { + return nil } // Wait for the container to complete diff --git a/workflow/executor/mocks/ContainerRuntimeExecutor.go b/workflow/executor/mocks/ContainerRuntimeExecutor.go index df574d2da817..55046f8fe877 100644 --- a/workflow/executor/mocks/ContainerRuntimeExecutor.go +++ b/workflow/executor/mocks/ContainerRuntimeExecutor.go @@ -1,6 +1,8 @@ -// Code generated by mockery v1.0.0 +// Code generated by mockery v1.0.0. DO NOT EDIT. 
+ package mocks +import io "io" import mock "github.com/stretchr/testify/mock" // ContainerRuntimeExecutor is an autogenerated mock type for the ContainerRuntimeExecutor type @@ -43,20 +45,22 @@ func (_m *ContainerRuntimeExecutor) GetFileContents(containerID string, sourcePa return r0, r1 } -// GetOutput provides a mock function with given fields: containerID -func (_m *ContainerRuntimeExecutor) GetOutput(containerID string) (string, error) { - ret := _m.Called(containerID) +// GetOutputStream provides a mock function with given fields: containerID, combinedOutput +func (_m *ContainerRuntimeExecutor) GetOutputStream(containerID string, combinedOutput bool) (io.ReadCloser, error) { + ret := _m.Called(containerID, combinedOutput) - var r0 string - if rf, ok := ret.Get(0).(func(string) string); ok { - r0 = rf(containerID) + var r0 io.ReadCloser + if rf, ok := ret.Get(0).(func(string, bool) io.ReadCloser); ok { + r0 = rf(containerID, combinedOutput) } else { - r0 = ret.Get(0).(string) + if ret.Get(0) != nil { + r0 = ret.Get(0).(io.ReadCloser) + } } var r1 error - if rf, ok := ret.Get(1).(func(string) error); ok { - r1 = rf(containerID) + if rf, ok := ret.Get(1).(func(string, bool) error); ok { + r1 = rf(containerID, combinedOutput) } else { r1 = ret.Error(1) } @@ -78,13 +82,13 @@ func (_m *ContainerRuntimeExecutor) Kill(containerIDs []string) error { return r0 } -// Logs provides a mock function with given fields: containerID, path -func (_m *ContainerRuntimeExecutor) Logs(containerID string, path string) error { - ret := _m.Called(containerID, path) +// Wait provides a mock function with given fields: containerID +func (_m *ContainerRuntimeExecutor) Wait(containerID string) error { + ret := _m.Called(containerID) var r0 error - if rf, ok := ret.Get(0).(func(string, string) error); ok { - r0 = rf(containerID, path) + if rf, ok := ret.Get(0).(func(string) error); ok { + r0 = rf(containerID) } else { r0 = ret.Error(0) } @@ -92,13 +96,13 @@ func (_m 
*ContainerRuntimeExecutor) Logs(containerID string, path string) error return r0 } -// Wait provides a mock function with given fields: containerID -func (_m *ContainerRuntimeExecutor) Wait(containerID string) error { - ret := _m.Called(containerID) +// WaitInit provides a mock function with given fields: +func (_m *ContainerRuntimeExecutor) WaitInit() error { + ret := _m.Called() var r0 error - if rf, ok := ret.Get(0).(func(string) error); ok { - r0 = rf(containerID) + if rf, ok := ret.Get(0).(func() error); ok { + r0 = rf() } else { r0 = ret.Error(0) } diff --git a/workflow/executor/pns/pns.go b/workflow/executor/pns/pns.go new file mode 100644 index 000000000000..3b6412ddf248 --- /dev/null +++ b/workflow/executor/pns/pns.go @@ -0,0 +1,385 @@ +package pns + +import ( + "bufio" + "fmt" + "io" + "io/ioutil" + "os" + "strings" + "sync" + "syscall" + "time" + + executil "github.com/argoproj/pkg/exec" + gops "github.com/mitchellh/go-ps" + log "github.com/sirupsen/logrus" + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes" + + "github.com/argoproj/argo/errors" + "github.com/argoproj/argo/util/archive" + "github.com/argoproj/argo/workflow/common" + execcommon "github.com/argoproj/argo/workflow/executor/common" +) + +type PNSExecutor struct { + clientset *kubernetes.Clientset + podName string + namespace string + + // ctrIDToPid maps a containerID to a process ID + ctrIDToPid map[string]int + // pidToCtrID maps a process ID to a container ID + pidToCtrID map[int]string + + // pidFileHandles holds file handles to all root containers + pidFileHandles map[int]*fileInfo + + // thisPID is the pid of this process + thisPID int + // mainPID holds the main container's pid + mainPID int + // mainFS holds a file descriptor to the main filesystem, allowing the executor to access the + // filesystem after the main process exited + mainFS *os.File + // rootFS holds a file descriptor to the root filesystem, allowing the executor to exit out of a chroot + rootFS *os.File + // 
debug enables additional debugging + debug bool + // hasOutputs indicates if the template has outputs. determines if we need to + hasOutputs bool +} + +type fileInfo struct { + file os.File + info os.FileInfo +} + +func NewPNSExecutor(clientset *kubernetes.Clientset, podName, namespace string, hasOutputs bool) (*PNSExecutor, error) { + thisPID := os.Getpid() + log.Infof("Creating PNS executor (namespace: %s, pod: %s, pid: %d, hasOutputs: %v)", namespace, podName, thisPID, hasOutputs) + if thisPID == 1 { + return nil, errors.New(errors.CodeBadRequest, "process namespace sharing is not enabled on pod") + } + return &PNSExecutor{ + clientset: clientset, + podName: podName, + namespace: namespace, + ctrIDToPid: make(map[string]int), + pidToCtrID: make(map[int]string), + pidFileHandles: make(map[int]*fileInfo), + thisPID: thisPID, + debug: log.GetLevel() == log.DebugLevel, + hasOutputs: hasOutputs, + }, nil +} + +func (p *PNSExecutor) GetFileContents(containerID string, sourcePath string) (string, error) { + err := p.enterChroot() + if err != nil { + return "", err + } + defer func() { _ = p.exitChroot() }() + out, err := ioutil.ReadFile(sourcePath) + if err != nil { + return "", err + } + return string(out), nil +} + +// enterChroot enters chroot of the main container +func (p *PNSExecutor) enterChroot() error { + if p.mainFS == nil { + return errors.InternalErrorf("could not chroot into main for artifact collection: container may have exited too quickly") + } + if err := p.mainFS.Chdir(); err != nil { + return errors.InternalWrapErrorf(err, "failed to chdir to main filesystem: %v", err) + } + err := syscall.Chroot(".") + if err != nil { + return errors.InternalWrapErrorf(err, "failed to chroot to main filesystem: %v", err) + } + return nil +} + +// exitChroot exits chroot +func (p *PNSExecutor) exitChroot() error { + if err := p.rootFS.Chdir(); err != nil { + return errors.InternalWrapError(err) + } + err := syscall.Chroot(".") + if err != nil { + return 
errors.InternalWrapError(err) + } + return nil +} + +// CopyFile copies a source file in a container to a local path +func (p *PNSExecutor) CopyFile(containerID string, sourcePath string, destPath string) (err error) { + destFile, err := os.Create(destPath) + if err != nil { + return err + } + defer func() { + // exit chroot and close the file. preserve the original error + deferErr := p.exitChroot() + if err == nil && deferErr != nil { + err = errors.InternalWrapError(deferErr) + } + deferErr = destFile.Close() + if err == nil && deferErr != nil { + err = errors.InternalWrapError(deferErr) + } + }() + w := bufio.NewWriter(destFile) + err = p.enterChroot() + if err != nil { + return err + } + + err = archive.TarGzToWriter(sourcePath, w) + if err != nil { + return err + } + + return nil +} + +func (p *PNSExecutor) WaitInit() error { + if !p.hasOutputs { + return nil + } + go p.pollRootProcesses(time.Minute) + // Secure a filehandle on our own root. This is because we will chroot back and forth from + // the main container's filesystem, to our own. + rootFS, err := os.Open("/") + if err != nil { + return errors.InternalWrapError(err) + } + p.rootFS = rootFS + return nil +} + +// Wait for the container to complete +func (p *PNSExecutor) Wait(containerID string) error { + mainPID, err := p.getContainerPID(containerID) + if err != nil { + if !p.hasOutputs { + log.Warnf("Ignoring wait failure: %v. Process assumed to have completed", err) + return nil + } + return err + } + log.Infof("Main pid identified as %d", mainPID) + p.mainPID = mainPID + for pid, f := range p.pidFileHandles { + if pid == p.mainPID { + log.Info("Successfully secured file handle on main container root filesystem") + p.mainFS = &f.file + } else { + log.Infof("Closing root filehandle for non-main pid %d", pid) + _ = f.file.Close() + } + } + if p.mainFS == nil { + log.Warn("Failed to secure file handle on main container's root filesystem. 
Output artifacts from base image layer will fail") + } + + // wait for pid to complete + log.Infof("Waiting for main pid %d to complete", mainPID) + err = executil.WaitPID(mainPID) + if err != nil { + return err + } + log.Infof("Main pid %d completed", mainPID) + return nil +} + +// pollRootProcesses will poll /proc for root pids (pids without parents) in a tight loop, for the +// purpose of securing an open file handle against /proc//root as soon as possible. +// It opens file handles on all root pids because at this point, we do not yet know which pid is the +// "main" container. +// Polling is necessary because it is not possible to use something like fsnotify against procfs. +func (p *PNSExecutor) pollRootProcesses(timeout time.Duration) { + log.Warnf("Polling root processes (%v)", timeout) + deadline := time.Now().Add(timeout) + for { + p.updateCtrIDMap() + if p.mainFS != nil { + log.Info("Stopped root processes polling due to successful securing of main root fs") + break + } + if time.Now().After(deadline) { + log.Warnf("Polling root processes timed out (%v)", timeout) + break + } + time.Sleep(50 * time.Millisecond) + } +} + +func (p *PNSExecutor) GetOutputStream(containerID string, combinedOutput bool) (io.ReadCloser, error) { + if !combinedOutput { + log.Warn("non combined output unsupported") + } + opts := v1.PodLogOptions{ + Container: common.MainContainerName, + } + return p.clientset.CoreV1().Pods(p.namespace).GetLogs(p.podName, &opts).Stream() +} + +// Kill a list of containerIDs first with a SIGTERM then with a SIGKILL after a grace period +func (p *PNSExecutor) Kill(containerIDs []string) error { + var asyncErr error + wg := sync.WaitGroup{} + for _, cid := range containerIDs { + wg.Add(1) + go func(containerID string) { + err := p.killContainer(containerID) + if err != nil && asyncErr != nil { + asyncErr = err + } + wg.Done() + }(cid) + } + wg.Wait() + return asyncErr +} + +func (p *PNSExecutor) killContainer(containerID string) error { + pid, err 
:= p.getContainerPID(containerID) + if err != nil { + log.Warnf("Ignoring kill container failure of %s: %v. Process assumed to have completed", containerID, err) + return nil + } + // On Unix systems, FindProcess always succeeds and returns a Process + // for the given pid, regardless of whether the process exists. + proc, _ := os.FindProcess(pid) + log.Infof("Sending SIGTERM to pid %d", pid) + err = proc.Signal(syscall.SIGTERM) + if err != nil { + log.Warnf("Failed to SIGTERM pid %d: %v", pid, err) + } + + waitPIDOpts := executil.WaitPIDOpts{Timeout: execcommon.KillGracePeriod * time.Second} + err = executil.WaitPID(pid, waitPIDOpts) + if err == nil { + log.Infof("PID %d completed", pid) + return nil + } + if err != executil.ErrWaitPIDTimeout { + return err + } + log.Warnf("Timed out (%v) waiting for pid %d to complete after SIGTERM. Issuing SIGKILL", waitPIDOpts.Timeout, pid) + err = proc.Signal(syscall.SIGKILL) + if err != nil { + log.Warnf("Failed to SIGKILL pid %d: %v", pid, err) + } + return err +} + +// getContainerPID returns the pid associated with the container id.
Returns error if it was unable +// to be determined because no running root processes exist with that container ID +func (p *PNSExecutor) getContainerPID(containerID string) (int, error) { + pid, ok := p.ctrIDToPid[containerID] + if ok { + return pid, nil + } + p.updateCtrIDMap() + pid, ok = p.ctrIDToPid[containerID] + if !ok { + return -1, errors.InternalErrorf("Failed to determine pid for containerID %s: container may have exited too quickly", containerID) + } + return pid, nil +} + +// updateCtrIDMap updates the mapping between container IDs to PIDs +func (p *PNSExecutor) updateCtrIDMap() { + allProcs, err := gops.Processes() + if err != nil { + log.Warnf("Failed to list processes: %v", err) + return + } + for _, proc := range allProcs { + pid := proc.Pid() + if pid == 1 || pid == p.thisPID || proc.PPid() != 0 { + // ignore the pause container, our own pid, and non-root processes + continue + } + + // Useful code for debugging: + if p.debug { + if data, err := ioutil.ReadFile(fmt.Sprintf("/proc/%d/root", pid) + "/etc/os-release"); err == nil { + log.Infof("pid %d: %s", pid, string(data)) + _, _ = parseContainerID(pid) + } + } + + if p.hasOutputs && p.mainFS == nil { + rootPath := fmt.Sprintf("/proc/%d/root", pid) + currInfo, err := os.Stat(rootPath) + if err != nil { + log.Warnf("Failed to stat %s: %v", rootPath, err) + continue + } + log.Infof("pid %d: %v", pid, currInfo) + prevInfo := p.pidFileHandles[pid] + + // Secure the root filehandle of the process. NOTE if the file changed, it means that + // the main container may have switched (e.g. 
gone from busybox to the user's container) + if prevInfo == nil || !os.SameFile(prevInfo.info, currInfo) { + fs, err := os.Open(rootPath) + if err != nil { + log.Warnf("Failed to open %s: %v", rootPath, err) + continue + } + log.Infof("Secured filehandle on %s", rootPath) + p.pidFileHandles[pid] = &fileInfo{ + info: currInfo, + file: *fs, + } + if prevInfo != nil { + _ = prevInfo.file.Close() + } + } + } + + // Update maps of pids to container ids + if _, ok := p.pidToCtrID[pid]; !ok { + containerID, err := parseContainerID(pid) + if err != nil { + log.Warnf("Failed to identify containerID for process %d", pid) + continue + } + log.Infof("containerID %s mapped to pid %d", containerID, pid) + p.ctrIDToPid[containerID] = pid + p.pidToCtrID[pid] = containerID + } + } +} + +// parseContainerID parses the containerID of a pid +func parseContainerID(pid int) (string, error) { + cgroupPath := fmt.Sprintf("/proc/%d/cgroup", pid) + cgroupFile, err := os.OpenFile(cgroupPath, os.O_RDONLY, os.ModePerm) + if err != nil { + return "", errors.InternalWrapError(err) + } + defer func() { _ = cgroupFile.Close() }() + sc := bufio.NewScanner(cgroupFile) + for sc.Scan() { + // See https://www.systutorials.com/docs/linux/man/5-proc/ for /proc/XX/cgroup format. 
e.g.: + // 5:cpuacct,cpu,cpuset:/daemons + line := sc.Text() + log.Debugf("pid %d: %s", pid, line) + parts := strings.Split(line, "/") + if len(parts) > 1 { + if containerID := parts[len(parts)-1]; containerID != "" { + // need to check for empty string because the line may look like: 5:rdma:/ + return containerID, nil + } + } + } + return "", errors.InternalErrorf("Failed to parse container ID from %s", cgroupPath) +} diff --git a/workflow/metrics/collector.go b/workflow/metrics/collector.go index 960074d73cc4..40a3d2e30457 100644 --- a/workflow/metrics/collector.go +++ b/workflow/metrics/collector.go @@ -112,16 +112,12 @@ func (wc *workflowCollector) collectWorkflow(ch chan<- prometheus.Metric, wf wfv addGauge(descWorkflowInfo, 1, wf.Spec.Entrypoint, wf.Spec.ServiceAccountName, joinTemplates(wf.Spec.Templates)) - if phase := wf.Status.Phase; phase != "" { - // TODO: we do not have queuing feature yet so are not adding to a 'Pending' guague. - // Uncomment when we support queueing. - //addGauge(descWorkflowStatusPhase, boolFloat64(phase == wfv1.NodePending), string(wfv1.NodePending)) - addGauge(descWorkflowStatusPhase, boolFloat64(phase == wfv1.NodeRunning), string(wfv1.NodeRunning)) - addGauge(descWorkflowStatusPhase, boolFloat64(phase == wfv1.NodeSucceeded), string(wfv1.NodeSucceeded)) - addGauge(descWorkflowStatusPhase, boolFloat64(phase == wfv1.NodeSkipped), string(wfv1.NodeSkipped)) - addGauge(descWorkflowStatusPhase, boolFloat64(phase == wfv1.NodeFailed), string(wfv1.NodeFailed)) - addGauge(descWorkflowStatusPhase, boolFloat64(phase == wfv1.NodeError), string(wfv1.NodeError)) - } + addGauge(descWorkflowStatusPhase, boolFloat64(wf.Status.Phase == wfv1.NodePending || wf.Status.Phase == ""), string(wfv1.NodePending)) + addGauge(descWorkflowStatusPhase, boolFloat64(wf.Status.Phase == wfv1.NodeRunning), string(wfv1.NodeRunning)) + addGauge(descWorkflowStatusPhase, boolFloat64(wf.Status.Phase == wfv1.NodeSucceeded), string(wfv1.NodeSucceeded)) + 
addGauge(descWorkflowStatusPhase, boolFloat64(wf.Status.Phase == wfv1.NodeSkipped), string(wfv1.NodeSkipped)) + addGauge(descWorkflowStatusPhase, boolFloat64(wf.Status.Phase == wfv1.NodeFailed), string(wfv1.NodeFailed)) + addGauge(descWorkflowStatusPhase, boolFloat64(wf.Status.Phase == wfv1.NodeError), string(wfv1.NodeError)) if !wf.CreationTimestamp.IsZero() { addGauge(descWorkflowCreated, float64(wf.CreationTimestamp.Unix())) diff --git a/workflow/util/util.go b/workflow/util/util.go index fc25a6198662..842939aa3265 100644 --- a/workflow/util/util.go +++ b/workflow/util/util.go @@ -26,6 +26,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" + "k8s.io/utils/pointer" "github.com/argoproj/argo/errors" "github.com/argoproj/argo/pkg/apis/workflow" @@ -239,7 +240,7 @@ func SubmitWorkflow(wfIf v1alpha1.WorkflowInterface, wf *wfv1.Workflow, opts *Su wf.SetOwnerReferences(append(wf.GetOwnerReferences(), *opts.OwnerReference)) } - err := validate.ValidateWorkflow(wf) + err := validate.ValidateWorkflow(wf, validate.ValidateOpts{}) if err != nil { return nil, err } @@ -257,8 +258,7 @@ func SuspendWorkflow(wfIf v1alpha1.WorkflowInterface, workflowName string) error return false, errSuspendedCompletedWorkflow } if wf.Spec.Suspend == nil || *wf.Spec.Suspend != true { - t := true - wf.Spec.Suspend = &t + wf.Spec.Suspend = pointer.BoolPtr(true) wf, err = wfIf.Update(wf) if err != nil { if apierr.IsConflict(err) { diff --git a/workflow/validate/lint.go b/workflow/validate/lint.go index e14414e6cce6..d2b0d9327c73 100644 --- a/workflow/validate/lint.go +++ b/workflow/validate/lint.go @@ -5,11 +5,12 @@ import ( "os" "path/filepath" + "github.com/argoproj/pkg/json" + "github.com/argoproj/argo/errors" "github.com/argoproj/argo/pkg/apis/workflow" wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo/workflow/common" - "github.com/argoproj/pkg/json" ) // LintWorkflowDir validates all workflow manifests in a 
directory. Ignores non-workflow manifests @@ -60,7 +61,7 @@ func LintWorkflowFile(filePath string, strict bool) error { return errors.Errorf(errors.CodeBadRequest, "%s failed to parse: %v", filePath, err) } for _, wf := range workflows { - err = ValidateWorkflow(&wf, true) + err = ValidateWorkflow(&wf, ValidateOpts{Lint: true}) if err != nil { return errors.Errorf(errors.CodeBadRequest, "%s: %s", filePath, err.Error()) } diff --git a/workflow/validate/validate.go b/workflow/validate/validate.go index acaabc593eae..007ae4b7d86a 100644 --- a/workflow/validate/validate.go +++ b/workflow/validate/validate.go @@ -8,16 +8,31 @@ import ( "regexp" "strings" + "github.com/valyala/fasttemplate" + apivalidation "k8s.io/apimachinery/pkg/util/validation" + "github.com/argoproj/argo/errors" wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" "github.com/argoproj/argo/workflow/artifacts/hdfs" "github.com/argoproj/argo/workflow/common" - "github.com/valyala/fasttemplate" - apivalidation "k8s.io/apimachinery/pkg/util/validation" ) +// ValidateOpts provides options when linting +type ValidateOpts struct { + // Lint indicates if this is performing validation in the context of linting. If true, will + // skip some validations which is permissible during linting but not submission (e.g. missing + // input parameters to the workflow) + Lint bool + // ContainerRuntimeExecutor will trigger additional validation checks specific to different + // types of executors. For example, the inability of kubelet/k8s executors to copy artifacts + // out of the base image layer. If unspecified, will use docker executor validation + ContainerRuntimeExecutor string +} + // wfValidationCtx is the context for validating a workflow spec type wfValidationCtx struct { + ValidateOpts + wf *wfv1.Workflow // globalParams keeps track of variables which are available the global // scope and can be referenced from anywhere. 
@@ -36,21 +51,19 @@ const ( anyItemMagicValue = "item.*" ) -// ValidateWorkflow accepts a workflow and performs validation against it. If lint is specified as -// true, will skip some validations which is permissible during linting but not submission -func ValidateWorkflow(wf *wfv1.Workflow, lint ...bool) error { +// ValidateWorkflow accepts a workflow and performs validation against it. +func ValidateWorkflow(wf *wfv1.Workflow, opts ValidateOpts) error { ctx := wfValidationCtx{ + ValidateOpts: opts, wf: wf, globalParams: make(map[string]string), results: make(map[string]bool), } - linting := len(lint) > 0 && lint[0] - err := validateWorkflowFieldNames(wf.Spec.Templates) if err != nil { return errors.Errorf(errors.CodeBadRequest, "spec.templates%s", err.Error()) } - if linting { + if ctx.Lint { // if we are just linting we don't care if spec.arguments.parameters.XXX doesn't have an // explicit value. workflows without a default value is a desired use case err = validateArgumentsFieldNames("spec.arguments.", wf.Spec.Arguments) @@ -154,6 +167,10 @@ func (ctx *wfValidationCtx) validateTemplate(tmpl *wfv1.Template, args wfv1.Argu if err != nil { return err } + err = ctx.validateBaseImageOutputs(tmpl) + if err != nil { + return err + } if tmpl.ArchiveLocation != nil { err = validateArtifactLocation("templates.archiveLocation", *tmpl.ArchiveLocation) if err != nil { @@ -551,6 +568,51 @@ func validateOutputs(scope map[string]interface{}, tmpl *wfv1.Template) error { return nil } +// validateBaseImageOutputs detects if the template contains an output from +func (ctx *wfValidationCtx) validateBaseImageOutputs(tmpl *wfv1.Template) error { + switch ctx.ContainerRuntimeExecutor { + case "", common.ContainerRuntimeExecutorDocker: + // docker executor supports all modes of artifact outputs + case common.ContainerRuntimeExecutorPNS: + // pns supports copying from the base image, but only if there is no volume mount underneath it + errMsg := "pns executor does not support outputs 
from base image layer with volume mounts. must use emptyDir"
+		for _, out := range tmpl.Outputs.Artifacts {
+			if common.FindOverlappingVolume(tmpl, out.Path) == nil {
+				// output is in the base image layer. need to verify there are no volume mounts under it
+				if tmpl.Container != nil {
+					for _, volMnt := range tmpl.Container.VolumeMounts {
+						if strings.HasPrefix(volMnt.MountPath, out.Path+"/") {
+							return errors.Errorf(errors.CodeBadRequest, "templates.%s.outputs.artifacts.%s: %s", tmpl.Name, out.Name, errMsg)
+						}
+					}
+
+				}
+				// NOTE: script templates have their own VolumeMounts; tmpl.Container is
+				// nil here, so we must inspect tmpl.Script (not tmpl.Container) to avoid
+				// a nil-pointer dereference and to check the correct mounts.
+				if tmpl.Script != nil {
+					for _, volMnt := range tmpl.Script.VolumeMounts {
+						if strings.HasPrefix(volMnt.MountPath, out.Path+"/") {
+							return errors.Errorf(errors.CodeBadRequest, "templates.%s.outputs.artifacts.%s: %s", tmpl.Name, out.Name, errMsg)
+						}
+					}
+				}
+			}
+		}
+	case common.ContainerRuntimeExecutorK8sAPI, common.ContainerRuntimeExecutorKubelet:
+		// for kubelet/k8s fail validation if we detect artifact is copied from base image layer
+		errMsg := fmt.Sprintf("%s executor does not support outputs from base image layer. 
must use emptyDir", ctx.ContainerRuntimeExecutor) + for _, out := range tmpl.Outputs.Artifacts { + if common.FindOverlappingVolume(tmpl, out.Path) == nil { + return errors.Errorf(errors.CodeBadRequest, "templates.%s.outputs.artifacts.%s: %s", tmpl.Name, out.Name, errMsg) + } + } + for _, out := range tmpl.Outputs.Parameters { + if out.ValueFrom != nil && common.FindOverlappingVolume(tmpl, out.ValueFrom.Path) == nil { + return errors.Errorf(errors.CodeBadRequest, "templates.%s.outputs.parameters.%s: %s", tmpl.Name, out.Name, errMsg) + } + } + } + return nil +} + // validateOutputParameter verifies that only one of valueFrom is defined in an output func validateOutputParameter(paramRef string, param *wfv1.Parameter) error { if param.ValueFrom == nil { diff --git a/workflow/validate/validate_test.go b/workflow/validate/validate_test.go index fd9eee10bc72..25774083052b 100644 --- a/workflow/validate/validate_test.go +++ b/workflow/validate/validate_test.go @@ -3,17 +3,19 @@ package validate import ( "testing" - wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" - "github.com/argoproj/argo/test" "github.com/ghodss/yaml" "github.com/stretchr/testify/assert" + + wfv1 "github.com/argoproj/argo/pkg/apis/workflow/v1alpha1" + "github.com/argoproj/argo/test" + "github.com/argoproj/argo/workflow/common" ) // validate is a test helper to accept YAML as a string and return // its validation result. 
func validate(yamlStr string) error { wf := unmarshalWf(yamlStr) - return ValidateWorkflow(wf) + return ValidateWorkflow(wf, ValidateOpts{}) } func unmarshalWf(yamlStr string) *wfv1.Workflow { @@ -819,13 +821,13 @@ spec: func TestVolumeMountArtifactPathCollision(t *testing.T) { // ensure we detect and reject path collisions wf := unmarshalWf(volumeMountArtifactPathCollision) - err := ValidateWorkflow(wf) + err := ValidateWorkflow(wf, ValidateOpts{}) if assert.NotNil(t, err) { assert.Contains(t, err.Error(), "already mounted") } // tweak the mount path and validation should now be successful wf.Spec.Templates[0].Container.VolumeMounts[0].MountPath = "/differentpath" - err = ValidateWorkflow(wf) + err = ValidateWorkflow(wf, ValidateOpts{}) assert.Nil(t, err) } @@ -1111,7 +1113,7 @@ func TestPodNameVariable(t *testing.T) { } func TestGlobalParamWithVariable(t *testing.T) { - err := ValidateWorkflow(test.LoadE2EWorkflow("functional/global-outputs-variable.yaml")) + err := ValidateWorkflow(test.LoadE2EWorkflow("functional/global-outputs-variable.yaml"), ValidateOpts{}) assert.Nil(t, err) } @@ -1136,9 +1138,9 @@ spec: // TestSpecArgumentNoValue we allow parameters to have no value at the spec level during linting func TestSpecArgumentNoValue(t *testing.T) { wf := unmarshalWf(specArgumentNoValue) - err := ValidateWorkflow(wf, true) + err := ValidateWorkflow(wf, ValidateOpts{Lint: true}) assert.Nil(t, err) - err = ValidateWorkflow(wf) + err = ValidateWorkflow(wf, ValidateOpts{}) assert.NotNil(t, err) } @@ -1173,7 +1175,7 @@ spec: // TestSpecArgumentSnakeCase we allow parameter and artifact names to be snake case func TestSpecArgumentSnakeCase(t *testing.T) { wf := unmarshalWf(specArgumentSnakeCase) - err := ValidateWorkflow(wf, true) + err := ValidateWorkflow(wf, ValidateOpts{Lint: true}) assert.Nil(t, err) } @@ -1203,13 +1205,12 @@ spec: container: image: alpine:latest command: [echo, "{{inputs.parameters.num}}"] - ` // TestSpecBadSequenceCountAndEnd verifies both count 
and end cannot be defined func TestSpecBadSequenceCountAndEnd(t *testing.T) { wf := unmarshalWf(specBadSequenceCountAndEnd) - err := ValidateWorkflow(wf, true) + err := ValidateWorkflow(wf, ValidateOpts{Lint: true}) assert.Error(t, err) } @@ -1229,6 +1230,129 @@ spec: // TestCustomTemplatVariable verifies custom template variable func TestCustomTemplatVariable(t *testing.T) { wf := unmarshalWf(customVariableInput) - err := ValidateWorkflow(wf, true) + err := ValidateWorkflow(wf, ValidateOpts{Lint: true}) assert.Equal(t, err, nil) } + +var baseImageOutputArtifact = ` +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: base-image-out-art- +spec: + entrypoint: base-image-out-art + templates: + - name: base-image-out-art + container: + image: alpine:latest + command: [echo, hello] + outputs: + artifacts: + - name: tmp + path: /tmp +` + +var baseImageOutputParameter = ` +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: base-image-out-art- +spec: + entrypoint: base-image-out-art + templates: + - name: base-image-out-art + container: + image: alpine:latest + command: [echo, hello] + outputs: + parameters: + - name: tmp + valueFrom: + path: /tmp/file +` + +var volumeMountOutputArtifact = ` +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: base-image-out-art- +spec: + entrypoint: base-image-out-art + volumes: + - name: workdir + emptyDir: {} + templates: + - name: base-image-out-art + container: + image: alpine:latest + command: [echo, hello] + volumeMounts: + - name: workdir + mountPath: /mnt/vol + outputs: + artifacts: + - name: workdir + path: /mnt/vol +` + +var baseImageDirWithEmptyDirOutputArtifact = ` +apiVersion: argoproj.io/v1alpha1 +kind: Workflow +metadata: + generateName: base-image-out-art- +spec: + entrypoint: base-image-out-art + volumes: + - name: workdir + emptyDir: {} + templates: + - name: base-image-out-art + container: + image: alpine:latest + command: [echo, hello] + 
volumeMounts: + - name: workdir + mountPath: /mnt/vol + outputs: + artifacts: + - name: workdir + path: /mnt +` + +// TestBaseImageOutputVerify verifies we error when we detect the condition when the container +// runtime executor doesn't support output artifacts from a base image layer, and fails validation +func TestBaseImageOutputVerify(t *testing.T) { + wfBaseOutArt := unmarshalWf(baseImageOutputArtifact) + wfBaseOutParam := unmarshalWf(baseImageOutputParameter) + wfEmptyDirOutArt := unmarshalWf(volumeMountOutputArtifact) + wfBaseWithEmptyDirOutArt := unmarshalWf(baseImageDirWithEmptyDirOutputArtifact) + var err error + + for _, executor := range []string{common.ContainerRuntimeExecutorK8sAPI, common.ContainerRuntimeExecutorKubelet, common.ContainerRuntimeExecutorPNS, common.ContainerRuntimeExecutorDocker, ""} { + switch executor { + case common.ContainerRuntimeExecutorK8sAPI, common.ContainerRuntimeExecutorKubelet: + err = ValidateWorkflow(wfBaseOutArt, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.Error(t, err) + err = ValidateWorkflow(wfBaseOutParam, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.Error(t, err) + err = ValidateWorkflow(wfBaseWithEmptyDirOutArt, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.Error(t, err) + case common.ContainerRuntimeExecutorPNS: + err = ValidateWorkflow(wfBaseOutArt, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.NoError(t, err) + err = ValidateWorkflow(wfBaseOutParam, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.NoError(t, err) + err = ValidateWorkflow(wfBaseWithEmptyDirOutArt, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.Error(t, err) + case common.ContainerRuntimeExecutorDocker, "": + err = ValidateWorkflow(wfBaseOutArt, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.NoError(t, err) + err = ValidateWorkflow(wfBaseOutParam, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.NoError(t, err) + err = 
ValidateWorkflow(wfBaseWithEmptyDirOutArt, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.NoError(t, err) + } + err = ValidateWorkflow(wfEmptyDirOutArt, ValidateOpts{ContainerRuntimeExecutor: executor}) + assert.NoError(t, err) + } +}