Skip to content

Commit

Permalink
Regenerate E2E test logs for release 0.8.0 (kubeflow#566)
Browse files Browse the repository at this point in the history
* Regenerate E2E test logs for release 0.8.0

* Regenerate "golden" log files

* Regenerate "golden" logs after PR kubeflow#567

* Update list of ignored tests

* Add cond_recur.yaml to list of ignored tests

* Regenerate E2E logs after updating "golden" YAML files of unit tests

* Regenerate E2E logs after enabling auto-strip EOF

* Regenerate "golden" YAML for unit tests

* Rename loop-static CRDs

* Regenerate E2E logs, enable EOF stripping only for E2E test
  • Loading branch information
ckadner authored Apr 29, 2021
1 parent 6672261 commit 649152c
Show file tree
Hide file tree
Showing 74 changed files with 105,906 additions and 601 deletions.
4 changes: 3 additions & 1 deletion manifests/kustomize/base/pipeline/kfp-pipeline-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ data:
push_artifact main-log step-main.log
}
strip_eof() {
awk 'NF' $2 | head -c -1 > $1_temp_save && cp $1_temp_save $2
if [ -f "$2" ]; then
awk 'NF' $2 | head -c -1 > $1_temp_save && cp $1_temp_save $2
fi
}
mc config host add storage ${ARTIFACT_ENDPOINT_SCHEME}${ARTIFACT_ENDPOINT} $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY
135 changes: 67 additions & 68 deletions sdk/python/tests/compiler/compiler_tests_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@
import logging
import os
import re
import textwrap
import unittest
import yaml

from glob import glob
from kfp_tekton import TektonClient
from packaging import version
from os import environ as env
from os.path import dirname, join
from subprocess import run, SubprocessError
from time import sleep

Expand All @@ -46,11 +46,11 @@
else:
logging.warning("KUBECONFIG={}".format(KUBECONFIG))

# set or override the minimum required Tekton Pipeline version, default "v0.20.1":
# TKN_PIPELINE_MIN_VERSION=v0.20 sdk/python/tests/run_e2e_tests.sh
# set or override the minimum required Tekton Pipeline version, default "v0.21.0":
# TKN_PIPELINE_MIN_VERSION=v0.21 sdk/python/tests/run_e2e_tests.sh
# or:
# make e2e_test TKN_PIPELINE_MIN_VERSION=v0.20
TKN_PIPELINE_MIN_VERSION = env.get("TKN_PIPELINE_MIN_VERSION", "v0.20.1")
# make e2e_test TKN_PIPELINE_MIN_VERSION=v0.21
TKN_PIPELINE_MIN_VERSION = env.get("TKN_PIPELINE_MIN_VERSION", "v0.21.0")

# let the user know the expected Tekton Pipeline version
if env.get("TKN_PIPELINE_MIN_VERSION"):
Expand Down Expand Up @@ -183,80 +183,47 @@


# =============================================================================
# non-configurable test settings
# general test settings loaded from e2e_test_config.yaml file
# =============================================================================

# ignore pipelines with unpredictable log output or complex prerequisites
# TODO: revisit this list, try to rewrite those Python DSLs in a way that they
# will produce logs which can be verified. One option would be to keep more
# than one "golden" log file to match either of the desired possible outputs
ignored_yaml_files = [
"big_data_passing.yaml", # does not complete in a reasonable time frame
"create_component_from_func_component.yaml", # not a Tekton PipelineRun
"create_component_from_func.yaml", # need to investigate, keeps Running
"katib.yaml", # service account needs Katib permission, takes too long doing 9 trial runs
"parallel_join_with_logging.yaml", # experimental feature, requires S3 (Minio)
"retry.yaml", # designed to occasionally fail (randomly) if number of retries exceeded
"timeout.yaml", # random failure (by design) ... would need multiple golden log files to compare to
"tolerations.yaml", # designed to fail, test show only how to add the toleration to the pod
"volume.yaml", # need to rework the credentials part
"volume_op.yaml", # need to delete PVC before/after test run
"volume_snapshot_op.yaml", # only works on Minikube, K8s alpha feature, requires a feature gate from K8s master

# the following tests require tekton-pipelines feature-flag data.enable-custom-tasks=true
# kubectl patch cm feature-flags -n tekton-pipelines \
# -p '{"data":{"disable-home-env-overwrite":"true","disable-working-directory-overwrite":"true", "enable-custom-tasks": "true"}}'
# TODO: apply the _cr*.yaml files "apiVersion: custom.tekton.dev/v1alpha1, kind: PipelineLoop"
# for f in sdk/python/tests/compiler/testdata/*_cr*.yaml; do \
# echo "=== ${f} ==="; \
# kubectl apply -f "${f}" -n kubeflow && \
# echo OK || echo FAILED; \
# done
"conditions_and_loops.yaml",
"loop_over_lightweight_output.yaml",
"loop_static.yaml",
"parallelfor_item_argument_resolving.yaml",
"withitem_nested.yaml",
"withparam_global.yaml",
"withparam_global_dict.yaml",
"withparam_output.yaml",
"withparam_output_dict.yaml",

# TODO: remove the following from ignored list
# "any_sequencer.yaml", # takes 5 min
# "basic_no_decorator.yaml", # takes 2 min
# "compose.yaml", # takes 2 min
]
config_file = "./../e2e_test_config.yaml"

with open(join(dirname(__file__), config_file), 'r') as f:
test_config = yaml.safe_load(f)

namespace = test_config["namespace"]
experiment_name = test_config["experiment_name"]
ignored_yaml_files = test_config["ignored_test_files"]

if ignored_yaml_files:
logging.warning("Ignoring the following pipelines: {}".format(
", ".join(ignored_yaml_files)))

# run pipelines in "kubeflow" namespace as some E2E tests depend on Minio
# for artifact storage in order to access secrets:
namespace = "kubeflow"

# experiment name to group the pipeline runs started by these E2E tests
experiment_name = "E2E_TEST"
# ==============================================================================
# Test setup TODOs prior to running the E2E tests to be automated/codified
# ==============================================================================

# KFP doesn't allow any resource to be created by a pipeline. The API has an option
# for users to provide their own service account that has those permissions.
# see https://github.com/kubeflow/kfp-tekton/blob/master/sdk/sa-and-rbac.md
# TODO: add to setUpClass method
rbac = textwrap.dedent("""\
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: default-admin
subjects:
- kind: ServiceAccount
name: default
namespace: {}
roleRef:
kind: ClusterRole
name: cluster-admin
apiGroup: rbac.authorization.k8s.io
""".format(namespace))
# rbac = textwrap.dedent("""\
# apiVersion: rbac.authorization.k8s.io/v1beta1
# kind: ClusterRoleBinding
# metadata:
# name: default-admin
# subjects:
# - kind: ServiceAccount
# name: pipeline-runner
# namespace: kubeflow
# roleRef:
# kind: ClusterRole
# name: cluster-admin
# apiGroup: rbac.authorization.k8s.io
# """.format(namespace))
#
# $ kubectl create clusterrolebinding pipeline-runner-extend --clusterrole cluster-admin --serviceaccount=kubeflow:pipeline-runner

# TODO: enable feature flag for custom tasks to enable E2E tests for loops
# $ kubectl get configmap feature-flags -n tekton-pipelines -o jsonpath='{.data.enable-custom-tasks}'
Expand All @@ -274,6 +241,37 @@
# test_withparam_output
# test_withparam_output_dict

# TODO: examples using the CEL custom task (apiVersion: cel.tekton.dev/v1alpha1)
# require separate installation using golang ko and Docker:
#
# $ git clone https://github.com/tektoncd/experimental/
# $ cd experimental/cel
# $ ko apply -L -f config/


# Regenerate the PipelineLoop Custom Resource files:
#
# $ for f in sdk/python/tests/compiler/testdata/*_pipelineloop_cr*.yaml; do \
# echo ${f/_pipelineloop_cr*.yaml/.py}; done | sort -u | while read f; do echo $f; \
# dsl-compile-tekton --py $f --output ${f/.py/.yaml}; done
#
# ...or generate all "golden" YAML files:
#
# $ for f in sdk/python/tests/compiler/testdata/*.py; do echo $f; \
# dsl-compile-tekton --py $f --output ${f/.py/.yaml}; done
#
# and apply CRDs to the KFP cluster:
#
# $ for f in sdk/python/tests/compiler/testdata/*_cr*.yaml; do echo "=== ${f##*/} ==="; \
# kubectl apply -f "${f}" -n kubeflow; done

# For custom task implementation, we need to enable auto-strip for End-of-File newlines,
# it appears that the tektoncd/experimental does not like EOF newline for the output variables:
# - test_condition_custom_task
# - test_tekton_custom_task
#
# $ kubectl patch cm kfp-tekton-config -n kubeflow -p '{"data":{"strip_eof":"true"}}'
# $ kubectl rollout restart deploy/ml-pipeline -n kubeflow

# =============================================================================
# ensure we have what we need, abort early instead of failing every test
Expand Down Expand Up @@ -376,13 +374,14 @@ def _delete_pipelinerun(self, name):
def _start_pipelinerun(self, name, yaml_file):
kfp_cmd = "kfp run submit -f {} -n {} -e {} -r {}".format(
yaml_file, namespace, experiment_name, name)
kfp_proc = run(kfp_cmd.split(), capture_output=True, timeout=10, check=False)
kfp_proc = run(kfp_cmd.split(), capture_output=True, timeout=30, check=False)
self.assertEqual(kfp_proc.returncode, 0,
"Process returned non-zero exit code: {} -> {}".format(
kfp_cmd, kfp_proc.stderr))
run_id = kfp_proc.stdout.decode().split()[1]
run_id = kfp_proc.stdout.decode().splitlines()[-2].split("|")[1].strip()
wf_manifest = self.client.get_run(run_id).pipeline_runtime.workflow_manifest
pr_name = json.loads(wf_manifest)['metadata']['name']
self.assertIn(name, pr_name, msg=f"pr_name '{pr_name}' is expected to contain pipeline name {name}")
self.pr_name_map[name] = pr_name
# TODO: find a better way than to sleep, but some PipelineRuns take longer
# to be created and logs may not be available yet even with --follow or
Expand Down
13 changes: 11 additions & 2 deletions sdk/python/tests/compiler/testdata/any_sequencer.log
Original file line number Diff line number Diff line change
@@ -1,2 +1,11 @@
time="2021-04-20T04:39:23Z" level=info msg="Starting to watch taskrun for 'task2,task3' and condition in gang-test/any-sequencer."
time="2021-04-20T04:39:41Z" level=info msg="The TaskRun of flipcoin succeeded."



[flipcoin : main] tails


[any-test : main] time="2021-04-23T23:20:01Z" level=info msg="Starting to watch taskrun for 'task2,task3' and condition in kubeflow/any-sequencer."
[any-test : main] time="2021-04-23T23:20:23Z" level=info msg="The TaskRun of flipcoin succeeded."
[any-test : main] time="2021-04-23T23:20:23Z" level=info msg="The condition results_flipcoin_output == 'heads' for the task flipcoin does not meet."
[any-test : main] time="2021-04-23T23:23:21Z" level=info msg="The TaskRun of task2 succeeded."

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@

[producer : copy-artifacts] Added `storage` successfully.
[producer : copy-artifacts] tekton/results/output-1
[producer : copy-artifacts] tar: removing leading '/' from member names
[producer : copy-artifacts] `Output-1.tgz` -> `storage/mlpipeline/artifacts/artifact-passing-pipeline/producer/Output-1.tgz`
[producer : copy-artifacts] Total: 0 B, Transferred: 121 B, Speed: 29 B/s
[producer : copy-artifacts] tar: removing leading '/' from member names
[producer : copy-artifacts] workspace/producer/producer-Output-2
[producer : copy-artifacts] `Output-2.tgz` -> `storage/mlpipeline/artifacts/artifact-passing-pipeline/producer/Output-2.tgz`
[producer : copy-artifacts] Total: 0 B, Transferred: 144 B, Speed: 41 B/s


[processor : copy-artifacts] Added `storage` successfully.
[processor : copy-artifacts] tekton/results/output-1
[processor : copy-artifacts] tar: removing leading '/' from member names
[processor : copy-artifacts] `Output-1.tgz` -> `storage/mlpipeline/artifacts/artifact-passing-pipeline/processor/Output-1.tgz`
[processor : copy-artifacts] Total: 0 B, Transferred: 121 B, Speed: 39 B/s
[processor : copy-artifacts] /tekton/scripts/script-1-b4jcg: line 17: workspaces.producer.path: not found
[processor : copy-artifacts] tar: /producer-Output-2: No such file or directory
[processor : copy-artifacts] tar: error exit delayed from previous errors
[processor : copy-artifacts] `Output-2.tgz` -> `storage/mlpipeline/artifacts/artifact-passing-pipeline/processor/Output-2.tgz`
[processor : copy-artifacts] Total: 0 B, Transferred: 29 B, Speed: 8 B/s
[processor : copy-artifacts] /tekton/scripts/script-1-b4jcg: line 19: workspaces.producer.path: not found

[consumer : main] Input parameter = Data 1
[consumer : main] Input artifact =
[consumer : main] Data 2

9 changes: 1 addition & 8 deletions sdk/python/tests/compiler/testdata/basic_no_decorator.log
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
[get-frequent : main] flies

[get-frequent : copy-artifacts] Added `storage` successfully.
[get-frequent : copy-artifacts] tar: removing leading '/' from member names
[get-frequent : copy-artifacts] tekton/results/word
[get-frequent : copy-artifacts] `word.tgz` -> `storage/mlpipeline/artifacts/save-most-frequent-word/get-frequent/word.tgz`
[get-frequent : copy-artifacts] Total: 0 B, Transferred: 112 B, Speed: 3.42 KiB/s

[save : main] Copying file:///tmp/results.txt...
[save : main] / [0 files][ 0.0 B/ 7.0 B]
/ [1 files][ 7.0 B/ 7.0 B]
[save : main] / [0 files][ 0.0 B/ 7.0 B] / [1 files][ 7.0 B/ 7.0 B]
[save : main] Operation completed over 1 objects/7.0 B.

[exiting : main] exit!
Expand Down
Loading

0 comments on commit 649152c

Please sign in to comment.