diff --git a/README.md b/README.md
index 310b48c80..fd55c58ec 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ Quick Links

 ## Anatomy of our Tests

-* Our prow jobs are defined in [config.yaml](https://github.com/kubernetes/test-infra/blob/master/prow/config.yaml)
+* Our prow jobs are defined in [config/jobs/kubeflow](https://github.com/kubernetes/test-infra/tree/master/config/jobs/kubeflow)
 * Each prow job defines a K8s PodSpec indicating a command to run
 * Our prow jobs use [run_e2e_workflow.py](https://github.com/kubeflow/testing/blob/master/py/kubeflow/testing/run_e2e_workflow.py)
   to trigger an Argo workflow that checks out our code and runs our tests.
@@ -170,6 +170,21 @@ configured for the repository (see these [instructions](#prow-setup) for info on
 1. Use [kubeflow.testing.run_e2e_workflow](https://github.com/kubeflow/testing/tree/master/py/kubeflow/testing/run_e2e_workflow.py)
    to run the Argo workflow.
    * Add a `prow_config.yaml` file that will be passed into run_e2e_workflow to determine which ksonnet app to use for testing. An example can be seen [here](https://github.com/kubeflow/kubeflow/blob/master/prow_config.yaml).
+   * Workflows can optionally be scoped by job type (presubmit/postsubmit) or by modified directories. For example:
+
+     ```
+     workflows:
+       - app_dir: kubeflow/testing/workflows
+         component: workflows
+         name: unittests
+         job_types:
+           - presubmit
+         include_dirs:
+           - foo/*
+           - bar/*
+     ```
+     This configures the `unittests` workflow to run only during presubmit jobs, and only if there are changes under the `foo` or `bar` directories.
+
 1. Create a prow job for that repository
    * The command for the prow job should be set via the entrypoint baked into the Docker image
    * This way we can change the Prow job just by pushing a docker image and we don't need to update the prow config.
diff --git a/py/kubeflow/testing/run_e2e_workflow.py b/py/kubeflow/testing/run_e2e_workflow.py
index 813889519..c63c7dd34 100644
--- a/py/kubeflow/testing/run_e2e_workflow.py
+++ b/py/kubeflow/testing/run_e2e_workflow.py
@@ -12,6 +12,10 @@
   - name: e2e-test
     app_dir: tensorflow/k8s/test/workflows
     component: workflows
+    job_types:
+      - presubmit
+    include_dirs:
+      - tensorflow/*

   - name: lint
     app_dir: kubeflow/kubeflow/testing/workflows
@@ -23,6 +27,12 @@
 component is the name of the ksonnet component corresponding
 to the workflow to launch.

+job_types (optional) is an array of strings listing the job types (presubmit/postsubmit)
+for which this workflow should run.
+
+include_dirs (optional) is an array of glob patterns (e.g. tensorflow/*); the workflow
+only runs if at least one modified file matches one of the patterns.
+
 The script expects that the directories
    {repos_dir}/{app_dir}
 exists. Where repos_dir is provided as a command line argument.
@@ -30,6 +40,7 @@
 import argparse
 import datetime
+import fnmatch
 import logging
 from kubernetes import client as k8s_client
 import os
@@ -50,10 +61,12 @@ def get_namespace(args):
 class WorkflowComponent(object):
   """Datastructure to represent a ksonnet component to submit a workflow."""

-  def __init__(self, name, app_dir, component, params):
+  def __init__(self, name, app_dir, component, job_types, include_dirs, params):
     self.name = name
     self.app_dir = app_dir
     self.component = component
+    self.job_types = job_types
+    self.include_dirs = include_dirs
     self.params = params

 def _get_src_dir():
@@ -73,7 +86,8 @@ def parse_config_file(config_file, root_dir):
   components = []
   for i in results["workflows"]:
     components.append(WorkflowComponent(
-      i["name"], os.path.join(root_dir, i["app_dir"]), i["component"], i.get("params", {})))
+      i["name"], os.path.join(root_dir, i["app_dir"]), i["component"], i.get("job_types", []),
+      i.get("include_dirs", []), i.get("params", {})))
   return components

 def generate_env_from_head(args):
@@ -97,15 +111,17 @@ def generate_env_from_head(args):
 def run(args, file_handler): # pylint: disable=too-many-statements,too-many-branches
   # Print ksonnet version
   util.run(["ks", "version"])
+
+  # Compare with the master branch and get the changed files.
+  changed_files = util.run(["git", "diff", "--name-only", "master"],
+    cwd=os.path.join(args.repos_dir, os.getenv("REPO_OWNER"), os.getenv("REPO_NAME"))).splitlines()
+
   if args.release:
     generate_env_from_head(args)
   workflows = []
   if args.config_file:
     workflows.extend(parse_config_file(args.config_file, args.repos_dir))
-  if args.app_dir and args.component:
-    # TODO(jlewi): We can get rid of this branch once all repos are using a prow_config.xml file.
-    workflows.append(WorkflowComponent("legacy", args.app_dir, args.component, {}))

   create_started_file(args.bucket)

   util.maybe_activate_service_account()
@@ -124,6 +140,28 @@ def run(args, file_handler): # pylint: disable=too-many-statements,too-many-bran
     # as a label on the pods.
     workflow_name = os.getenv("JOB_NAME") + "-" + w.name
     job_type = os.getenv("JOB_TYPE")
+
+    # Skip this workflow if it is scoped to a different job type.
+    if w.job_types and job_type not in w.job_types:
+      logging.info("Skipping workflow %s.", w.name)
+      continue
+
+    # If this workflow is scoped to specific directories, check whether any
+    # modified file matches one of the glob patterns.
+    dir_modified = False
+    if w.include_dirs:
+      for f in changed_files:
+        for d in w.include_dirs:
+          if fnmatch.fnmatch(f, d):
+            dir_modified = True
+            break
+        if dir_modified:
+          break
+
+    if w.include_dirs and not dir_modified:
+      logging.info("Skipping workflow %s.", w.name)
+      continue
+
     if job_type == "presubmit":
       workflow_name += "-{0}".format(os.getenv("PULL_NUMBER"))
       workflow_name += "-{0}".format(os.getenv("PULL_PULL_SHA")[0:7])
@@ -264,21 +302,6 @@ def main(unparsed_args=None): # pylint: disable=too-many-locals
     type=str,
     help="The directory where the different repos are checked out.")

-  # TODO(jlewi): app_dir and component predate the use of a config
-  # file we should consider getting rid of them once all repos
-  # have been updated to run multiple workflows.
-  parser.add_argument(
-    "--app_dir",
-    type=str,
-    default="",
-    help="The directory where the ksonnet app is stored.")
-
-  parser.add_argument(
-    "--component",
-    type=str,
-    default="",
-    help="The ksonnet component to use.")
-
   parser.add_argument(
     "--release",
     action='store_true',
diff --git a/py/kubeflow/tests/run_e2e_workflow_test.py b/py/kubeflow/tests/run_e2e_workflow_test.py
index ca9160542..e2fadaf2c 100644
--- a/py/kubeflow/tests/run_e2e_workflow_test.py
+++ b/py/kubeflow/tests/run_e2e_workflow_test.py
@@ -23,69 +23,6 @@ def assertItemsMatchRegex(self, expected, actual):
       pattern = "^" + e + "$"
       six.assertRegex(self, actual[index], pattern)
-  @mock.patch("kubeflow.testing.run_e2e_workflow.prow_artifacts"
-              ".finalize_prow_job")
-  @mock.patch("kubeflow.testing.run_e2e_workflow.util"
-              ".maybe_activate_service_account")
-  @mock.patch("kubeflow.testing.run_e2e_workflow.util.upload_file_to_gcs")
-  @mock.patch("kubeflow.testing.run_e2e_workflow.util.upload_to_gcs")
-  @mock.patch("kubeflow.testing.run_e2e_workflow.util.load_kube_config")
-  @mock.patch("kubeflow.testing.run_e2e_workflow.argo_client.wait_for_workflows")
-  @mock.patch("kubeflow.testing.run_e2e_workflow.util.configure_kubectl")
-  @mock.patch("kubeflow.testing.run_e2e_workflow.util.run")
-  def testMainPresubmit(self, mock_run, mock_configure, *unused_mocks): # pylint: disable=no-self-use,unused-argument
-    """Test create started for presubmit job."""
-
-    os.environ = {}
-    os.environ["REPO_OWNER"] = "fake_org"
-    os.environ["REPO_NAME"] = "fake_name"
-    os.environ["PULL_NUMBER"] = "77"
-    os.environ["PULL_PULL_SHA"] = "123abc"
-    os.environ["JOB_NAME"] = "kubeflow-presubmit"
-    os.environ["JOB_TYPE"] = "presubmit"
-    os.environ["BUILD_NUMBER"] = "1234"
-    os.environ["BUILD_ID"] = "11"
-
-    args = ["--project=some-project", "--cluster=some-cluster",
-            "--zone=us-east1-d", "--bucket=some-bucket",
-            "--app_dir=/some/dir",
-            "--component=workflows"]
-    run_e2e_workflow.main(args)
-
-    mock_configure.assert_called_once_with("some-project", "us-east1-d",
-                                           "some-cluster",)
-
-    expected_calls = [
-      ["ks", "version"],
-      ["ks", "env", "add", "kubeflow-presubmit-legacy-77-123abc-1234-.*"],
-      ["ks", "param", "set", "--env=.*", "workflows", "name",
-       "kubeflow-presubmit-legacy-77-123abc-1234-[0-9a-z]{4}"],
-      ["ks", "param", "set",
-       "--env=.*",
-       "workflows", "prow_env",
-       "BUILD_ID=11,BUILD_NUMBER=1234,JOB_NAME=kubeflow-presubmit,"
-       "JOB_TYPE=presubmit,PULL_NUMBER=77,PULL_PULL_SHA=123abc,"
-       "REPO_NAME=fake_name,REPO_OWNER=fake_org"],
-      ["ks", "param", "set",
-       "--env=.*",
-       "workflows", "namespace",
-       "kubeflow-test-infra"],
-      ["ks", "param", "set",
-       "--env=.*",
-       "workflows", "bucket", "some-bucket"],
-      ["ks", "show", "kubeflow-presubmit.*", "-c", "workflows"],
-      ["ks", "apply", "kubeflow-presubmit.*", "-c", "workflows"],
-    ]
-
-    for i, expected in enumerate(expected_calls):
-      self.assertItemsMatchRegex(
-        expected,
-        mock_run.call_args_list[i][0][0])
-      if i > 0:
-        self.assertEqual(
-          "/some/dir",
-          mock_run.call_args_list[i][1]["cwd"])
-
   @mock.patch("kubeflow.testing.run_e2e_workflow.prow_artifacts"
               ".finalize_prow_job")
   @mock.patch("kubeflow.testing.run_e2e_workflow.util"
               ".maybe_activate_service_account")
@@ -134,6 +71,7 @@ def testWithConfig(self, mock_run, mock_configure, *unused_mocks): # pylint: di

     expected_calls = [
       ["ks", "version"],
+      ["git", "diff", "--name-only", "master"],
       ["ks", "env", "add", "kubeflow-presubmit-wf-77-123abc-1234-.*"],
       ["ks", "param", "set", "--env=.*", "workflows", "name",
        "kubeflow-presubmit-wf-77-123abc-1234-[0-9a-z]{4}"],
@@ -164,7 +102,7 @@ def testWithConfig(self, mock_run, mock_configure, *unused_mocks): # pylint: di
       self.assertItemsMatchRegex(
         expected,
         mock_run.call_args_list[i][0][0])
-      if i > 0:
+      if i > 1:
         self.assertEqual(
           "/src/kubeflow/testing/workflows",
           mock_run.call_args_list[i][1]["cwd"])
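
For reference, here is a minimal standalone sketch (not part of the patch above) of how the new `job_types` / `include_dirs` scoping behaves: a workflow is skipped when the Prow job type is not listed in `job_types`, or when `include_dirs` is set and none of the changed files match any of its glob patterns. The `should_run` helper and the sample data below are illustrative only.

```python
import fnmatch

def should_run(workflow, job_type, changed_files):
  """Illustrative only: mirrors the scoping logic added to run_e2e_workflow.run."""
  # Skip workflows scoped to other job types (presubmit/postsubmit).
  if workflow.get("job_types") and job_type not in workflow["job_types"]:
    return False
  # If include_dirs is set, require at least one changed file to match a pattern.
  include_dirs = workflow.get("include_dirs", [])
  if include_dirs:
    return any(fnmatch.fnmatch(f, d) for f in changed_files for d in include_dirs)
  return True

if __name__ == "__main__":
  wf = {"name": "unittests",
        "job_types": ["presubmit"],
        "include_dirs": ["foo/*", "bar/*"]}
  print(should_run(wf, "presubmit", ["foo/main.py"]))     # True
  print(should_run(wf, "presubmit", ["docs/README.md"]))  # False
  print(should_run(wf, "postsubmit", ["foo/main.py"]))    # False
```

Note that `fnmatch` patterns are shell-style globs in which `*` also matches path separators, so a pattern like `foo/*` matches files in nested subdirectories of `foo` as well.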