Merge pull request #24 from DharmitD/add-e2e-tests
Adding ML Pipelines' end-to-end tests
openshift-merge-robot authored Sep 2, 2022
2 parents 4dd32b2 + 1d34b9c commit 7fb3741
Showing 14 changed files with 1,229 additions and 0 deletions.
70 changes: 70 additions & 0 deletions tests/Dockerfile
@@ -0,0 +1,70 @@
FROM quay.io/centos/centos:stream8

# List of chromedriver versions for download - https://chromedriver.chromium.org/downloads
ARG CHROMEDRIVER_VER=104.0.5112.79
ARG ORG=opendatahub-io
ARG BRANCH=master
ARG ODS_CI_REPO=https://github.com/red-hat-data-services/ods-ci
ARG ODS_CI_GITREF=releases/1.7.0-5
ARG OC_CLI_URL=https://mirror.openshift.com/pub/openshift-v4/amd64/clients/ocp/latest/openshift-client-linux.tar.gz

ENV HOME /root
WORKDIR /root

RUN dnf install -y bc git go-toolset python3-pip unzip && \
git clone https://github.com/crobby/peak $HOME/peak && \
cd $HOME/peak && \
git submodule update --init

RUN dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm &&\
echo -e "[google-chrome]\nname=google-chrome\nbaseurl=http://dl.google.com/linux/chrome/rpm/stable/x86_64\nenabled=1\ngpgcheck=1\ngpgkey=https://dl.google.com/linux/linux_signing_key.pub" > /etc/yum.repos.d/google-chrome.repo &&\
dnf -y install "google-chrome-stable" &&\
dnf clean all

# install jq to help with parsing json
RUN curl -o /usr/local/bin/jq http://stedolan.github.io/jq/download/linux64/jq && \
chmod +x /usr/local/bin/jq

RUN mkdir -p $HOME/src && \
cd $HOME/src && \
git clone --depth=1 --branch ${BRANCH} https://github.com/${ORG}/ml-pipelines && \
# Clone ods-ci repo at specified git ref for the JupyterHub webUI tests
git clone --depth=1 ${ODS_CI_REPO} ods-ci && cd ods-ci && \
git fetch origin ${ODS_CI_GITREF} && git checkout FETCH_HEAD && \
chmod -R 777 $HOME/src

# Use a specific destination file name in case the URL's download file name changes
ADD ${OC_CLI_URL} $HOME/peak/oc-cli.tar.gz
RUN tar -C /usr/local/bin -xvf $HOME/peak/oc-cli.tar.gz && \
chmod +x /usr/local/bin/oc

RUN curl -o /tmp/chromedriver_linux64.zip -L https://chromedriver.storage.googleapis.com/${CHROMEDRIVER_VER}/chromedriver_linux64.zip &&\
unzip /tmp/chromedriver_linux64.zip &&\
cp chromedriver /usr/local/bin/chromedriver

COPY Pipfile Pipfile.lock $HOME/peak/

RUN pip3 install micropipenv &&\
ln -s `which pip3` /usr/bin/pip &&\
cd $HOME/peak &&\
micropipenv install

COPY setup/operatorsetup scripts/install.sh scripts/installandtest.sh $HOME/peak/
COPY resources $HOME/peak/operator-tests/odh-manifests/resources
COPY util $HOME/peak/operator-tests/odh-manifests
COPY setup/kfctl_openshift.yaml $HOME/kfdef/
COPY basictests $HOME/peak/operator-tests/odh-manifests/basictests

RUN chmod -R 777 $HOME/kfdef && \
mkdir -p $HOME/.kube && \
chmod -R 777 $HOME/.kube && \
chmod -R 777 $HOME/peak && \
mkdir -p /peak && \
chmod -R 777 $HOME && \
ln -s $HOME/peak/installandtest.sh /peak/installandtest.sh

# For local testing, you can add your own kubeconfig to the image
# Note: Do not push the image to a public repo with your kubeconfig
# ADD kubeconfig /root/.kube/config

CMD $HOME/peak/installandtest.sh
43 changes: 43 additions & 0 deletions tests/Makefile
@@ -0,0 +1,43 @@
IMAGE=odh-manifests-test
GIT_ORG=opendatahub-io
GIT_BRANCH=master
ODHPROJECT=opendatahub
# Specify the repo and git ref/branch to use for cloning ods-ci
ODS_CI_REPO=https://github.com/red-hat-data-services/ods-ci
ODS_CI_GITREF=master
OC_CLI_URL=https://mirror.openshift.com/pub/openshift-v4/amd64/clients/ocp/latest/openshift-client-linux.tar.gz
OPENSHIFT_USER=
OPENSHIFT_PASS=
OPENSHIFT_LOGIN_PROVIDER=
# Setting SKIP_INSTALL will let you run the tests against an ODH instance that is already setup
SKIP_INSTALL=
# Setting TESTS_REGEX will allow you to change which tests are going to be run
TESTS_REGEX=
# Location inside the container where CI system will retrieve files after a test run
ARTIFACT_DIR=/tmp/artifacts
LOCAL_ARTIFACT_DIR="${PWD}/artifacts"

all: test
test: build run clean

build:
podman build -t $(IMAGE) --build-arg ORG=$(GIT_ORG) --build-arg BRANCH=$(GIT_BRANCH) --build-arg ODS_CI_REPO=$(ODS_CI_REPO) --build-arg ODS_CI_GITREF=$(ODS_CI_GITREF) --build-arg OC_CLI_URL=$(OC_CLI_URL) .

run:
# Confirm that we have a directory for storing any screenshots from selenium tests
mkdir -p ${LOCAL_ARTIFACT_DIR}/screenshots
oc config view --flatten --minify > /tmp/tests-kubeconfig
podman run -e SKIP_INSTALL=$(SKIP_INSTALL) -e TESTS_REGEX=$(TESTS_REGEX) -e SKIP_OPERATOR_INSTALL=$(SKIP_OPERATOR_INSTALL) \
-e SKIP_KFDEF_INSTALL=$(SKIP_KFDEF_INSTALL) -e ODHPROJECT=$(ODHPROJECT) \
-e OPENSHIFT_USER="$(OPENSHIFT_USER)" -e OPENSHIFT_PASS="$(OPENSHIFT_PASS)" -e OPENSHIFT_LOGIN_PROVIDER=$(OPENSHIFT_LOGIN_PROVIDER) -e ARTIFACT_DIR=$(ARTIFACT_DIR) \
-it -v ${LOCAL_ARTIFACT_DIR}/:$(ARTIFACT_DIR):z -v /tmp/tests-kubeconfig:/tmp/kubeconfig:z $(IMAGE)

clean:
oc delete -n $(ODHPROJECT) kfdef opendatahub || true
oc delete project $(ODHPROJECT) || echo -e "\n\n==> If the project deletion failed, you can try to use this script to force it: https://raw.githubusercontent.com/jefferyb/useful-scripts/master/openshift/force-delete-openshift-project\n\n"
#Clean up openshift-operators namespace
oc get csv -n openshift-operators -o name | grep strimzi-cluster-operator | xargs oc delete -n openshift-operators || true
oc get csv -n openshift-operators -o name | grep opendatahub-operator | xargs oc delete -n openshift-operators || true
oc delete subscription -n openshift-operators -l peak.test.subscription=opendatahub-operator
oc get mutatingwebhookconfiguration -o name | grep katib | grep $(ODHPROJECT) | xargs oc delete || true
oc get validatingwebhookconfiguration -o name | grep katib | grep $(ODHPROJECT) | xargs oc delete || true
12 changes: 12 additions & 0 deletions tests/Pipfile
@@ -0,0 +1,12 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
selenium = "*"

[requires]
python_version = "3.6"
37 changes: 37 additions & 0 deletions tests/Pipfile.lock

(Generated lock file; contents not rendered here.)

105 changes: 105 additions & 0 deletions tests/README.md
@@ -0,0 +1,105 @@
# Running containerized tests

Running the tests this way assumes that you have an active kubeadmin login
on the cluster that you want to run the tests against and that you have podman
installed. (If you prefer docker, you can edit the Makefile to replace podman
with docker).
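
For example, a quick way to make that swap (illustrative only; assumes GNU sed) is:

```sh
# Illustrative: rewrite the Makefile to call docker instead of podman (GNU sed)
sed -i 's/podman/docker/g' tests/Makefile
```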

Run the following:

```sh
cd tests
make build
make run
```

## Cleaning up after your test run (optional)

Only run the following if you want to remove your Open Data Hub installation.

To clean up the Open Data Hub installation after a test run, run `make clean`.
Running `make clean` **will wipe your Open Data Hub installation** and delete the project.


## Customizing test behavior

Without changes, the test image runs `$HOME/peak/installandtest.sh`, which
handles setting up the opendatahub-operator and then creating the KfDef found in
`tests/setup/kfctl_openshift.yaml`. If you want to modify your test run, change
those files to get the behavior you're looking for.
After you make changes, rebuild the test image with `make build`.

If you'd like to run the tests against an instance that already has Open Data Hub installed,
set `SKIP_INSTALL=true`; the test run will then skip the installation process
and only run the tests. Example: `make run SKIP_INSTALL=true`

If you'd like to run the tests against an instance that already has a KfDef created,
set `SKIP_KFDEF_INSTALL=true`; the test run will then skip the step of creating
the default KfDef. Example: `make run SKIP_KFDEF_INSTALL=true`

If you'd like to run a single test instead of all tests, set the `TESTS_REGEX`
variable (`TESTS_REGEX=<name of the test to run>`). Only the tests matching that
pattern will run instead of the full suite. Example: `make run TESTS_REGEX=grafana`

If you have a local instance already running the operator and you'd like to skip that part
of the install process, set `SKIP_OPERATOR_INSTALL=true`. This bypasses installation
of the operator, but still installs the authentication needed for the jupyterhub tests.

For other possible configurations, look in the Makefile. There is a set of
variables at the top that you can change to meet the needs of your particular test run.
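
For example, an illustrative combined invocation (the `ml-pipelines` pattern below simply matches the basic test added in this PR) might look like:

```sh
# Sketch: reuse an existing ODH install and run only the ml-pipelines basic test
make run SKIP_INSTALL=true TESTS_REGEX=ml-pipelines
```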

## Test Artifacts
The environment variable `ARTIFACT_DIR` specifies the root directory where all test artifacts should be
stored for retrieval at the end of a test run. Any files created should be uniquely named to prevent
a test from overwriting an artifact generated by another test.
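
As a hypothetical illustration (not part of the test suite), a test might save its output like this:

```sh
# Hypothetical: write a uniquely named artifact under ARTIFACT_DIR for later retrieval
mkdir -p "${ARTIFACT_DIR}/ml-pipelines"
oc get pods -n "${ODHPROJECT}" > "${ARTIFACT_DIR}/ml-pipelines/pods-$(date +%s).txt"
```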

# Running tests manually

Manual running of the tests relies on the test
runner [located here](https://github.com/AICoE/peak).
See the README.md there for more detailed information on how it works.

Note: when running on a **Mac**, you may need to do the following:

```sh
brew install coreutils
ln -s /usr/local/bin/greadlink /usr/local/bin/readlink
```

Make sure you have an OpenShift login, then do the following:

```sh
git clone https://github.com/AICoE/peak
cd peak
git submodule update --init
echo opendatahub-kubeflow nil https://github.com/opendatahub-io/odh-manifests > my-list
./setup.sh -t my-list
./run.sh operator-tests/opendatahub-kubeflow/tests/basictests
```

Note: if you're looking to test another repo and/or branch, change the `echo` command above to something of the following form, where `<your branch>` is optional:

```sh
echo opendatahub-kubeflow nil <your repo> <your branch> > my-list
```

If your installation is not in the opendatahub project, you will need to modify
the export line in tests/util to set the value of ODHPROJECT to match the name of the project you are using.
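
For example (the exact form of the line in tests/util may differ), the change amounts to something like:

```sh
# In tests/util: point the tests at your project instead of the default
export ODHPROJECT=my-custom-odh-project
```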

You can run tests individually by passing a substring to run.sh to match:

```sh
./run.sh ailibrary.sh
```

# Basic test

These tests are in the basictests directory. This set of tests assumes that you have opendatahub (Kubeflow-based) installed. It then goes through each module and checks
that the expected pods are all in the running state. This is meant to be a barebones smoke test for an installation.
The steps to run this test are:

* Run the tests

```sh
./run.sh tests/basictests
```
20 changes: 20 additions & 0 deletions tests/TESTING.md
@@ -0,0 +1,20 @@
## Overview

The aim was to set ourselves up with a test system that would give us an automated way to run tests against our PRs. At the outset of this, our repo had no tests of any sort, so the first task became getting some basic tests running against the bits in our repo.

Our tests are based on the utilities found in https://github.com/openshift/origin/tree/master/hack/lib, a set of bash functions and scripts that make it reasonably fast to develop and run a set of tests against either OpenShift itself or, in our case, a set of applications running on OpenShift. Those tests were adapted for use in radanalytics and then re-adapted for testing operators running in OpenShift. We have borrowed their test runner (our fork is [here](https://github.com/crobby/peak)), which searches a subdirectory tree for scripts that match a given regular expression (e.g., ‘tests’ would find all scripts that have ‘tests’ anywhere in their full path or name), so it is easy to run a single test or a large group of tests.

Each test script has a small amount of boilerplate code followed by a series of bash tests. Each test could call out to another utility/language/whatever. The utilities available in the testing library can check for specific results in the text/exit code/etc. of each call. Any test lines that produce a failed result are tabulated and reported at the end of the test run. Of course, the stdout/stderr of each failed call is also available, in addition to whatever other logging your test call might produce. Here's what I would call the main building block of the tests: https://github.com/openshift/origin/blob/master/hack/lib/cmd.sh. It defines what amount to wrappers around whatever calls you want to make in your tests and handles the parsing of the result text/exit codes.
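
As a minimal sketch of that shape (modeled on the scripts under `tests/basictests`; the `app=example` check is hypothetical):

```sh
#!/bin/bash
# Boilerplate shared by the basictests-style scripts
source $TEST_DIR/common

MY_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"`)
source ${MY_DIR}/../util

os::test::junit::declare_suite_start "$MY_SCRIPT"

header "Checking that an example component is running"
os::cmd::expect_success "oc project ${ODHPROJECT}"
os::cmd::try_until_text "oc get pods -l app=example -o jsonpath='{$.items[*].status.phase}'" "Running" $odhdefaulttimeout $odhdefaultinterval

os::test::junit::declare_suite_end
```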

## Integration with OpenShift CI

The first step toward integrating with [OpenShift CI](https://github.com/openshift/release) is granting access to the OpenShift CI Robot and the OpenShift Merge Robot entities in the settings of the repo. Once that is complete, you can contact the openshift-ci team and they will set up the necessary webhooks in the target repo.

Next is the prow configuration. The configuration files are kept in the https://github.com/openshift/release repository. Under the `core_services/prow` directory, you’ll need to modify `_config.yaml` and `_plugins.yaml` in order to have your repository included in the configuration. Submit a PR to that repo and when it merges, you’re all set. As for the contents of your changes, unless you know exactly what you want, it might be useful to start by adding your repo with settings copied from another repository already in the config.

Lastly, and perhaps most important, is defining the configuration that will run your tests. These files are also in the openshift/release repo. To define your test job, you can create a config file like ours, which is defined [here](https://github.com/openshift/release/blob/master/ci-operator/config/opendatahub-io/odh-manifests/opendatahub-io-odh-manifests-master.yaml). The job defines the following items:
1) An image that can be built to run your tests (your tests run inside a container)
2) Instructions on how to build that test image
3) A workflow that has your test or tests in the “tests” portion of the workflow definition. In our case, we are using the ipi-aws workflow, which will spin up a fresh OpenShift cluster in AWS where our tests will run (our test container will start with an admin KUBECONFIG for that cluster)

For greater detail on any of the steps, you can refer to the [OpenShift release README](https://github.com/openshift/release/blob/master/README.md).
99 changes: 99 additions & 0 deletions tests/basictests/ml-pipelines.sh
@@ -0,0 +1,99 @@
#!/bin/bash

source $TEST_DIR/common

MY_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"`)

source ${MY_DIR}/../util
RESOURCEDIR="${MY_DIR}/../resources"

os::test::junit::declare_suite_start "$MY_SCRIPT"

function check_resources() {
header "Testing ML pipelines installation"
os::cmd::expect_success "oc project ${ODHPROJECT}"
os::cmd::try_until_text "oc get crd pipelineruns.tekton.dev " "pipelineruns.tekton.dev" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get pods -l application-crd-id=kubeflow-pipelines --field-selector='status.phase!=Running,status.phase!=Completed' -o jsonpath='{$.items[*].metadata.name}' | wc -w" "0" $odhdefaulttimeout $odhdefaultinterval
running_pods=$(oc get pods -l application-crd-id=kubeflow-pipelines --field-selector='status.phase=Running' -o jsonpath='{$.items[*].metadata.name}' | wc -w)
os::cmd::expect_success "if [ "$running_pods" -gt "0" ]; then exit 0; else exit 1; fi"
}

function check_ui_overlay() {
header "Checking UI overlay Kfdef deploys the UI"
os::cmd::try_until_text "oc get pods -l app=ml-pipeline-ui --field-selector='status.phase=Running' -o jsonpath='{$.items[*].metadata.name}' | wc -w" "1" $odhdefaulttimeout $odhdefaultinterval
}

function create_pipeline() {
header "Creating a pipeline"
route=`oc get route ml-pipeline || echo ""`
if [[ -z $route ]]; then
oc expose service ml-pipeline
fi
ROUTE=$(oc get route ml-pipeline --template={{.spec.host}})
PIPELINE_ID=$(curl -s -F "uploadfile=@${RESOURCEDIR}/ml-pipelines/test-pipeline-run.yaml" ${ROUTE}/apis/v1beta1/pipelines/upload | jq -r .id)
os::cmd::try_until_not_text "curl -s ${ROUTE}/apis/v1beta1/pipelines/${PIPELINE_ID} | jq" "null" $odhdefaulttimeout $odhdefaultinterval
}

function list_pipelines() {
header "Listing pipelines"
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/pipelines | jq '.total_size'" "2" $odhdefaulttimeout $odhdefaultinterval
}

function create_run() {
header "Creating a run"
RUN_ID=$(curl -s -H "Content-Type: application/json" -X POST ${ROUTE}/apis/v1beta1/runs -d "{\"name\":\"test-pipeline-run_run\", \"pipeline_spec\":{\"pipeline_id\":\"${PIPELINE_ID}\"}}" | jq -r .run.id)
os::cmd::try_until_not_text "curl -s ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq '" "null" $odhdefaulttimeout $odhdefaultinterval
}

function list_runs() {
header "Listing runs"
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/runs | jq '.total_size'" "1" $odhdefaulttimeout $odhdefaultinterval
}

function check_run_status() {
header "Checking run status"
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq '.run.status'" "Completed" $odhdefaulttimeout $odhdefaultinterval
}

function setup_monitoring() {
header "Enabling User Workload Monitoring on the cluster"
oc apply -f ${RESOURCEDIR}/ml-pipelines/enable-uwm.yaml
}

function test_metrics() {
header "Checking metrics for total number of runs, should be 1 since we have spun up 1 run"
## On OCP 4.11, get-token is removed
cluster_version=`oc get -o json clusterversion | jq '.items[0].status.desired.version' | grep "4.11" || echo ""`
if [[ -z $cluster_version ]]; then
monitoring_token=`oc sa get-token prometheus-k8s -n openshift-monitoring`
else
monitoring_token=`oc create token prometheus-k8s -n openshift-monitoring`
fi
os::cmd::try_until_text "oc -n openshift-monitoring exec -c prometheus prometheus-k8s-0 -- curl -k -H \"Authorization: Bearer $monitoring_token\" 'https://thanos-querier.openshift-monitoring:9091/api/v1/query?query=run_server_run_count' | jq '.data.result[0].value[1]'" "1" $odhdefaulttimeout $odhdefaultinterval
}

function delete_runs() {
header "Deleting runs"
os::cmd::try_until_text "curl -s -X DELETE ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq" "" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq '.code'" "5" $odhdefaulttimeout $odhdefaultinterval
}

function delete_pipeline() {
header "Deleting the pipeline"
os::cmd::try_until_text "curl -s -X DELETE ${ROUTE}/apis/v1beta1/pipelines/${PIPELINE_ID} | jq" "" $odhdefaulttimeout $odhdefaultinterval
}

check_resources
check_ui_overlay
create_pipeline
list_pipelines
create_run
list_runs
check_run_status
setup_monitoring
test_metrics
delete_runs
delete_pipeline
oc delete route ml-pipeline

os::test::junit::declare_suite_end