Merge pull request #24 from DharmitD/add-e2e-tests
Adding ML Pipelines' end-to-end tests
openshift-merge-robot authored Sep 2, 2022
2 parents 4dd32b2 + 1d34b9c commit 7fb3741
Showing 14 changed files with 1,229 additions and 0 deletions.
70 changes: 70 additions & 0 deletions tests/Dockerfile
@@ -0,0 +1,70 @@
FROM quay.io/centos/centos:stream8

# List of chromedriver versions for download - https://chromedriver.chromium.org/downloads
ARG CHROMEDRIVER_VER=104.0.5112.79
ARG ORG=opendatahub-io
ARG BRANCH=master
ARG ODS_CI_REPO=https://github.com/red-hat-data-services/ods-ci
ARG ODS_CI_GITREF=releases/1.7.0-5
ARG OC_CLI_URL=https://mirror.openshift.com/pub/openshift-v4/amd64/clients/ocp/latest/openshift-client-linux.tar.gz

ENV HOME /root
WORKDIR /root

RUN dnf install -y bc git go-toolset python3-pip unzip && \
git clone https://github.com/crobby/peak $HOME/peak && \
cd $HOME/peak && \
git submodule update --init

RUN dnf -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm &&\
echo -e "[google-chrome]\nname=google-chrome\nbaseurl=http://dl.google.com/linux/chrome/rpm/stable/x86_64\nenabled=1\ngpgcheck=1\ngpgkey=https://dl.google.com/linux/linux_signing_key.pub" > /etc/yum.repos.d/google-chrome.repo &&\
dnf -y install "google-chrome-stable" &&\
dnf clean all

# install jq to help with parsing json
RUN curl -o /usr/local/bin/jq http://stedolan.github.io/jq/download/linux64/jq && \
chmod +x /usr/local/bin/jq

RUN mkdir -p $HOME/src && \
cd $HOME/src && \
git clone --depth=1 --branch ${BRANCH} https://github.com/${ORG}/ml-pipelines && \
# Clone ods-ci repo at specified git ref for the JupyterHub webUI tests
git clone --depth=1 ${ODS_CI_REPO} ods-ci && cd ods-ci && \
git fetch origin ${ODS_CI_GITREF} && git checkout FETCH_HEAD && \
chmod -R 777 $HOME/src

# Use a specific destination file name in case the URL's download file name changes
ADD ${OC_CLI_URL} $HOME/peak/oc-cli.tar.gz
RUN tar -C /usr/local/bin -xvf $HOME/peak/oc-cli.tar.gz && \
chmod +x /usr/local/bin/oc

RUN curl -o /tmp/chromedriver_linux64.zip -L https://chromedriver.storage.googleapis.com/${CHROMEDRIVER_VER}/chromedriver_linux64.zip &&\
unzip /tmp/chromedriver_linux64.zip &&\
cp chromedriver /usr/local/bin/chromedriver

COPY Pipfile Pipfile.lock $HOME/peak/

RUN pip3 install micropipenv &&\
ln -s `which pip3` /usr/bin/pip &&\
cd $HOME/peak &&\
micropipenv install

COPY setup/operatorsetup scripts/install.sh scripts/installandtest.sh $HOME/peak/
COPY resources $HOME/peak/operator-tests/odh-manifests/resources
COPY util $HOME/peak/operator-tests/odh-manifests
COPY setup/kfctl_openshift.yaml $HOME/kfdef/
COPY basictests $HOME/peak/operator-tests/odh-manifests/basictests

RUN chmod -R 777 $HOME/kfdef && \
mkdir -p $HOME/.kube && \
chmod -R 777 $HOME/.kube && \
chmod -R 777 $HOME/peak && \
mkdir -p /peak && \
chmod -R 777 $HOME && \
ln -s $HOME/peak/installandtest.sh /peak/installandtest.sh

# For local testing, you can add your own kubeconfig to the image
# Note: Do not push the image to a public repo with your kubeconfig
# ADD kubeconfig /root/.kube/config

CMD $HOME/peak/installandtest.sh
43 changes: 43 additions & 0 deletions tests/Makefile
@@ -0,0 +1,43 @@
IMAGE=odh-manifests-test
GIT_ORG=opendatahub-io
GIT_BRANCH=master
ODHPROJECT=opendatahub
# Specify the repo and git ref/branch to use for cloning ods-ci
ODS_CI_REPO=https://github.com/red-hat-data-services/ods-ci
ODS_CI_GITREF=master
OC_CLI_URL=https://mirror.openshift.com/pub/openshift-v4/amd64/clients/ocp/latest/openshift-client-linux.tar.gz
OPENSHIFT_USER=
OPENSHIFT_PASS=
OPENSHIFT_LOGIN_PROVIDER=
# Setting SKIP_INSTALL will let you run the tests against an ODH instance that is already setup
SKIP_INSTALL=
# Setting TESTS_REGEX will allow you to change which tests are going to be run
TESTS_REGEX=
# Location inside the container where CI system will retrieve files after a test run
ARTIFACT_DIR=/tmp/artifacts
LOCAL_ARTIFACT_DIR="${PWD}/artifacts"

all: test
test: build run clean

build:
podman build -t $(IMAGE) --build-arg ORG=$(GIT_ORG) --build-arg BRANCH=$(GIT_BRANCH) --build-arg ODS_CI_REPO=$(ODS_CI_REPO) --build-arg ODS_CI_GITREF=$(ODS_CI_GITREF) --build-arg OC_CLI_URL=$(OC_CLI_URL) .

run:
# Confirm that we have a directory for storing any screenshots from selenium tests
mkdir -p ${LOCAL_ARTIFACT_DIR}/screenshots
oc config view --flatten --minify > /tmp/tests-kubeconfig
podman run -e SKIP_INSTALL=$(SKIP_INSTALL) -e TESTS_REGEX=$(TESTS_REGEX) -e SKIP_OPERATOR_INSTALL=$(SKIP_OPERATOR_INSTALL) \
-e SKIP_KFDEF_INSTALL=$(SKIP_KFDEF_INSTALL) -e ODHPROJECT=$(ODHPROJECT) \
-e OPENSHIFT_USER="$(OPENSHIFT_USER)" -e OPENSHIFT_PASS="$(OPENSHIFT_PASS)" -e OPENSHIFT_LOGIN_PROVIDER=$(OPENSHIFT_LOGIN_PROVIDER) -e ARTIFACT_DIR=$(ARTIFACT_DIR) \
-it -v ${LOCAL_ARTIFACT_DIR}/:$(ARTIFACT_DIR):z -v /tmp/tests-kubeconfig:/tmp/kubeconfig:z $(IMAGE)

clean:
oc delete -n $(ODHPROJECT) kfdef opendatahub || true
oc delete project $(ODHPROJECT) || echo -e "\n\n==> If the project deletion failed, you can try to use this script to force it: https://raw.githubusercontent.com/jefferyb/useful-scripts/master/openshift/force-delete-openshift-project\n\n"
#Clean up openshift-operators namespace
oc get csv -n openshift-operators -o name | grep strimzi-cluster-operator | xargs oc delete -n openshift-operators || true
oc get csv -n openshift-operators -o name | grep opendatahub-operator | xargs oc delete -n openshift-operators || true
oc delete subscription -n openshift-operators -l peak.test.subscription=opendatahub-operator
oc get mutatingwebhookconfiguration -o name | grep katib | grep $(ODHPROJECT) | xargs oc delete || true
oc get validatingwebhookconfiguration -o name | grep katib | grep $(ODHPROJECT) | xargs oc delete || true
12 changes: 12 additions & 0 deletions tests/Pipfile
@@ -0,0 +1,12 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true

[dev-packages]

[packages]
selenium = "*"

[requires]
python_version = "3.6"
37 changes: 37 additions & 0 deletions tests/Pipfile.lock

(Generated lock file; contents not rendered here.)

105 changes: 105 additions & 0 deletions tests/README.md
@@ -0,0 +1,105 @@
# Running containerized tests

Running the tests this way assumes that you have an active kubeadmin login
on the cluster that you want to run the tests against and that you have podman
installed. (If you prefer docker, you can edit the Makefile to replace podman
with docker).
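
For example, a quick way to make that swap (illustrative only; assumes GNU sed) is:

```sh
# Illustrative: rewrite the Makefile to call docker instead of podman (GNU sed)
sed -i 's/podman/docker/g' tests/Makefile
```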

Run the following:

```sh
cd tests
make build
make run
```

## Cleaning up after your test run (optional)

Only run the following if you want to remove your Open Data Hub installation.

To clean up the Open Data Hub installation after a test run, run `make clean`.
Running `make clean` **will wipe your Open Data Hub installation** and delete the project.


## Customizing test behavior

Without changes, the test image runs `$HOME/peak/installandtest.sh`, which
handles setting up the opendatahub-operator and then creating the KfDef found in
`tests/setup/kfctl_openshift.yaml`. If you want to modify your test run, change
those files to get the behavior you're looking for.
After you make changes, rebuild the test image with `make build`.

If you'd like to run the tests against an instance that already has Open Data Hub installed,
set `SKIP_INSTALL=true`; the test run will then skip the installation process
and only run the tests. Example: `make run SKIP_INSTALL=true`

If you'd like to run the tests against an instance that already has a KfDef created,
set `SKIP_KFDEF_INSTALL=true`; the test run will then skip the step of creating
the default KfDef. Example: `make run SKIP_KFDEF_INSTALL=true`

If you'd like to run a single test instead of all tests, set the `TESTS_REGEX`
variable (`TESTS_REGEX=<name of the test to run>`). Only the tests matching that
pattern will run instead of the full suite. Example: `make run TESTS_REGEX=grafana`

If you have a local instance already running the operator and you'd like to skip that part
of the install process, set `SKIP_OPERATOR_INSTALL=true`. This bypasses installation
of the operator, but still installs the authentication needed for the jupyterhub tests.

For other possible configurations, look in the Makefile. There is a set of
variables at the top that you can change to meet the needs of your particular test run.
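
For example, an illustrative combined invocation (the `ml-pipelines` pattern below simply matches the basic test added in this PR) might look like:

```sh
# Sketch: reuse an existing ODH install and run only the ml-pipelines basic test
make run SKIP_INSTALL=true TESTS_REGEX=ml-pipelines
```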

## Test Artifacts
The environment variable `ARTIFACT_DIR` specifies the root directory where all test artifacts should be
stored for retrieval at the end of a test run. Any files created should be uniquely named to prevent
a test from overwriting an artifact generated by another test.
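
As a hypothetical illustration (not part of the test suite), a test might save its output like this:

```sh
# Hypothetical: write a uniquely named artifact under ARTIFACT_DIR for later retrieval
mkdir -p "${ARTIFACT_DIR}/ml-pipelines"
oc get pods -n "${ODHPROJECT}" > "${ARTIFACT_DIR}/ml-pipelines/pods-$(date +%s).txt"
```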

# Running tests manually

Manual running of the tests relies on the test
runner [located here](https://github.com/AICoE/peak).
See the README.md there for more detailed information on how it works.

Note: when running on a **Mac**, you may need to do the following:

```sh
brew install coreutils
ln -s /usr/local/bin/greadlink /usr/local/bin/readlink
```

Make sure you have an OpenShift login, then do the following:

```sh
git clone https://github.com/AICoE/peak
cd peak
git submodule update --init
echo opendatahub-kubeflow nil https://github.com/opendatahub-io/odh-manifests > my-list
./setup.sh -t my-list
./run.sh operator-tests/opendatahub-kubeflow/tests/basictests
```

Note: if you're looking to test another repo and/or branch, change the `echo` command above to something of the following form, where `<your branch>` is optional:

```sh
echo opendatahub-kubeflow nil <your repo> <your branch> > my-list
```

If your installation is not in the opendatahub project, you will need to modify
the export line in tests/util to set the value of ODHPROJECT to match the name of the project you are using.
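
For example (the exact form of the line in tests/util may differ), the change amounts to something like:

```sh
# In tests/util: point the tests at your project instead of the default
export ODHPROJECT=my-custom-odh-project
```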

You can run tests individually by passing a substring to run.sh to match:

```sh
./run.sh ailibrary.sh
```

# Basic test

These tests are in the basictests directory. This set of tests assumes that you have opendatahub (Kubeflow-based) installed. It then goes through each module and checks
that the expected pods are all in the running state. This is meant to be a barebones smoke test for an installation.
The steps to run this test are:

* Run the tests

```sh
./run.sh tests/basictests
```
20 changes: 20 additions & 0 deletions tests/TESTING.md
@@ -0,0 +1,20 @@
## Overview

The aim was to set ourselves up with a test system that would give us an automated way to run tests against our PRs. At the outset of this, our repo had no tests of any sort, so the first task became getting some basic tests running against the bits in our repo.

Our tests are based on the utilities found in https://github.com/openshift/origin/tree/master/hack/lib, a set of bash functions and scripts that make it reasonably fast to develop and run a set of tests against either OpenShift itself or, in our case, a set of applications running on OpenShift. Those tests were adapted for use in radanalytics and then re-adapted for testing operators running in OpenShift. We have borrowed their test runner (our fork is [here](https://github.com/crobby/peak)), which searches a subdirectory tree for scripts that match a given regular expression (e.g., ‘tests’ would find all scripts that have ‘tests’ anywhere in their full path or name), so it is easy to run a single test or a large group of tests.

Each test script has a small amount of boilerplate code followed by a series of bash tests. Each test could call out to another utility/language/whatever. The utilities available in the testing library can check for specific results in the text/exit code/etc. of each call. Any test lines that produce a failed result are tabulated and reported at the end of the test run. Of course, the stdout/stderr of each failed call is also available, in addition to whatever other logging your test call might produce. Here's what I would call the main building block of the tests: https://github.com/openshift/origin/blob/master/hack/lib/cmd.sh. It defines what amount to wrappers around whatever calls you want to make in your tests and handles the parsing of the result text/exit codes.
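
As a minimal sketch of that shape (modeled on the scripts under `tests/basictests`; the `app=example` check is hypothetical):

```sh
#!/bin/bash
# Boilerplate shared by the basictests-style scripts
source $TEST_DIR/common

MY_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"`)
source ${MY_DIR}/../util

os::test::junit::declare_suite_start "$MY_SCRIPT"

header "Checking that an example component is running"
os::cmd::expect_success "oc project ${ODHPROJECT}"
os::cmd::try_until_text "oc get pods -l app=example -o jsonpath='{$.items[*].status.phase}'" "Running" $odhdefaulttimeout $odhdefaultinterval

os::test::junit::declare_suite_end
```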

## Integration with OpenShift CI

The first step toward integrating with [OpenShift CI](https://github.com/openshift/release) is granting access to the OpenShift CI Robot and the OpenShift Merge Robot entities in the settings of the repo. Once that is complete, you can contact the openshift-ci team and they will set up the necessary webhooks in the target repo.

Next is the prow configuration. The configuration files are kept in the https://github.com/openshift/release repository. Under the `core_services/prow` directory, you’ll need to modify `_config.yaml` and `_plugins.yaml` in order to have your repository included in the configuration. Submit a PR to that repo and when it merges, you’re all set. As for the contents of your changes, unless you know exactly what you want, it might be useful to start by adding your repo with settings copied from another repository already in the config.

Lastly, and perhaps most important, is defining the configuration that will run your tests. These files are also in the openshift/release repo. To define your test job, you can create a config file like ours, which is defined [here](https://github.com/openshift/release/blob/master/ci-operator/config/opendatahub-io/odh-manifests/opendatahub-io-odh-manifests-master.yaml). The job defines the following items:
1) An image that can be built to run your tests (your tests run inside a container)
2) Instructions on how to build that test image
3) A workflow that has your test or tests in the “tests” portion of the workflow definition. In our case, we are using the ipi-aws workflow, which will spin up a fresh OpenShift cluster in AWS where our tests will run (our test container will start with an admin KUBECONFIG for that cluster)

For greater detail on any of the steps, you can refer to the [OpenShift release README](https://github.com/openshift/release/blob/master/README.md).
99 changes: 99 additions & 0 deletions tests/basictests/ml-pipelines.sh
@@ -0,0 +1,99 @@
#!/bin/bash

source $TEST_DIR/common

MY_DIR=$(readlink -f `dirname "${BASH_SOURCE[0]}"`)

source ${MY_DIR}/../util
RESOURCEDIR="${MY_DIR}/../resources"

os::test::junit::declare_suite_start "$MY_SCRIPT"

function check_resources() {
header "Testing ML pipelines installation"
os::cmd::expect_success "oc project ${ODHPROJECT}"
os::cmd::try_until_text "oc get crd pipelineruns.tekton.dev " "pipelineruns.tekton.dev" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "oc get pods -l application-crd-id=kubeflow-pipelines --field-selector='status.phase!=Running,status.phase!=Completed' -o jsonpath='{$.items[*].metadata.name}' | wc -w" "0" $odhdefaulttimeout $odhdefaultinterval
running_pods=$(oc get pods -l application-crd-id=kubeflow-pipelines --field-selector='status.phase=Running' -o jsonpath='{$.items[*].metadata.name}' | wc -w)
os::cmd::expect_success "if [ "$running_pods" -gt "0" ]; then exit 0; else exit 1; fi"
}

function check_ui_overlay() {
header "Checking UI overlay Kfdef deploys the UI"
os::cmd::try_until_text "oc get pods -l app=ml-pipeline-ui --field-selector='status.phase=Running' -o jsonpath='{$.items[*].metadata.name}' | wc -w" "1" $odhdefaulttimeout $odhdefaultinterval
}

function create_pipeline() {
header "Creating a pipeline"
route=`oc get route ml-pipeline || echo ""`
if [[ -z $route ]]; then
oc expose service ml-pipeline
fi
ROUTE=$(oc get route ml-pipeline --template={{.spec.host}})
PIPELINE_ID=$(curl -s -F "uploadfile=@${RESOURCEDIR}/ml-pipelines/test-pipeline-run.yaml" ${ROUTE}/apis/v1beta1/pipelines/upload | jq -r .id)
os::cmd::try_until_not_text "curl -s ${ROUTE}/apis/v1beta1/pipelines/${PIPELINE_ID} | jq" "null" $odhdefaulttimeout $odhdefaultinterval
}

function list_pipelines() {
header "Listing pipelines"
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/pipelines | jq '.total_size'" "2" $odhdefaulttimeout $odhdefaultinterval
}

function create_run() {
header "Creating a run"
RUN_ID=$(curl -s -H "Content-Type: application/json" -X POST ${ROUTE}/apis/v1beta1/runs -d "{\"name\":\"test-pipeline-run_run\", \"pipeline_spec\":{\"pipeline_id\":\"${PIPELINE_ID}\"}}" | jq -r .run.id)
os::cmd::try_until_not_text "curl -s ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq '" "null" $odhdefaulttimeout $odhdefaultinterval
}

function list_runs() {
header "Listing runs"
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/runs | jq '.total_size'" "1" $odhdefaulttimeout $odhdefaultinterval
}

function check_run_status() {
header "Checking run status"
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq '.run.status'" "Completed" $odhdefaulttimeout $odhdefaultinterval
}

function setup_monitoring() {
header "Enabling User Workload Monitoring on the cluster"
oc apply -f ${RESOURCEDIR}/ml-pipelines/enable-uwm.yaml
}

function test_metrics() {
header "Checking metrics for total number of runs, should be 1 since we have spun up 1 run"
## On OCP 4.11, get-token is removed
cluster_version=`oc get -o json clusterversion | jq '.items[0].status.desired.version' | grep "4.11" || echo ""`
if [[ -z $cluster_version ]]; then
monitoring_token=`oc sa get-token prometheus-k8s -n openshift-monitoring`
else
monitoring_token=`oc create token prometheus-k8s -n openshift-monitoring`
fi
os::cmd::try_until_text "oc -n openshift-monitoring exec -c prometheus prometheus-k8s-0 -- curl -k -H \"Authorization: Bearer $monitoring_token\" 'https://thanos-querier.openshift-monitoring:9091/api/v1/query?query=run_server_run_count' | jq '.data.result[0].value[1]'" "1" $odhdefaulttimeout $odhdefaultinterval
}

function delete_runs() {
header "Deleting runs"
os::cmd::try_until_text "curl -s -X DELETE ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq" "" $odhdefaulttimeout $odhdefaultinterval
os::cmd::try_until_text "curl -s ${ROUTE}/apis/v1beta1/runs/${RUN_ID} | jq '.code'" "5" $odhdefaulttimeout $odhdefaultinterval
}

function delete_pipeline() {
header "Deleting the pipeline"
os::cmd::try_until_text "curl -s -X DELETE ${ROUTE}/apis/v1beta1/pipelines/${PIPELINE_ID} | jq" "" $odhdefaulttimeout $odhdefaultinterval
}

check_resources
check_ui_overlay
create_pipeline
list_pipelines
create_run
list_runs
check_run_status
setup_monitoring
test_metrics
delete_runs
delete_pipeline
oc delete route ml-pipeline

os::test::junit::declare_suite_end