Add compilation report for all KFP DSL scripts (#130)
ckadner authored May 1, 2020
1 parent 2fd278a commit f8673e7
Showing 2 changed files with 276 additions and 48 deletions.
65 changes: 65 additions & 0 deletions sdk/python/tests/README.md
@@ -24,6 +24,8 @@ or run this command from the project root directory:
You should see output similar to the one below, outlining which test scripts passed and which failed:

```YAML
KFP version: 0.2.2

SUCCESS: add_pod_env.py
SUCCESS: artifact_location.py
SUCCESS: basic.py
@@ -66,3 +68,66 @@ Compiled Tekton YAML files: temp/tekton_compiler_output/
The goal is to have all 30 tests pass before we can have a degree of confidence that the compiler can handle
a fair number of pipelines.

## Summary Report for all KFP Sample DSL Scripts
For a more comprehensive report on the compilation status of all Python DSL scripts found in the
[`kubeflow/pipelines`](https://github.com/kubeflow/pipelines/) repository, run:

./test_kfp_samples.sh \
--include-all-samples \
--dont-list-files

This will include all `core/samples`, 3rd-party contributed samples, and tutorials, as well as the compiler `testdata` scripts.
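Under the hood, sample discovery is a `find` piped through `grep -l`: any `*.py` file in the clone that mentions `dsl.Pipeline` (case-insensitively) is treated as a candidate pipeline script. A minimal, self-contained sketch of that step (the temp directory and file names below are invented for illustration):

```shell
# build a tiny stand-in for the cloned kubeflow/pipelines repo
repo="$(mktemp -d)"
mkdir -p "${repo}/samples/core/demo" "${repo}/docs"

# one file that defines a pipeline, one that does not
printf 'import kfp.dsl as dsl\n@dsl.pipeline(name="demo")\ndef demo(): pass\n' \
    > "${repo}/samples/core/demo/pipe.py"
printf 'print("no pipeline here")\n' > "${repo}/docs/other.py"

# the discovery step: list *.py files containing "dsl.Pipeline" (case-insensitive)
find "${repo}" -name "*.py" -exec grep -i -l "dsl.Pipeline" {} + | sort
```

The real script additionally excludes `contrib/`, `sdk/python/`, and `.venv/` paths so the report can be split into sample groups.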

```YAML
Compilation status for testdata DSL scripts:
Success: 25
Failure: 5
Total: 30
Compilation status for core samples:
Success: 18
Failure: 5
Total: 23
Compilation status for 3rd-party contributed samples:
Success: 23
Failure: 5
Total: 28
Overall success rate: 69/84 = 82%
```
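The overall success rate on the last line is derived from the per-file `SUCCESS:`/`FAILURE:` lines with `grep -c` and a small `awk` one-liner, roughly like this (using a throwaway report file for illustration):

```shell
# a throwaway three-line report standing in for test_kfp_samples_report.txt
report="$(mktemp)"
printf 'SUCCESS: a.py\nSUCCESS: b.py\nFAILURE: c.py\n' > "${report}"

# count successes and total compilation attempts
SUCCESS=$(grep -c "SUCCESS" "${report}")
TOTAL=$(grep -c "SUCCESS\|FAILURE" "${report}")

# compute the percentage; %% prints a literal percent sign
awk -v s="${SUCCESS}" -v t="${TOTAL}" \
    'BEGIN { printf("Overall success rate: %d/%d = %.0f%%\n", s, t, 100.0/t*s) }'
# prints: Overall success rate: 2/3 = 67%
```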

When the `--print-error-details` flag is used, a summary of all compilation errors is appended to the console
output, sorted by number of occurrences:

./test_kfp_samples.sh -a -s --print-error-details

```YAML
...
Overall success rate: 69/84 = 82%
Occurrences of NotImplementedError:
7: dynamic params are not yet implemented
Occurrences of other Errors:
2 ValueError: These Argo variables are not supported in Tekton Pipeline: {{workflow.uid}}
2 ValueError: These Argo variables are not supported in Tekton Pipeline: {{pod.name}}, {{workflow.name}}
1 ValueError: These Argo variables are not supported in Tekton Pipeline: {{workflow.uid}}, {{pod.name}}
1 ValueError: These Argo variables are not supported in Tekton Pipeline: {{workflow.name}}
1 ValueError: There are multiple pipelines: ['flipcoin_pipeline', 'flipcoin_exit_pipeline']. Please specify --function.
1 ValueError: A function with @dsl.pipeline decorator is required in the py file.
```
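The error summary itself is a plain `grep | sed | sort | uniq -c | sort -n -r` pipeline over the accumulated compiler log, counting identical messages and listing the most frequent first. A self-contained sketch (log contents invented for illustration):

```shell
# a throwaway log standing in for test_kfp_samples_output.txt
log="$(mktemp)"
cat > "${log}" <<'EOF'
NotImplementedError: dynamic params are not yet implemented
NotImplementedError: dynamic params are not yet implemented
ValueError: These Argo variables are not supported in Tekton Pipeline: {{workflow.uid}}
EOF

# strip the error-type prefix, then count duplicate messages, most frequent first
grep "NotImplementedError: " "${log}" \
    | sed 's/NotImplementedError: //' \
    | sort | uniq -c | sort -n -r
# prints a count followed by each distinct message
```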

## Disclaimer

**Note:** The reports above were created for the pipeline scripts found in KFP version `0.2.2` since the
`kfp_tekton` compiler code is still based on the `kfp` SDK compiler version `0.2.2`. We are working on
upgrading the `kfp_tekton` compiler code to be based on `kfp` version `0.5.0`
([issue #133](https://github.com/kubeflow/kfp-tekton/issues/133)).
259 changes: 211 additions & 48 deletions sdk/python/tests/test_kfp_samples.sh
@@ -14,35 +14,73 @@
# See the License for the specific language governing permissions and
# limitations under the License.

function help {
bold=$(tput bold)
normal=$(tput sgr0)
color=$(tput setaf 6)
echo
echo "This script clones the ${bold}kubeflow/pipelines${normal} repository and attempts to compile each Python"
echo "DSL script found in the compiler testdata directory, optionally including all samples."
echo
echo -e "${bold}USAGE:${normal}"
echo -e " $0 [${color}OPTIONS${normal}]"
echo
echo -e "${bold}OPTIONS:${normal}"
grep -iE '\-\-[a-z-]+\)\s+.*# .*$' "$0" | \
awk -v color="${color}" \
    -v normal="${normal}" \
    'BEGIN {FS = ").*# "}; {printf "%s%-35s%s%s\n", color, $1, normal, $2}'
echo
}

# process command line parameters
while (( $# > 0 )); do
case "$1" in
-v|--kfp-version) KFP_VERSION="$2"; shift 2 ;; # KFP SDK version, default: 0.2.2
-a|--include-all-samples) ALL_SAMPLES="TRUE"; shift 1 ;; # Compile all DSL scripts in KFP repo
-s|--dont-list-files) SKIP_FILES="TRUE"; shift 1 ;; # Suppress compile status for each DSL file
-e|--print-error-details) PRINT_ERRORS="TRUE"; shift 1 ;; # Print summary of compilation errors
-h|--help) help; exit 0 ;; # Show this help message
-*) echo "Unknown option '$1'"; exit 1 ;;
*) KFP_VERSION="$1"; break ;;
esac
done

# define global variables
KFP_VERSION=${KFP_VERSION:-0.2.2}
KFP_REPO_URL="https://github.com/kubeflow/pipelines.git"

SCRIPT_DIR="$(cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd)"
PROJECT_DIR="${TRAVIS_BUILD_DIR:-$(cd "${SCRIPT_DIR%/sdk/python/tests}"; pwd)}"
TEMP_DIR="${PROJECT_DIR}/temp"
VENV_DIR="${VIRTUAL_ENV:-${TEMP_DIR}/.venv}"
KFP_CLONE_DIR="${TEMP_DIR}/kubeflow/pipelines"
KFP_TESTDATA_DIR="${KFP_CLONE_DIR}/sdk/python/tests/compiler/testdata"
TEKTON_COMPILED_YAML_DIR="${TEMP_DIR}/tekton_compiler_output"
COMPILER_OUTPUTS_FILE="${TEMP_DIR}/test_kfp_samples_output.txt"
CONFIG_FILE="${PROJECT_DIR}/sdk/python/tests/config.yaml"
REPLACE_EXCEPTIONS="FALSE" # "TRUE" | "FALSE"

mkdir -p "${TEMP_DIR}"
mkdir -p "${TEKTON_COMPILED_YAML_DIR}"

# don't override the testdata report when running report for all samples
if [[ "${ALL_SAMPLES}" == "TRUE" ]]; then
COMPILE_REPORT_FILE="${TEMP_DIR}/test_kfp_samples_report_ALL.txt"
else
COMPILE_REPORT_FILE="${PROJECT_DIR}/sdk/python/tests/test_kfp_samples_report.txt"
fi

# create a temporary copy of the previous compilation report
COMPILE_REPORT_FILE_OLD="${COMPILE_REPORT_FILE/%.txt/_before.txt}"
touch "${COMPILE_REPORT_FILE}"
cp "${COMPILE_REPORT_FILE}" "${COMPILE_REPORT_FILE_OLD}"

# clone the kubeflow/pipelines repo to get the testdata DSL scripts
if [ ! -d "${KFP_CLONE_DIR}" ]; then
git -c advice.detachedHead=false clone -b "${KFP_VERSION}" "${KFP_REPO_URL}" "${KFP_CLONE_DIR}" -q
else
cd "${KFP_CLONE_DIR}"
git fetch --all -q
git -c advice.detachedHead=false checkout "${KFP_VERSION}" -f -q
cd - &> /dev/null
fi
@@ -57,26 +95,47 @@ if [ ! -d "${VENV_DIR}" ]; then
fi
source "${VENV_DIR}/bin/activate"

# install KFP with the desired KFP_VERSION (unless already installed)
if ! (pip show "kfp" | grep Version | grep -q "${KFP_VERSION}"); then
echo "Installing KFP ${KFP_VERSION} ..."
pip install -q -e "${KFP_CLONE_DIR}/sdk/python"
fi

# install KFP-Tekton compiler, unless already installed
if ! (pip show "kfp-tekton" | grep Location | grep -q "${PROJECT_DIR}"); then
echo "Installing KFP-Tekton ..."
pip install -q -e "${PROJECT_DIR}/sdk/python"
fi

# install 3rd party dependencies required for certain pipeline samples
if [[ "${ALL_SAMPLES}" == "TRUE" ]]; then
echo "Installing 3rd-party dependencies ..."
pip show ai_pipeline_params >/dev/null 2>&1 || pip install ai_pipeline_params
pip show kfp-azure-databricks >/dev/null 2>&1 || pip install -e "${KFP_CLONE_DIR}/samples/contrib/azure-samples/kfp-azure-databricks"
pip show kfp-arena >/dev/null 2>&1 || pip install "http://kubeflow.oss-cn-beijing.aliyuncs.com/kfp-arena/kfp-arena-0.6.tar.gz"
pip show fire >/dev/null 2>&1 || pip install fire
pip show tfx >/dev/null 2>&1 || pip install tfx
# reinstall KFP with the desired version to get all of its dependencies with their respective desired versions
pip install -q -e "${KFP_CLONE_DIR}/sdk/python"
fi

echo # just adding some separation for console output

# replace NotImplementedError with simple print out
if [[ "${REPLACE_EXCEPTIONS}" == "TRUE" ]]; then
find "${PROJECT_DIR}"/sdk/python/kfp_tekton/compiler/*.py -type f -exec gsed -i 's/raise NotImplementedError(/print("NotImplementedError: "+/' {} \;
find "${PROJECT_DIR}"/sdk/python/kfp_tekton/compiler/*.py -type f -exec gsed -i 's/raise ValueError(/print("ValueError: "+/' {} \;
fi

# delete the previous compiler output file
rm -f "${COMPILE_REPORT_FILE}"
rm -f "${COMPILER_OUTPUTS_FILE}"

# check which pipelines have special configurations
SPECIAL_PIPELINES=$(awk '/pipeline:/{print $NF}' "${CONFIG_FILE}")

function compile_dsl {
IS_SPECIAL=$(grep -E "${1##*/}" <<< "${SPECIAL_PIPELINES}")
if [ -z "${IS_SPECIAL}" ]; then
dsl-compile-tekton --py "$1" --output "$2"
else
@@ -85,49 +144,153 @@ fi
fi
}

# find the pipeline DSL scripts in the KFP repository
# make newlines the only separator to support arrays and looping over files with spaces in their name
IFS=$'\n'
if [[ "${ALL_SAMPLES}" == "TRUE" ]]; then
# find all the pipeline DSL scripts in the KFP repository
CONTRIB_PIPELINES=$(find "${KFP_CLONE_DIR}" -name "*.py" -path "*/contrib/*" -not -path "*/.venv/*" -exec grep -i -l "dsl.Pipeline" {} + | sort)
SAMPLE_PIPELINES=$(find "${KFP_CLONE_DIR}" -name "*.py" -not -path "*/contrib/*" -not -path "*/sdk/python/*" -not -path "*/.venv/*" -exec grep -i -l "dsl.Pipeline" {} + | sort)
DSL_SCRIPTS=(
"${KFP_TESTDATA_DIR}"/*.py
${SAMPLE_PIPELINES[@]}
${CONTRIB_PIPELINES[@]}
)
else
# only the pipelines in KFP compiler testdata
DSL_SCRIPTS=("${KFP_TESTDATA_DIR}"/*.py)
fi

# run the KFP-Tekton compiler on the dsl.Pipeline scripts
i=1
for f in "${DSL_SCRIPTS[@]}"; do

# display just the file name when compiling testdata scripts only, keep relative paths when compiling all KFP samples
if [[ "${ALL_SAMPLES}" == "TRUE" ]]; then
file_shortname="${f#${KFP_CLONE_DIR}/}"
else
file_shortname="${f##*/}"
fi
yaml_file="${TEKTON_COMPILED_YAML_DIR}/${f##*/}.yaml"

echo -e "\nCompiling ${file_shortname}:" >> "${COMPILER_OUTPUTS_FILE}"

# change directory to allow loading pipeline components from relative paths, set PYTHONPATH to local exec directory
cd "${f%/*}"
export PYTHONPATH="${f%/*}"

# compile the DSL script
if compile_dsl "${f}" "${yaml_file}" >> "${COMPILER_OUTPUTS_FILE}" 2>&1;
then
status="SUCCESS"
else
status="FAILURE"
fi

# print SUCCESS or FAILURE status to report file
echo "${status}: ${file_shortname}" | tee -a "${COMPILE_REPORT_FILE}" >> "${COMPILER_OUTPUTS_FILE}"

# print progress report to console
if [[ "${SKIP_FILES}" == "TRUE" ]]
then
echo -ne "\r\033[0KProgress: ${i}/${#DSL_SCRIPTS[@]}";
else
tail -1 "${COMPILE_REPORT_FILE}"
fi

# change back the working directory
cd - &> /dev/null

((++i))
done

# add some space
[[ "${SKIP_FILES}" == "TRUE" ]] && echo

# function to compile the success-failure-report
function compile_report() {
FILE_GROUP="$1"
FILE_FILTER="$2"

SUCCESS=$( grep "${FILE_FILTER}" "${COMPILE_REPORT_FILE}" | grep -c "SUCCESS" )
FAILURE=$( grep "${FILE_FILTER}" "${COMPILE_REPORT_FILE}" | grep -c "FAILURE" )
TOTAL=$( grep "${FILE_FILTER}" "${COMPILE_REPORT_FILE}" | grep -c "SUCCESS\|FAILURE" )
(
echo
echo "Compilation status for ${FILE_GROUP}:"
echo
echo " Success: ${SUCCESS}"
echo " Failure: ${FAILURE}"
echo " Total: ${TOTAL}"
)
}

# print success-failure-report summary in groups
if [[ "${ALL_SAMPLES}" == "TRUE" ]]; then
compile_report "testdata DSL scripts" "/testdata/"
compile_report "core samples" 'samples/core/\|samples/tutorials'
compile_report "3rd-party contributed samples" 'contrib/samples/\|samples/contrib'
else
compile_report "testdata DSL scripts" ".py"
fi

# print overall success-failure-report summary
SUCCESS=$( grep -c "SUCCESS" "${COMPILE_REPORT_FILE}" )
TOTAL=$( grep -c "SUCCESS\|FAILURE" "${COMPILE_REPORT_FILE}")
SUCCESS_RATE=$(awk -v s="${SUCCESS}" -v t="${TOTAL}" 'BEGIN { printf("%.0f%%\n", 100.0/t*s) }')
echo
echo "Overall success rate: ${SUCCESS}/${TOTAL} = ${SUCCESS_RATE}"

# print error statistics
if [[ "${PRINT_ERRORS}" == "TRUE" ]]; then
echo
echo "Occurrences of NotImplementedError:"
grep "NotImplementedError: " "${COMPILER_OUTPUTS_FILE}" | sed 's/NotImplementedError: //' | sort | uniq -c | sort -n -r
echo
echo "Occurrences of other Errors:"
grep "Error: " "${COMPILER_OUTPUTS_FILE}" | grep -v "NotImplementedError" | sort | uniq -c | sort -n -r | grep "." || echo " 0"
fi

# display all output file locations
echo
echo "Compilation status report: ${COMPILE_REPORT_FILE#${PROJECT_DIR}/}"
echo "Accumulated compiler logs: ${COMPILER_OUTPUTS_FILE#${PROJECT_DIR}/}"
echo "Compiled Tekton YAML files: ${TEKTON_COMPILED_YAML_DIR#${PROJECT_DIR}/}/"
echo

# check for missing Python modules
if grep -q "ModuleNotFoundError:" "${COMPILER_OUTPUTS_FILE}"; then
echo
echo "NOTE: Please update this script to install required Python modules:"
grep "ModuleNotFoundError:" "${COMPILER_OUTPUTS_FILE}" | sort | uniq | awk 'NF{ print " - " $NF }'
fi

# re-instate the NotImplementedErrors
if [[ "${REPLACE_EXCEPTIONS}" == "TRUE" ]]; then
find "${PROJECT_DIR}"/sdk/python/kfp_tekton/compiler/*.py -type f -exec gsed -i 's/print("NotImplementedError: "+/raise NotImplementedError(/' {} \;
find "${PROJECT_DIR}"/sdk/python/kfp_tekton/compiler/*.py -type f -exec gsed -i 's/print("ValueError: "+/raise ValueError(/' {} \;
fi

# for Travis/CI integration return exit code 1 if this report is different from the previous report
# sort the list of files since we cannot ensure same sort order on MacOS (local) and Linux (build machine)
if [[ ! "${ALL_SAMPLES}" == "TRUE" ]]; then
if ! diff -q -a -w -B \
<(sort "${COMPILE_REPORT_FILE}") \
<(sort "${COMPILE_REPORT_FILE_OLD}") >/dev/null 2>&1
then
echo
echo "This compilation report (left) differs from the previous report (right):"
echo
diff -y -W 80 --suppress-common-lines -d \
<(sort -k2 "${COMPILE_REPORT_FILE}") \
<(sort -k2 "${COMPILE_REPORT_FILE_OLD}")
echo
rm -f "${COMPILE_REPORT_FILE_OLD}"
exit 1
else
echo
echo "This compilation report did not change from the previous report."
echo
rm -f "${COMPILE_REPORT_FILE_OLD}"
exit 0
fi
fi
