From 87d91bd45ed0bf114f8ee72a6f40f8eae4dc641f Mon Sep 17 00:00:00 2001 From: Jiaxiao Zheng Date: Fri, 13 Mar 2020 17:34:34 -0700 Subject: [PATCH] TFX Iris sample (#3119) * init * update comment * fix module file * clean up * update to beam sample * add doc of default bucket * bump viz server tfma version * update iris sample to keras native version * update iris sample to keras native version * pin TFMA * add readme * add to sample test corpus * add prebuilt && update some config * sync frontend * update snapshot * update snapshot * fix gettingstarted page * fix unit test * fix unit test * update description * update some comments * add some dependencies. --- backend/requirements.txt | 2 +- .../src/apiserver/config/sample_config.json | 9 +- .../config/sample_config_from_backend.json | 7 +- .../GettingStarted.test.tsx.snap | 4 +- samples/core/iris/README.md | 31 +++ samples/core/iris/iris.py | 210 ++++++++++++++++++ test/sample-test/Dockerfile | 2 +- test/sample-test/configs/iris.config.yaml | 18 ++ test/sample-test/requirements.txt | 4 +- test/sample-test/run_sample_test.py | 5 + test/sample_test.yaml | 1 + 11 files changed, 282 insertions(+), 11 deletions(-) create mode 100644 samples/core/iris/README.md create mode 100644 samples/core/iris/iris.py create mode 100644 test/sample-test/configs/iris.config.yaml diff --git a/backend/requirements.txt b/backend/requirements.txt index 115831c8c816..130b3a5b99d0 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -108,7 +108,7 @@ tensorboard==2.1.0 # via tensorflow tensorflow-data-validation==0.21.4 # via tfx tensorflow-estimator==2.1.0 # via tensorflow tensorflow-metadata==0.21.1 # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl -tensorflow-model-analysis==0.21.4 # via tfx +tensorflow-model-analysis==0.21.5 # via -r requirements.in (line 2), tfx tensorflow-serving-api==2.1.0 # via tfx, tfx-bsl tensorflow-transform==0.21.2 # via tensorflow-data-validation, tfx tensorflow==2.1.0 # via ml-metadata, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-serving-api, tensorflow-transform, tfx, tfx-bsl diff --git a/backend/src/apiserver/config/sample_config.json b/backend/src/apiserver/config/sample_config.json index cb73d23d5fa1..a0830c173cde 100644 --- a/backend/src/apiserver/config/sample_config.json +++ b/backend/src/apiserver/config/sample_config.json @@ -1,11 +1,11 @@ [ { - "name": "[Demo] XGBoost - Training with Confusion Matrix", + "name": "[Demo] XGBoost - Training with confusion matrix", "description": "[source code](https://github.com/kubeflow/pipelines/blob/master/samples/core/xgboost_training_cm) [GCP Permission requirements](https://github.com/kubeflow/pipelines/blob/master/samples/core/xgboost_training_cm#requirements). A trainer that does end-to-end distributed training for XGBoost models.", "file": "/samples/core/xgboost_training_cm/xgboost_training_cm.py.yaml" }, { - "name": "[Demo] TFX - Taxi Tip Prediction Model Trainer", + "name": "[Demo] TFX - Taxi tip prediction model trainer", "description": "[source code](https://github.com/kubeflow/pipelines/tree/master/samples/core/parameterized_tfx_oss) [GCP Permission requirements](https://github.com/kubeflow/pipelines/blob/master/samples/core/parameterized_tfx_oss#permission). Example pipeline that does classification with model analysis based on a public tax cab dataset.", "file": "/samples/core/parameterized_tfx_oss/parameterized_tfx_oss.py.yaml" }, @@ -18,5 +18,10 @@ "name": "[Tutorial] DSL - Control structures", "description": "[source code](https://github.com/kubeflow/pipelines/tree/master/samples/tutorials/DSL%20-%20Control%20structures) Shows how to use conditional execution and exit handlers. This pipeline will randomly fail to demonstrate that the exit handler gets executed even in case of failure.", "file": "/samples/tutorials/DSL - Control structures/DSL - Control structures.py.yaml" + }, + { + "name": "[Demo] TFX - Iris classification pipeline", + "description": "[source code](https://github.com/kubeflow/pipelines/tree/master/samples/core/iris). Example pipeline that classifies Iris flower subspecies and how to use native Keras within TFX.", + "file": "/samples/core/iris/iris.py.yaml" } ] diff --git a/frontend/src/config/sample_config_from_backend.json b/frontend/src/config/sample_config_from_backend.json index 7d6c8b2dabcc..534f6ad51938 100644 --- a/frontend/src/config/sample_config_from_backend.json +++ b/frontend/src/config/sample_config_from_backend.json @@ -1,6 +1,7 @@ [ - "[Demo] XGBoost - Training with Confusion Matrix", - "[Demo] TFX - Taxi Tip Prediction Model Trainer", + "[Demo] XGBoost - Training with confusion matrix", + "[Demo] TFX - Taxi tip prediction model trainer", "[Tutorial] Data passing in python components", - "[Tutorial] DSL - Control structures" + "[Tutorial] DSL - Control structures", + "[Demo] TFX - Iris classification pipeline" ] diff --git a/frontend/src/pages/__snapshots__/GettingStarted.test.tsx.snap b/frontend/src/pages/__snapshots__/GettingStarted.test.tsx.snap index e35da65126fb..f70e0ec7e08b 100644 --- a/frontend/src/pages/__snapshots__/GettingStarted.test.tsx.snap +++ b/frontend/src/pages/__snapshots__/GettingStarted.test.tsx.snap @@ -182,13 +182,13 @@ Array [ undefined, 10, undefined, - "%7B%22predicates%22%3A%5B%7B%22key%22%3A%22name%22%2C%22op%22%3A%22EQUALS%22%2C%22string_value%22%3A%22%5BDemo%5D%20XGBoost%20-%20Training%20with%20Confusion%20Matrix%22%7D%5D%7D", + "%7B%22predicates%22%3A%5B%7B%22key%22%3A%22name%22%2C%22op%22%3A%22EQUALS%22%2C%22string_value%22%3A%22%5BDemo%5D%20XGBoost%20-%20Training%20with%20confusion%20matrix%22%7D%5D%7D", ], Array [ undefined, 10, undefined, - "%7B%22predicates%22%3A%5B%7B%22key%22%3A%22name%22%2C%22op%22%3A%22EQUALS%22%2C%22string_value%22%3A%22%5BDemo%5D%20TFX%20-%20Taxi%20Tip%20Prediction%20Model%20Trainer%22%7D%5D%7D", + "%7B%22predicates%22%3A%5B%7B%22key%22%3A%22name%22%2C%22op%22%3A%22EQUALS%22%2C%22string_value%22%3A%22%5BDemo%5D%20TFX%20-%20Taxi%20tip%20prediction%20model%20trainer%22%7D%5D%7D", ], Array [ undefined, diff --git a/samples/core/iris/README.md b/samples/core/iris/README.md new file mode 100644 index 000000000000..ec19bb1946a7 --- /dev/null +++ b/samples/core/iris/README.md @@ -0,0 +1,31 @@ +# Overview +[Tensorflow Extended (TFX)](https://github.com/tensorflow/tfx) is a Google-production-scale machine +learning platform based on TensorFlow. It provides a configuration framework to express ML pipelines +consisting of TFX components. Kubeflow Pipelines can be used as the orchestrator supporting the +execution of a TFX pipeline. + +This directory contains a sample that demonstrate how to author a ML pipeline +to solve the famous [iris flower classification problem](https://www.kaggle.com/arshid/iris-flower-dataset) +in TFX and run it on a KFP deployment. Specifically it highlights the following +functionalities: + +1. Support of [Keras](https://keras.io/) API; +2. Use [TFMA](https://github.com/tensorflow/model-analysis) for model validation; +3. Warm-start training by Resolver. + +# Compilation +In order to successfully compile the Python sample, it is recommended to use +`tfx>=0.21.2`. + +# Permission + +> :warning: If you are using **full-scope** or **workload identity enabled** cluster in hosted pipeline beta version, **DO NOT** follow this section. However you'll still need to enable corresponding GCP API. + +This pipeline requires Google Cloud Storage permission to run. +If KFP was deployed through K8S marketplace, please follow instructions in +[the guideline](https://github.com/kubeflow/pipelines/blob/master/manifests/gcp_marketplace/guide.md#gcp-service-account-credentials) +to make sure the service account has `storage.admin` role. +If KFP was deployed through +[standalone deployment](https://github.com/kubeflow/pipelines/tree/master/manifests/kustomize) +please refer to [Authenticating Pipelines to GCP](https://www.kubeflow.org/docs/gke/authentication-pipelines/) +to provide `storage.admin` permission. \ No newline at end of file diff --git a/samples/core/iris/iris.py b/samples/core/iris/iris.py new file mode 100644 index 000000000000..0c00d9a49b03 --- /dev/null +++ b/samples/core/iris/iris.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python3 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Iris flowers example using TFX. Based on https://github.com/tensorflow/tfx/blob/master/tfx/examples/iris/iris_pipeline_native_keras.py""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import kfp +from typing import Text + +import absl +import tensorflow_model_analysis as tfma + +from tfx.components import CsvExampleGen +from tfx.components import Evaluator +from tfx.components import ExampleValidator +from tfx.components import Pusher +from tfx.components import ResolverNode +from tfx.components import SchemaGen +from tfx.components import StatisticsGen +from tfx.components import Trainer +from tfx.components import Transform +from tfx.components.base import executor_spec +from tfx.components.trainer.executor import GenericExecutor +from tfx.dsl.experimental import latest_blessed_model_resolver +from tfx.orchestration import data_types +from tfx.orchestration import pipeline +from tfx.orchestration.kubeflow import kubeflow_dag_runner +from tfx.proto import trainer_pb2 +from tfx.proto import pusher_pb2 +from tfx.types import Channel +from tfx.types.standard_artifacts import Model +from tfx.types.standard_artifacts import ModelBlessing +from tfx.utils.dsl_utils import external_input + +_pipeline_name = 'iris_native_keras' + +# This example assumes that Iris flowers data is stored in GCS and the +# utility function is in iris_utils.py. Feel free to customize as needed. +_data_root_param = data_types.RuntimeParameter( + name='data-root', + default='gs://ml-pipeline-playground/iris/data', + ptype=Text, +) + +# Python module file to inject customized logic into the TFX components. The +# Transform and Trainer both require user-defined functions to run successfully. +# This file is fork from https://github.com/tensorflow/tfx/blob/master/tfx/examples/iris/iris_utils_native_keras.py +_module_file_param = data_types.RuntimeParameter( + name='module-file', + default= + 'gs://ml-pipeline-playground/iris/modules/iris_utils_native_keras.py', + ptype=Text, +) + +# Directory and data locations. This example assumes all of the flowers +# example code and metadata library is relative to a GCS path. +# Note: if one deployed KFP from GKE marketplace, it's possible to leverage +# the following magic placeholder to auto-populate the default GCS bucket +# associated with KFP deployment. Otherwise you'll need to replace it with your +# actual bucket name here or when creating a run. +_pipeline_root = os.path.join( + 'gs://{{kfp-default-bucket}}', 'tfx_iris', kfp.dsl.RUN_ID_PLACEHOLDER +) + + +def _create_pipeline( + pipeline_name: Text, pipeline_root: Text +) -> pipeline.Pipeline: + """Implements the Iris flowers pipeline with TFX.""" + examples = external_input(_data_root_param) + + # Brings data into the pipeline or otherwise joins/converts training data. + example_gen = CsvExampleGen(input=examples) + + # Computes statistics over data for visualization and example validation. + statistics_gen = StatisticsGen(examples=example_gen.outputs['examples']) + + # Generates schema based on statistics files. + infer_schema = SchemaGen( + statistics=statistics_gen.outputs['statistics'], infer_feature_shape=True + ) + + # Performs anomaly detection based on statistics and data schema. + validate_stats = ExampleValidator( + statistics=statistics_gen.outputs['statistics'], + schema=infer_schema.outputs['schema'] + ) + + # Performs transformations and feature engineering in training and serving. + transform = Transform( + examples=example_gen.outputs['examples'], + schema=infer_schema.outputs['schema'], + module_file=_module_file_param + ) + + # Uses user-provided Python function that implements a model using Keras. + trainer = Trainer( + module_file=_module_file_param, + custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor), + examples=transform.outputs['transformed_examples'], + transform_graph=transform.outputs['transform_graph'], + schema=infer_schema.outputs['schema'], + train_args=trainer_pb2.TrainArgs(num_steps=100), + eval_args=trainer_pb2.EvalArgs(num_steps=50) + ) + + # Get the latest blessed model for model validation. + model_resolver = ResolverNode( + instance_name='latest_blessed_model_resolver', + resolver_class=latest_blessed_model_resolver.LatestBlessedModelResolver, + model=Channel(type=Model), + model_blessing=Channel(type=ModelBlessing) + ) + + # Uses TFMA to compute an evaluation statistics over features of a model and + # perform quality validation of a candidate model (compared to a baseline). + # Note: to compile this successfully you'll need TFMA at >= 0.21.5 + eval_config = tfma.EvalConfig( + model_specs=[ + tfma.ModelSpec(name='candidate', label_key='variety'), + tfma.ModelSpec( + name='baseline', label_key='variety', is_baseline=True + ) + ], + slicing_specs=[tfma.SlicingSpec()], + metrics_specs=[ + tfma.MetricsSpec( + metrics=[ + tfma.MetricConfig( + class_name='SparseCategoricalAccuracy', + threshold=tfma.config.MetricThreshold( + value_threshold=tfma.GenericValueThreshold( + lower_bound={'value': 0.9} + ), + change_threshold=tfma.GenericChangeThreshold( + direction=tfma.MetricDirection.HIGHER_IS_BETTER, + absolute={'value': -1e-10} + ) + ) + ) + ] + ) + ] + ) + + # Uses TFMA to compute a evaluation statistics over features of a model. + model_analyzer = Evaluator( + examples=example_gen.outputs['examples'], + model=trainer.outputs['model'], + baseline_model=model_resolver.outputs['model'], + # Change threshold will be ignored if there is no baseline (first run). + eval_config=eval_config + ) + + # Checks whether the model passed the validation steps and pushes the model + # to a file destination if check passed. + pusher = Pusher( + model=trainer.outputs['model'], + model_blessing=model_analyzer.outputs['blessing'], + push_destination=pusher_pb2.PushDestination( + filesystem=pusher_pb2.PushDestination.Filesystem( + base_directory=os.path. + join(str(pipeline.ROOT_PARAMETER), 'model_serving') + ) + ) + ) + + return pipeline.Pipeline( + pipeline_name=pipeline_name, + pipeline_root=pipeline_root, + components=[ + example_gen, statistics_gen, infer_schema, validate_stats, transform, + trainer, model_resolver, model_analyzer, pusher + ], + enable_cache=True, + ) + + +if __name__ == '__main__': + absl.logging.set_verbosity(absl.logging.INFO) + # Make sure the version of TFX image used is consistent with the version of + # TFX SDK. Here we use tfx:0.21.2 image. + config = kubeflow_dag_runner.KubeflowDagRunnerConfig( + kubeflow_metadata_config=kubeflow_dag_runner. + get_default_kubeflow_metadata_config(), + tfx_image='tensorflow/tfx:0.21.2', + ) + kfp_runner = kubeflow_dag_runner.KubeflowDagRunner( + output_filename=__file__ + '.yaml', config=config + ) + kfp_runner.run( + _create_pipeline( + pipeline_name=_pipeline_name, pipeline_root=_pipeline_root + ) + ) diff --git a/test/sample-test/Dockerfile b/test/sample-test/Dockerfile index 6002852509b4..a0b1177e50fb 100644 --- a/test/sample-test/Dockerfile +++ b/test/sample-test/Dockerfile @@ -4,7 +4,7 @@ FROM google/cloud-sdk:279.0.0 RUN apt-get update -y RUN apt-get install --no-install-recommends -y -q libssl-dev libffi-dev wget ssh -RUN apt-get install --no-install-recommends -y -q default-jre default-jdk python3-setuptools python3.7-dev gcc +RUN apt-get install --no-install-recommends -y -q default-jre default-jdk python3-setuptools python3.7-dev gcc libpython3.7-dev zlib1g-dev RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py diff --git a/test/sample-test/configs/iris.config.yaml b/test/sample-test/configs/iris.config.yaml new file mode 100644 index 000000000000..d82821970343 --- /dev/null +++ b/test/sample-test/configs/iris.config.yaml @@ -0,0 +1,18 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +test_name: iris +arguments: + output: +run_pipeline: True \ No newline at end of file diff --git a/test/sample-test/requirements.txt b/test/sample-test/requirements.txt index 794f8aada94a..a027929f8464 100644 --- a/test/sample-test/requirements.txt +++ b/test/sample-test/requirements.txt @@ -45,7 +45,7 @@ google-pasta==0.1.8 # via tensorflow google-resumable-media==0.4.1 # via google-cloud-bigquery, google-cloud-storage googleapis-common-protos[grpc]==1.51.0 # via google-api-core, grpc-google-iam-v1, tensorflow-metadata grpc-google-iam-v1==0.12.3 # via google-cloud-bigtable, google-cloud-pubsub -grpcio==1.27.1 # via apache-beam, google-api-core, googleapis-common-protos, grpc-google-iam-v1, tensorboard, tensorflow, tensorflow-serving-api, tfx +grpcio==1.27.1 # via apache-beam, google-api-core, grpc-google-iam-v1, tensorboard, tensorflow, tensorflow-serving-api, tfx h5py==2.10.0 # via keras-applications hdfs==2.5.8 # via apache-beam httplib2==0.12.0 # via apache-beam, google-api-python-client, google-apitools, google-auth-httplib2, oauth2client @@ -117,7 +117,7 @@ tensorboard==2.1.0 # via tensorflow tensorflow-data-validation==0.21.4 # via tfx tensorflow-estimator==2.1.0 # via tensorflow tensorflow-metadata==0.21.1 # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl -tensorflow-model-analysis==0.21.4 # via tfx +tensorflow-model-analysis==0.21.5 # via -r requirements.in (line 13), tfx tensorflow-serving-api==2.1.0 # via tfx, tfx-bsl tensorflow-transform==0.21.2 # via tensorflow-data-validation, tfx tensorflow==2.1.0 # via ml-metadata, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-serving-api, tensorflow-transform, tfx, tfx-bsl diff --git a/test/sample-test/run_sample_test.py b/test/sample-test/run_sample_test.py index 675975b46785..0a6051f6f110 100644 --- a/test/sample-test/run_sample_test.py +++ b/test/sample-test/run_sample_test.py @@ -118,6 +118,11 @@ def run(self): self._test_args['output'], 'tfx_taxi_simple_' + kfp.dsl.RUN_ID_PLACEHOLDER) del self._test_args['output'] + if self._testname == 'iris': + self._test_args['pipeline-root'] = os.path.join( + self._test_args['output'], + 'tfx_iris_' + kfp.dsl.RUN_ID_PLACEHOLDER) + del self._test_args['output'] # Submit for pipeline running. if self._run_pipeline: diff --git a/test/sample_test.yaml b/test/sample_test.yaml index 2a198b23207c..79ece9cfd5d3 100644 --- a/test/sample_test.yaml +++ b/test/sample_test.yaml @@ -85,6 +85,7 @@ spec: - multiple_outputs - ai_platform - parameterized_tfx_oss + - iris # Build and push image - name: build-image-by-dockerfile retryStrategy: