diff --git a/samples/tfx-oss/TFX Example.ipynb b/samples/tfx-oss/TFX Example.ipynb new file mode 100644 index 00000000000..b5da68d1e47 --- /dev/null +++ b/samples/tfx-oss/TFX Example.ipynb @@ -0,0 +1,122 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TFX on KubeFlow Pipelines Example\n", + "\n", + "### Install TFX and KFP packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip3 install https://storage.googleapis.com/ml-pipeline/tfx/tfx-0.12.0rc0-py2.py3-none-any.whl \n", + "!pip3 install https://storage.googleapis.com/ml-pipeline/release/0.1.10/kfp.tar.gz --upgrade\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the TFX repo with sample pipeline\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!git clone https://github.com/tensorflow/tfx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# copy the trainer code to a storage bucket \n", + "!gsutil cp tfx/examples/chicago_taxi_pipeline/taxi_utils.py gs:///" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure the TFX pipeline example\n", + "\n", + "Reload this cell by running the load command to get the pipeline configuration file\n", + "```\n", + "%load tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py\n", + "```\n", + "\n", + "Configure:\n", + "- GCS storage bucket name (replace my-bucket)\n", + "- GCP project ID (replace my-gcp-project)\n", + "- Make sure the path to the taxi_utils.py is correct\n", + "\n", + "The dataset in BigQuery has 100M rows, you can change the query parameters in WHERE clause to limit the number of rows used.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "%load tfx/examples/chicago_taxi_pipeline/taxi_pipeline_kubeflow.py" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compile the pipeline and submit a run to the Kubeflow cluster" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get or create a new experiment\n", + "import kfp\n", + "client = kfp.Client()\n", + "experiment = client.create_experiment(\"TFX Examples\")\n", + "pipeline_filename = \"chicago_taxi_pipeline_kubeflow.tar.gz\"\n", + "\n", + "#Submit a pipeline run\n", + "run_name = 'Run 1'\n", + "run_result = client.run_pipeline(experiment.id, run_name, pipeline_filename, {})\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}