From b2510528a1fc728dd4d684af4f4cb9ac1bfdbb9a Mon Sep 17 00:00:00 2001
From: Ning Gao
Date: Fri, 14 Dec 2018 14:30:17 -0800
Subject: [PATCH 1/3] restructure the dataproc component folder

---
 components/README.md | 10 +++---
 .../{containers => }/analyze/Dockerfile | 0
 .../build.sh => analyze/build_image.sh} | 2 +-
 .../{xgboost => analyze/src}/analyze.py | 3 +-
 .../run.py => analyze/src/analyze_run.py} | 0
 .../dataproc/{containers => }/base/Dockerfile | 0
 .../base/build.sh => base/build_image.sh} | 14 +++++---
 .../dataproc/{xgboost => }/common/__init__.py | 0
 .../dataproc/{xgboost => }/common/_utils.py | 0
 .../create_cluster/Dockerfile | 0
 .../build_image.sh} | 2 +-
 .../src}/create_cluster.py | 3 +-
 .../src}/initialization_actions.sh | 0
 .../delete_cluster/Dockerfile | 0
 .../build_image.sh} | 2 +-
 .../src}/delete_cluster.py | 0
 .../{containers => }/predict/Dockerfile | 0
 .../build.sh => predict/build_image.sh} | 2 +-
 .../{xgboost => predict/src}/predict.py | 0
 .../{containers => }/train/Dockerfile | 0
 .../train/build.sh => train/build_image.sh} | 2 +-
 .../{xgboost/train => train/src}/README.md | 0
 .../src}/XGBoostPredictor.scala | 0
 .../train => train/src}/XGBoostTrainer.scala | 0
 .../dataproc/{xgboost => train/src}/train.py | 0
 .../train => train/src}/xgb4j_build.sh | 0
 .../{containers => }/transform/Dockerfile | 0
 .../build.sh => transform/build_image.sh} | 2 +-
 .../{xgboost => transform/src}/transform.py | 3 +-
 .../run.py => transform/src/transform_run.py} | 0
 components/dataproc/xgboost/setup.py | 35 -------------------
 .../tfx/taxi-cab-classification-pipeline.py | 16 ++++-----
 32 files changed, 32 insertions(+), 64 deletions(-)
 rename components/dataproc/{containers => }/analyze/Dockerfile (100%)
 rename components/dataproc/{containers/analyze/build.sh => analyze/build_image.sh} (98%)
 rename components/dataproc/{xgboost => analyze/src}/analyze.py (95%)
 rename components/dataproc/{xgboost/analyze/run.py => analyze/src/analyze_run.py} (100%)
 rename components/dataproc/{containers => }/base/Dockerfile (100%)
 rename components/dataproc/{containers/base/build.sh => base/build_image.sh} (63%)
 rename components/dataproc/{xgboost => }/common/__init__.py (100%)
 rename components/dataproc/{xgboost => }/common/_utils.py (100%)
 rename components/dataproc/{containers => }/create_cluster/Dockerfile (100%)
 rename components/dataproc/{containers/create_cluster/build.sh => create_cluster/build_image.sh} (99%)
 rename components/dataproc/{xgboost => create_cluster/src}/create_cluster.py (93%)
 rename components/dataproc/{xgboost/create_cluster => create_cluster/src}/initialization_actions.sh (100%)
 rename components/dataproc/{containers => }/delete_cluster/Dockerfile (100%)
 rename components/dataproc/{containers/delete_cluster/build.sh => delete_cluster/build_image.sh} (99%)
 rename components/dataproc/{xgboost => delete_cluster/src}/delete_cluster.py (100%)
 rename components/dataproc/{containers => }/predict/Dockerfile (100%)
 rename components/dataproc/{containers/predict/build.sh => predict/build_image.sh} (98%)
 rename components/dataproc/{xgboost => predict/src}/predict.py (100%)
 rename components/dataproc/{containers => }/train/Dockerfile (100%)
 rename components/dataproc/{containers/train/build.sh => train/build_image.sh} (98%)
 rename components/dataproc/{xgboost/train => train/src}/README.md (100%)
 rename components/dataproc/{xgboost/train => train/src}/XGBoostPredictor.scala (100%)
 rename components/dataproc/{xgboost/train => train/src}/XGBoostTrainer.scala (100%)
 rename components/dataproc/{xgboost => train/src}/train.py (100%)
 rename components/dataproc/{xgboost/train => train/src}/xgb4j_build.sh (100%)
 rename components/dataproc/{containers => }/transform/Dockerfile (100%)
 rename components/dataproc/{containers/transform/build.sh => transform/build_image.sh} (99%)
 rename components/dataproc/{xgboost => transform/src}/transform.py (96%)
 rename components/dataproc/{xgboost/transform/run.py => transform/src/transform_run.py} (100%)
 delete mode 100644 components/dataproc/xgboost/setup.py

diff --git a/components/README.md b/components/README.md
index 08de81240e0..b9f31bcd8d2 100644
--- a/components/README.md
+++ b/components/README.md
@@ -7,11 +7,11 @@ as input and may produce one or more
 
 **Example: XGBoost DataProc components**
 
-* [Set up cluster](dataproc/xgboost/create_cluster.py)
-* [Analyze](dataproc/xgboost/analyze.py)
-* [Transform](dataproc/xgboost/transform.py)
-* [Distributed train](dataproc/xgboost/train.py)
-* [Delete cluster](dataproc/xgboost/delete_cluster.py)
+* [Set up cluster](dataproc/create_cluster/src/create_cluster.py)
+* [Analyze](dataproc/analyze/src/analyze.py)
+* [Transform](dataproc/transform/src/transform.py)
+* [Distributed train](dataproc/train/src/train.py)
+* [Delete cluster](dataproc/delete_cluster/src/delete_cluster.py)
 
 Each task usually includes two parts:
 
diff --git a/components/dataproc/containers/analyze/Dockerfile b/components/dataproc/analyze/Dockerfile
similarity index 100%
rename from components/dataproc/containers/analyze/Dockerfile
rename to components/dataproc/analyze/Dockerfile
diff --git a/components/dataproc/containers/analyze/build.sh b/components/dataproc/analyze/build_image.sh
similarity index 98%
rename from components/dataproc/containers/analyze/build.sh
rename to components/dataproc/analyze/build_image.sh
index f95689c0950..64bfbad9500 100755
--- a/components/dataproc/containers/analyze/build.sh
+++ b/components/dataproc/analyze/build_image.sh
@@ -44,7 +44,7 @@ fi
 
 # build base image
 pushd ../base
-./build.sh
+./build_image.sh
 popd
 
 docker build -t ${LOCAL_IMAGE_NAME} .
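For orientation, the rename list at the head of this patch collapses the old split between containers/ (Dockerfiles plus build scripts) and xgboost/ (component sources) into one self-contained directory per component, roughly:

    before                                        after
    components/dataproc/containers/X/Dockerfile   components/dataproc/X/Dockerfile
    components/dataproc/containers/X/build.sh     components/dataproc/X/build_image.sh
    components/dataproc/xgboost/X.py              components/dataproc/X/src/X.py
    components/dataproc/xgboost/common/           components/dataproc/common/

(A schematic only, with X standing for analyze, create_cluster, delete_cluster, predict, train, and transform; the exact per-component file set is given by the rename list above.)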
diff --git a/components/dataproc/xgboost/analyze.py b/components/dataproc/analyze/src/analyze.py
similarity index 95%
rename from components/dataproc/xgboost/analyze.py
rename to components/dataproc/analyze/src/analyze.py
index 0c83bbbd478..6e19f33fff6 100644
--- a/components/dataproc/xgboost/analyze.py
+++ b/components/dataproc/analyze/src/analyze.py
@@ -40,8 +40,7 @@ def main(argv=None):
   args = parser.parse_args()
 
   code_path = os.path.dirname(os.path.realpath(__file__))
-  dirname = os.path.basename(__file__).split('.')[0]
-  runfile_source = os.path.join(code_path, dirname, 'run.py')
+  runfile_source = os.path.join(code_path, 'analyze_run.py')
   dest_files = _utils.copy_resources_to_gcs([runfile_source], args.output)
   try:
     api = _utils.get_client()
diff --git a/components/dataproc/xgboost/analyze/run.py b/components/dataproc/analyze/src/analyze_run.py
similarity index 100%
rename from components/dataproc/xgboost/analyze/run.py
rename to components/dataproc/analyze/src/analyze_run.py
diff --git a/components/dataproc/containers/base/Dockerfile b/components/dataproc/base/Dockerfile
similarity index 100%
rename from components/dataproc/containers/base/Dockerfile
rename to components/dataproc/base/Dockerfile
diff --git a/components/dataproc/containers/base/build.sh b/components/dataproc/base/build_image.sh
similarity index 63%
rename from components/dataproc/containers/base/build.sh
rename to components/dataproc/base/build_image.sh
index b646cf0f0c0..fe38da1c675 100755
--- a/components/dataproc/containers/base/build.sh
+++ b/components/dataproc/base/build_image.sh
@@ -14,11 +14,17 @@
 # limitations under the License.
 
-mkdir -p ./build
-rsync -arvp "../../xgboost"/ ./build/
+mkdir -p ./build/common
+rsync -arvp "../analyze/src"/ ./build/
+rsync -arvp "../train/src"/ ./build/
+rsync -arvp "../predict/src"/ ./build/
+rsync -arvp "../create_cluster/src"/ ./build/
+rsync -arvp "../delete_cluster/src"/ ./build/
+rsync -arvp "../transform/src"/ ./build/
+rsync -arvp "../common"/ ./build/common/
 
-cp ../../../license.sh ./build
-cp ../../../third_party_licenses.csv ./build
+cp ../../license.sh ./build
+cp ../../third_party_licenses.csv ./build
 
 docker build -t ml-pipeline-dataproc-base .
 rm -rf ./build
diff --git a/components/dataproc/xgboost/common/__init__.py b/components/dataproc/common/__init__.py
similarity index 100%
rename from components/dataproc/xgboost/common/__init__.py
rename to components/dataproc/common/__init__.py
diff --git a/components/dataproc/xgboost/common/_utils.py b/components/dataproc/common/_utils.py
similarity index 100%
rename from components/dataproc/xgboost/common/_utils.py
rename to components/dataproc/common/_utils.py
diff --git a/components/dataproc/containers/create_cluster/Dockerfile b/components/dataproc/create_cluster/Dockerfile
similarity index 100%
rename from components/dataproc/containers/create_cluster/Dockerfile
rename to components/dataproc/create_cluster/Dockerfile
diff --git a/components/dataproc/containers/create_cluster/build.sh b/components/dataproc/create_cluster/build_image.sh
similarity index 99%
rename from components/dataproc/containers/create_cluster/build.sh
rename to components/dataproc/create_cluster/build_image.sh
index 75c27573072..9998ec2e4a8 100755
--- a/components/dataproc/containers/create_cluster/build.sh
+++ b/components/dataproc/create_cluster/build_image.sh
@@ -44,7 +44,7 @@ fi
 
 # build base image
 pushd ../base
-./build.sh
+./build_image.sh
 popd
 
 docker build -t ${LOCAL_IMAGE_NAME} .
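Two changes above work together. base/build_image.sh now rsyncs every component's src/ tree into one flat ./build directory, so analyze.py can drop the per-component subdirectory lookup: its helper script becomes a plain sibling of the entrypoint. A sketch of the staged layout implied by the rsync lines (illustrative, not exhaustive):

    build/
      analyze.py                 entrypoint, from analyze/src/
      analyze_run.py             helper, now a sibling of analyze.py
      create_cluster.py
      initialization_actions.sh
      transform.py
      transform_run.py
      train.py
      predict.py
      delete_cluster.py
      common/_utils.py           shared helpers, from ../common

This flattening is also why the two run.py helpers were renamed to analyze_run.py and transform_run.py: two files both named run.py would collide once their src/ trees merge into a single directory.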
diff --git a/components/dataproc/xgboost/create_cluster.py b/components/dataproc/create_cluster/src/create_cluster.py
similarity index 93%
rename from components/dataproc/xgboost/create_cluster.py
rename to components/dataproc/create_cluster/src/create_cluster.py
index 6091f7e0161..11e508ebf10 100644
--- a/components/dataproc/xgboost/create_cluster.py
+++ b/components/dataproc/create_cluster/src/create_cluster.py
@@ -35,8 +35,7 @@ def main(argv=None):
   args = parser.parse_args()
 
   code_path = os.path.dirname(os.path.realpath(__file__))
-  dirname = os.path.basename(__file__).split('.')[0]
-  init_file_source = os.path.join(code_path, dirname, 'initialization_actions.sh')
+  init_file_source = os.path.join(code_path, 'initialization_actions.sh')
   dest_files = _utils.copy_resources_to_gcs([init_file_source], args.staging)
 
   try:
diff --git a/components/dataproc/xgboost/create_cluster/initialization_actions.sh b/components/dataproc/create_cluster/src/initialization_actions.sh
similarity index 100%
rename from components/dataproc/xgboost/create_cluster/initialization_actions.sh
rename to components/dataproc/create_cluster/src/initialization_actions.sh
diff --git a/components/dataproc/containers/delete_cluster/Dockerfile b/components/dataproc/delete_cluster/Dockerfile
similarity index 100%
rename from components/dataproc/containers/delete_cluster/Dockerfile
rename to components/dataproc/delete_cluster/Dockerfile
diff --git a/components/dataproc/containers/delete_cluster/build.sh b/components/dataproc/delete_cluster/build_image.sh
similarity index 99%
rename from components/dataproc/containers/delete_cluster/build.sh
rename to components/dataproc/delete_cluster/build_image.sh
index 3439aa6338d..c0afdcd9c19 100755
--- a/components/dataproc/containers/delete_cluster/build.sh
+++ b/components/dataproc/delete_cluster/build_image.sh
@@ -44,7 +44,7 @@ fi
 
 # build base image
 pushd ../base
-./build.sh
+./build_image.sh
 popd
 
 docker build -t ${LOCAL_IMAGE_NAME} .
diff --git a/components/dataproc/xgboost/delete_cluster.py b/components/dataproc/delete_cluster/src/delete_cluster.py
similarity index 100%
rename from components/dataproc/xgboost/delete_cluster.py
rename to components/dataproc/delete_cluster/src/delete_cluster.py
diff --git a/components/dataproc/containers/predict/Dockerfile b/components/dataproc/predict/Dockerfile
similarity index 100%
rename from components/dataproc/containers/predict/Dockerfile
rename to components/dataproc/predict/Dockerfile
diff --git a/components/dataproc/containers/predict/build.sh b/components/dataproc/predict/build_image.sh
similarity index 98%
rename from components/dataproc/containers/predict/build.sh
rename to components/dataproc/predict/build_image.sh
index 21a2044978b..457dd788075 100755
--- a/components/dataproc/containers/predict/build.sh
+++ b/components/dataproc/predict/build_image.sh
@@ -44,7 +44,7 @@ fi
 
 # build base image
 pushd ../base
-./build.sh
+./build_image.sh
 popd
 
 docker build -t ${LOCAL_IMAGE_NAME} .
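With this layout every component image builds the same way and only the directory varies; the train and transform scripts below follow the identical pattern. A hypothetical convenience loop (the for-wrapper is not part of this change, and PROJECT_ID/TAG are assumed environment variables; the -p/-t flags are the ones .cloudbuild.yaml passes in PATCH 2/3 below):

    # Rebuild all six Dataproc component images from a repo checkout.
    # Each build_image.sh first rebuilds the shared ml-pipeline-dataproc-base image.
    for c in analyze create_cluster delete_cluster predict train transform; do
      (cd "components/dataproc/${c}" && ./build_image.sh -p "${PROJECT_ID}" -t "${TAG}")
    done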
diff --git a/components/dataproc/xgboost/predict.py b/components/dataproc/predict/src/predict.py
similarity index 100%
rename from components/dataproc/xgboost/predict.py
rename to components/dataproc/predict/src/predict.py
diff --git a/components/dataproc/containers/train/Dockerfile b/components/dataproc/train/Dockerfile
similarity index 100%
rename from components/dataproc/containers/train/Dockerfile
rename to components/dataproc/train/Dockerfile
diff --git a/components/dataproc/containers/train/build.sh b/components/dataproc/train/build_image.sh
similarity index 98%
rename from components/dataproc/containers/train/build.sh
rename to components/dataproc/train/build_image.sh
index 49b241b942e..e8ae37f3c25 100755
--- a/components/dataproc/containers/train/build.sh
+++ b/components/dataproc/train/build_image.sh
@@ -44,7 +44,7 @@ fi
 
 # build base image
 pushd ../base
-./build.sh
+./build_image.sh
 popd
 
 docker build -t ${LOCAL_IMAGE_NAME} .
diff --git a/components/dataproc/xgboost/train/README.md b/components/dataproc/train/src/README.md
similarity index 100%
rename from components/dataproc/xgboost/train/README.md
rename to components/dataproc/train/src/README.md
diff --git a/components/dataproc/xgboost/train/XGBoostPredictor.scala b/components/dataproc/train/src/XGBoostPredictor.scala
similarity index 100%
rename from components/dataproc/xgboost/train/XGBoostPredictor.scala
rename to components/dataproc/train/src/XGBoostPredictor.scala
diff --git a/components/dataproc/xgboost/train/XGBoostTrainer.scala b/components/dataproc/train/src/XGBoostTrainer.scala
similarity index 100%
rename from components/dataproc/xgboost/train/XGBoostTrainer.scala
rename to components/dataproc/train/src/XGBoostTrainer.scala
diff --git a/components/dataproc/xgboost/train.py b/components/dataproc/train/src/train.py
similarity index 100%
rename from components/dataproc/xgboost/train.py
rename to components/dataproc/train/src/train.py
diff --git a/components/dataproc/xgboost/train/xgb4j_build.sh b/components/dataproc/train/src/xgb4j_build.sh
similarity index 100%
rename from components/dataproc/xgboost/train/xgb4j_build.sh
rename to components/dataproc/train/src/xgb4j_build.sh
diff --git a/components/dataproc/containers/transform/Dockerfile b/components/dataproc/transform/Dockerfile
similarity index 100%
rename from components/dataproc/containers/transform/Dockerfile
rename to components/dataproc/transform/Dockerfile
diff --git a/components/dataproc/containers/transform/build.sh b/components/dataproc/transform/build_image.sh
similarity index 99%
rename from components/dataproc/containers/transform/build.sh
rename to components/dataproc/transform/build_image.sh
index 4130e1ac276..e48d7afa016 100755
--- a/components/dataproc/containers/transform/build.sh
+++ b/components/dataproc/transform/build_image.sh
@@ -44,7 +44,7 @@ fi
 
 # build base image
 pushd ../base
-./build.sh
+./build_image.sh
 popd
 
 docker build -t ${LOCAL_IMAGE_NAME} .
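All six build scripts have now appeared, and they share a working-directory assumption: pushd ../base, and the ../X/src and ../../license.sh paths inside base/build_image.sh, are all relative. Unless build_image.sh normalizes its working directory earlier in the script (only the tail of each script is shown in these hunks), invoking it by path from elsewhere would presumably break those lookups:

    # works: relative paths resolve against the component directory
    cd components/dataproc/train && ./build_image.sh -p my-project -t dev

    # would likely fail: ../base resolves against the repo root instead
    ./components/dataproc/train/build_image.sh -p my-project -t dev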
diff --git a/components/dataproc/xgboost/transform.py b/components/dataproc/transform/src/transform.py
similarity index 96%
rename from components/dataproc/xgboost/transform.py
rename to components/dataproc/transform/src/transform.py
index 4483a859835..1aa67afdd93 100644
--- a/components/dataproc/xgboost/transform.py
+++ b/components/dataproc/transform/src/transform.py
@@ -51,8 +51,7 @@ def main(argv=None):
     _utils.delete_directory_from_gcs(os.path.join(args.output, 'eval'))
 
   code_path = os.path.dirname(os.path.realpath(__file__))
-  dirname = os.path.basename(__file__).split('.')[0]
-  runfile_source = os.path.join(code_path, dirname, 'run.py')
+  runfile_source = os.path.join(code_path, 'transform_run.py')
   dest_files = _utils.copy_resources_to_gcs([runfile_source], args.output)
   try:
     api = _utils.get_client()
diff --git a/components/dataproc/xgboost/transform/run.py b/components/dataproc/transform/src/transform_run.py
similarity index 100%
rename from components/dataproc/xgboost/transform/run.py
rename to components/dataproc/transform/src/transform_run.py
diff --git a/components/dataproc/xgboost/setup.py b/components/dataproc/xgboost/setup.py
deleted file mode 100644
index 609ca6537dd..00000000000
--- a/components/dataproc/xgboost/setup.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright 2018 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from setuptools import setup, find_packages
-
-
-setup(
-  name='XGBoostPipeline',
-  version='0.1.0',
-  packages=find_packages(),
-  description='XGBoost Pipeline with DataProc and Spark',
-  author='Google',
-  keywords=[
-  ],
-  license="Apache Software License",
-  long_description="""
-  """,
-  install_requires=[
-    'tensorflow==1.4.1',
-  ],
-  package_data={
-  },
-  data_files=[],
-)
diff --git a/samples/tfx/taxi-cab-classification-pipeline.py b/samples/tfx/taxi-cab-classification-pipeline.py
index d2ba230d935..7cec306e1d2 100755
--- a/samples/tfx/taxi-cab-classification-pipeline.py
+++ b/samples/tfx/taxi-cab-classification-pipeline.py
@@ -21,7 +21,7 @@
 def dataflow_tf_data_validation_op(inference_data: 'GcsUri', validation_data: 'GcsUri', column_names: 'GcsUri[text/json]', key_columns, project: 'GcpProject', mode, validation_output: 'GcsUri[Directory]', step_name='validation'):
     return dsl.ContainerOp(
         name = step_name,
-        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:dev', #TODO-release: update the release tag for the next release
+        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:0.1.4', #TODO-release: update the release tag for the next release
         arguments = [
             '--csv-data-for-inference', inference_data,
             '--csv-data-to-validate', validation_data,
@@ -40,7 +40,7 @@ def dataflow_tf_data_validation_op(inference_data: 'GcsUri', validation_data: 'G
 def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', preprocess_mode, preprocess_module: 'GcsUri[text/code/python]', transform_output: 'GcsUri[Directory]', step_name='preprocess'):
     return dsl.ContainerOp(
         name = step_name,
-        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:0.1.3-rc.2', #TODO-release: update the release tag for the next release
+        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:0.1.4', #TODO-release: update the release tag for the next release
         arguments = [
             '--train', train_data,
             '--eval', evaluation_data,
@@ -57,7 +57,7 @@ def dataflow_tf_transform_op(train_data: 'GcsUri', evaluation_data: 'GcsUri', sc
 def tf_train_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate: float, hidden_layer_size: int, steps: int, target: str, preprocess_module: 'GcsUri[text/code/python]', training_output: 'GcsUri[Directory]', step_name='training'):
     return dsl.ContainerOp(
         name = step_name,
-        image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2', #TODO-release: update the release tag for the next release
+        image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.4', #TODO-release: update the release tag for the next release
         arguments = [
             '--transformed-data-dir', transformed_data_dir,
             '--schema', schema,
@@ -74,7 +74,7 @@ def tf_train_op(transformed_data_dir, schema: 'GcsUri[text/json]', learning_rate
 def dataflow_tf_model_analyze_op(model: 'TensorFlow model', evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', project: 'GcpProject', analyze_mode, analyze_slice_column, analysis_output: 'GcsUri', step_name='analysis'):
     return dsl.ContainerOp(
         name = step_name,
-        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:0.1.3-rc.2', #TODO-release: update the release tag for the next release
+        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:0.1.4', #TODO-release: update the release tag for the next release
         arguments = [
             '--model', model,
             '--eval', evaluation_data,
@@ -91,7 +91,7 @@ def dataflow_tf_model_analyze_op(model: 'TensorFlow model', evaluation_data: 'Gc
 def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]', target: str, model: 'TensorFlow model', predict_mode, project: 'GcpProject', prediction_output: 'GcsUri', step_name='prediction'):
     return dsl.ContainerOp(
         name = step_name,
-        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:0.1.3-rc.2', #TODO-release: update the release tag for the next release
+        image = 'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:0.1.4', #TODO-release: update the release tag for the next release
         arguments = [
             '--data', evaluation_data,
             '--schema', schema,
@@ -108,7 +108,7 @@ def dataflow_tf_predict_op(evaluation_data: 'GcsUri', schema: 'GcsUri[text/json]
 def confusion_matrix_op(predictions: 'GcsUri', output: 'GcsUri', step_name='confusion_matrix'):
     return dsl.ContainerOp(
         name=step_name,
-        image='gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:dev', #TODO-release: update the release tag for the next release
+        image='gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:0.1.4', #TODO-release: update the release tag for the next release
         arguments=[
             '--output', '%s/{{workflow.name}}/confusionmatrix' % output,
             '--predictions', predictions,
@@ -119,7 +119,7 @@ def confusion_matrix_op(predictions: 'GcsUri', output: 'GcsUri', step_name='conf
 def roc_op(predictions: 'GcsUri', output: 'GcsUri', step_name='roc'):
     return dsl.ContainerOp(
         name=step_name,
-        image='gcr.io/ml-pipeline/ml-pipeline-local-roc:dev', #TODO-release: update the release tag for the next release
+        image='gcr.io/ml-pipeline/ml-pipeline-local-roc:0.1.4', #TODO-release: update the release tag for the next release
         arguments=[
             '--output', '%s/{{workflow.name}}/roc' % output,
             '--predictions', predictions,
@@ -130,7 +130,7 @@ def roc_op(predictions: 'GcsUri', output: 'GcsUri', step_name='roc'):
 def kubeflow_deploy_op(model: 'TensorFlow model', tf_server_name, step_name='deploy'):
     return dsl.ContainerOp(
         name = step_name,
-        image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:0.1.3-rc.2', #TODO-release: update the release tag for the next release
+        image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:0.1.4', #TODO-release: update the release tag for the next release
         arguments = [
             '--model-path', model,
             '--server-name', tf_server_name

From 687e01c6f27ce27a9a077166ab56abee70dd1869 Mon Sep 17 00:00:00 2001
From: Ning Gao
Date: Fri, 14 Dec 2018 14:38:15 -0800
Subject: [PATCH 2/3] more changes in cloudbuild and sample test and readme

---
 .cloudbuild.yaml                | 12 ++++++------
 samples/xgboost-spark/README.md | 24 ++++++++++++------------
 test/sample_test_v2.yaml        | 12 ++++++------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/.cloudbuild.yaml b/.cloudbuild.yaml
index b946651b1b1..3f610f94876 100644
--- a/.cloudbuild.yaml
+++ b/.cloudbuild.yaml
@@ -125,27 +125,27 @@ steps:
 # Build the Dataproc-based pipeline component images
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/containers/analyze && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/analyze/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocAnalyze'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/containers/create_cluster && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/create_cluster/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocCreateCluster'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/containers/delete_cluster && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/delete_cluster/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocDeleteCluster'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/containers/predict && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/predict/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocPredict'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/containers/transform && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/transform/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocTransform'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/containers/train && ./build.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/train/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocTrain'
 
 # Build the ResNet-CMLE sample pipeline component images
diff --git a/samples/xgboost-spark/README.md b/samples/xgboost-spark/README.md
index 9e651a18854..5c88f5b4e91 100644
--- a/samples/xgboost-spark/README.md
+++ b/samples/xgboost-spark/README.md
@@ -31,24 +31,24 @@ pipeline run results. Note that each pipeline run will create a unique directory
 ## Components source
 
 Create Cluster:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/xgboost/create_cluster)
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/containers/create_cluster)
+  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/create_cluster/src)
+  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/create_cluster)
 
 Analyze (step one for preprocessing):
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/xgboost/analyze)
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/containers/analyze)
+  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/analyze/src)
+  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/analyze)
 
 Transform (step two for preprocessing):
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/xgboost/transform)
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/containers/transform)
+  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/transform/src)
+  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/transform)
 
 Distributed Training:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/xgboost/train)
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/containers/train)
+  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/train/src)
+  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/train)
 
 Distributed Predictions:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/xgboost/predict)
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/containers/predict)
+  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/predict/src)
+  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/predict)
 
 Confusion Matrix:
   [source code](https://github.com/kubeflow/pipelines/tree/master/components/local/confusion_matrix/src)
@@ -61,7 +61,7 @@
 ROC:
   [source code](https://github.com/kubeflow/pipelines/tree/master/components/local/roc/src)
 
 Delete Cluster:
-  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/xgboost/delete_cluster)
-  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/containers/delete_cluster)
+  [source code](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/delete_cluster/src)
+  [container](https://github.com/kubeflow/pipelines/tree/master/components/dataproc/delete_cluster)
diff --git a/test/sample_test_v2.yaml b/test/sample_test_v2.yaml
index ff47837c98e..7182cf489c5 100644
--- a/test/sample_test_v2.yaml
+++ b/test/sample_test_v2.yaml
@@ -131,7 +131,7 @@ spec:
         - name: image-name
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-create-cluster-image-suffix}}"
         - name: build-script
-          value: components/dataproc/containers/create_cluster/build.sh
+          value: components/dataproc/create_cluster/build_image.sh
     - name: build-dataproc-delete-cluster-image
       template: build-image-by-script
       arguments:
@@ -139,7 +139,7 @@ spec:
         - name: image-name
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-delete-cluster-image-suffix}}"
         - name: build-script
-          value: components/dataproc/containers/delete_cluster/build.sh
+          value: components/dataproc/delete_cluster/build_image.sh
     - name: build-dataproc-analyze-image
      template: build-image-by-script
       arguments:
@@ -147,7 +147,7 @@ spec:
         - name: image-name
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-analyze-image-suffix}}"
         - name: build-script
-          value: components/dataproc/containers/analyze/build.sh
+          value: components/dataproc/analyze/build_image.sh
     - name: build-dataproc-transform-image
       template: build-image-by-script
       arguments:
@@ -155,7 +155,7 @@ spec:
         - name: image-name
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-transform-image-suffix}}"
         - name: build-script
-          value: components/dataproc/containers/transform/build.sh
+          value: components/dataproc/transform/build_image.sh
     - name: build-dataproc-train-image
       template: build-image-by-script
       arguments:
@@ -163,7 +163,7 @@ spec:
        - name: image-name
          value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-train-image-suffix}}"
        - name: build-script
-          value: components/dataproc/containers/train/build.sh
+          value: components/dataproc/train/build_image.sh
     - name: build-dataproc-predict-image
       template: build-image-by-script
       arguments:
@@ -171,7 +171,7 @@ spec:
         - name: image-name
           value: "{{inputs.parameters.target-image-prefix}}{{inputs.parameters.dataproc-predict-image-suffix}}"
         - name: build-script
-          value: components/dataproc/containers/predict/build.sh
+          value: components/dataproc/predict/build_image.sh
     - name: build-kubeflow-dnntrainer-image
       template: build-image-by-script
       arguments:

From 1018bb22b97afac1eaa2b96556bf9b7adfd27da9 Mon Sep 17 00:00:00 2001
From: Ning Gao
Date: Fri, 14 Dec 2018 14:48:35 -0800
Subject: [PATCH 3/3] remove src in cloudbuild

---
 .cloudbuild.yaml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.cloudbuild.yaml b/.cloudbuild.yaml
index 3f610f94876..79b8e216782 100644
--- a/.cloudbuild.yaml
+++ b/.cloudbuild.yaml
@@ -125,27 +125,27 @@ steps:
 # Build the Dataproc-based pipeline component images
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/analyze/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/analyze && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocAnalyze'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/create_cluster/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/create_cluster && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocCreateCluster'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/delete_cluster/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/delete_cluster && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocDeleteCluster'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/predict/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/predict && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocPredict'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/transform/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/transform && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocTransform'
 - name: 'gcr.io/cloud-builders/docker'
   entrypoint: '/bin/bash'
-  args: ['-c', 'cd /workspace/components/dataproc/train/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
+  args: ['-c', 'cd /workspace/components/dataproc/train && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA']
   id: 'buildDataprocTrain'
 
 # Build the ResNet-CMLE sample pipeline component images
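This third patch follows from PATCH 1's layout: build_image.sh sits at the component root (components/dataproc/X/build_image.sh, as both the rename list and the sample_test_v2.yaml paths in PATCH 2 already reflect), while src/ holds only the component sources. The cloudbuild steps introduced in PATCH 2 therefore cd'd into directories that contain no build script; for example:

    # PATCH 2 (broken): there is no build_image.sh under train/src/
    cd /workspace/components/dataproc/train/src && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA

    # PATCH 3 (fixed): the script lives in the component root
    cd /workspace/components/dataproc/train && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA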