-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add component yaml for GCP components * Add bigquery component yaml * Fix typo and set default instead of optional setting.
- Loading branch information
1 parent
85738cb
commit 77df6c2
Showing
6 changed files
with
289 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Component definition: runs the `query` command of the
# kfp_component.google.bigquery module inside the container image below.
name: Bigquery - Query
description: |
  Submit a query to Bigquery service and write outputs to a GCS blob.

# Inputs without a `default` key must be supplied by the caller.
inputs:
  - name: query
    description: 'The query used by Bigquery service to fetch the results.'
  - name: project_id
    description: 'The project to execute the query job.'
  - name: dataset_id
    description: 'The ID of the persistent dataset to keep the results of the query.'
  - name: table_id
    description: 'The ID of the table to keep the results of the query. If absent, the operation will generate a random id for the table.'
    default: ''
  - name: output_gcs_path
    description: 'The GCS blob path to dump the query results to.'
    default: ''
  - name: job_config
    description: 'The full config spec for the query job.'
    default: ''

outputs:
  - name: output_gcs_path
    description: 'The GCS blob path to dump the query results to.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # Module and command first, then one `--flag` / input-value pair per input.
    args:
      - kfp_component.google.bigquery
      - query
      - --query
      - inputValue: query
      - --project_id
      - inputValue: project_id
      - --dataset_id
      - inputValue: dataset_id
      - --table_id
      - inputValue: table_id
      - --output_gcs_path
      - inputValue: output_gcs_path
      - --job_config
      - inputValue: job_config
    env:
      # Presumably expanded by the workflow engine to the running pod's name; confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # File, per declared output name, from which the output value is collected.
    fileOutputs:
      output_gcs_path: /tmp/kfp/output/bigquery/query-output-path.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Component definition: runs the `launch_python` command of the
# kfp_component.google.dataflow module inside the container image below.
name: Launch Python
description: |
  Launch a self-executing beam python file.
# Inputs without a `default` key must be supplied by the caller.
inputs:
  - {name: python_file_path, description: 'The gcs or local path to the python file to run.'}
  - {name: project_id, description: 'The ID of the parent project.'}
  - {name: requirements_file_path, description: 'Optional, the gcs or local path to the pip requirements file', default: ''}
  - {name: location, description: 'The regional endpoint to which to direct the request.', default: ''}
  - {name: job_name_prefix, description: 'Optional. The prefix of the generated job name. If not provided, the method will generate a random name.', default: ''}
  - {name: args, description: 'The list of args to pass to the python file.', default: '[]'}
  - {name: wait_interval, description: 'Optional wait interval between calls to get job status. Defaults to 30.', default: '30'}
outputs:
  - {name: job_id, description: 'The id of the created dataflow job.'}
implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # Module and command first, then one `--flag` / input-value pair per input.
    args: [
      kfp_component.google.dataflow, launch_python,
      --python_file_path, {inputValue: python_file_path},
      --project_id, {inputValue: project_id},
      --requirements_file_path, {inputValue: requirements_file_path},
      --location, {inputValue: location},
      --job_name_prefix, {inputValue: job_name_prefix},
      --args, {inputValue: args},
      --wait_interval, {inputValue: wait_interval}
    ]
    env:
      # Presumably expanded by the workflow engine to the running pod's name; confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # File, per declared output name, from which the output value is collected.
    fileOutputs:
      job_id: /tmp/kfp/output/dataflow/job_id.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Component definition: runs the `launch_template` command of the
# kfp_component.google.dataflow module inside the container image below.
name: Launch Dataflow Template
description: |
  Launches a dataflow job from template.
# Inputs without a `default` key must be supplied by the caller.
inputs:
  - {name: project_id, description: 'Required. The ID of the Cloud Platform project that the job belongs to.'}
  - {name: gcs_path, description: 'Required. A Cloud Storage path to the template from which to create the job. Must be a valid Cloud Storage URL, beginning with `gs://`.'}
  - {name: launch_parameters, description: 'Parameters to provide to the template being launched. Schema defined in https://cloud.google.com/dataflow/docs/reference/rest/v1b3/LaunchTemplateParameters. `jobName` will be replaced by generated name.'}
  - {name: location, description: 'The regional endpoint to which to direct the request.', default: ''}
  - {name: job_name_prefix, description: 'Optional. The prefix of the generated job name. If not provided, the method will generate a random name.', default: ''}
  - {name: validate_only, description: 'If true, the request is validated but not actually executed. Defaults to false.', default: 'False'}
  - {name: wait_interval, description: 'Optional wait interval between calls to get job status. Defaults to 30.', default: '30'}
outputs:
  - {name: job_id, description: 'The ID of the created dataflow job.'}
implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # Module and command first, then one `--flag` / input-value pair per input.
    args: [
      kfp_component.google.dataflow, launch_template,
      --project_id, {inputValue: project_id},
      --gcs_path, {inputValue: gcs_path},
      --launch_parameters, {inputValue: launch_parameters},
      --location, {inputValue: location},
      --job_name_prefix, {inputValue: job_name_prefix},
      --validate_only, {inputValue: validate_only},
      --wait_interval, {inputValue: wait_interval}
    ]
    env:
      # Presumably expanded by the workflow engine to the running pod's name; confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # File, per declared output name, from which the output value is collected.
    fileOutputs:
      job_id: /tmp/kfp/output/dataflow/job_id.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Component definition: runs the `batch_predict` command of the
# kfp_component.google.ml_engine module inside the container image below.
name: Batch predict against a model with Cloud ML Engine
description: |
  Creates a MLEngine batch prediction job.

# Inputs without a `default` key must be supplied by the caller.
inputs:
  - name: project_id
    description: 'Required. The ID of the parent project of the job.'
  - name: model_path
    description: 'The path to the model. It can be either: `projects/[PROJECT_ID]/models/[MODEL_ID]` or `projects/[PROJECT_ID]/models/[MODEL_ID]/versions/[VERSION_ID]` or a GCS path of a model file.'
  - name: input_paths
    description: 'Required. The Google Cloud Storage location of the input data files. May contain wildcards.'
  - name: input_data_format
    description: 'Required. The format of the input data files. See https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#DataFormat.'
  - name: output_path
    description: 'Required. The output Google Cloud Storage location.'
  - name: region
    description: 'Required. The Google Compute Engine region to run the prediction job in.'
  - name: output_data_format
    description: 'Optional. Format of the output data files, defaults to JSON.'
    default: ''
  - name: prediction_input
    description: 'Input parameters to create a prediction job.'
    default: ''
  - name: job_id_prefix
    description: 'The prefix of the generated job id.'
    default: ''
  - name: wait_interval
    description: 'Optional wait interval between calls to get job status. Defaults to 30.'
    default: '30'

outputs:
  - name: job_id
    description: 'The ID of the created job.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # Module and command first, then one `--flag` / input-value pair per input.
    args:
      - kfp_component.google.ml_engine
      - batch_predict
      - --project_id
      - inputValue: project_id
      - --model_path
      - inputValue: model_path
      - --input_paths
      - inputValue: input_paths
      - --input_data_format
      - inputValue: input_data_format
      - --output_path
      - inputValue: output_path
      - --region
      - inputValue: region
      - --output_data_format
      - inputValue: output_data_format
      - --prediction_input
      - inputValue: prediction_input
      - --job_id_prefix
      - inputValue: job_id_prefix
      - --wait_interval
      - inputValue: wait_interval
    env:
      # Presumably expanded by the workflow engine to the running pod's name; confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # File, per declared output name, from which the output value is collected.
    fileOutputs:
      job_id: /tmp/kfp/output/ml_engine/job_id.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Component definition: runs the `deploy` command of the
# kfp_component.google.ml_engine module inside the container image below.
name: Deploy a model to Cloud ML Engine
description: |
  Creates a Cloud Machine Learning version and optionally a model if it does not exist.
# Inputs without a `default` key must be supplied by the caller.
inputs:
  - {name: model_uri, description: 'Required, the GCS URI which contains a model file. Commonly used TF model search path (export/exporter) will be used if it exists.'}
  - {name: project_id, description: 'Required, the ID of the parent project.'}
  - {name: model_id, description: 'Optional, the user provided name of the model.', default: ''}
  - {name: version_id, description: 'Optional, the user provided name of the version. If it is not provided, the operation uses a random name.', default: ''}
  - {name: runtime_version, description: 'Optional, the Cloud ML Engine runtime version to use for this deployment. If not set, Cloud ML Engine uses the default stable version, 1.0.', default: ''}
  - {name: python_version, description: 'Optional, the version of Python used in prediction. If not set, the default version is `2.7`. Python `3.5` is available when runtimeVersion is set to `1.4` and above. Python `2.7` works with all supported runtime versions.', default: ''}
  - {name: version, description: 'Optional, the payload of the new version.', default: ''}
  - {name: replace_existing_version, description: 'Boolean flag indicates whether to replace existing version in case of conflict.', default: 'False'}
  - {name: set_default, description: 'Boolean flag indicates whether to set the new version as default version in the model.', default: 'False'}
  - {name: wait_interval, description: 'The interval to wait for a long running operation.', default: '30'}
outputs:
  - {name: model_uri, description: 'The URI of the model.'}
  - {name: model_name, description: 'The name of the deployed model.'}
  - {name: version_name, description: 'The name of the deployed version.'}
implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # Module and command first, then one `--flag` / input-value pair per input.
    # Every `inputValue` placeholder must name an input declared above; the
    # previous `model_short_name` / `version_short_name` placeholders did not.
    # NOTE(review): the `--model_id`, `--version_id` and `--python_version` flag
    # names assume the `deploy` entry point uses the same parameter names as the
    # declared inputs - confirm against the container image.
    args: [
      kfp_component.google.ml_engine, deploy,
      --model_uri, {inputValue: model_uri},
      --project_id, {inputValue: project_id},
      --model_id, {inputValue: model_id},
      --version_id, {inputValue: version_id},
      --runtime_version, {inputValue: runtime_version},
      --python_version, {inputValue: python_version},
      --version, {inputValue: version},
      --replace_existing_version, {inputValue: replace_existing_version},
      --set_default, {inputValue: set_default},
      --wait_interval, {inputValue: wait_interval}
    ]
    env:
      # Presumably expanded by the workflow engine to the running pod's name; confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # File, per declared output name, from which the output value is collected.
    fileOutputs:
      model_uri: /tmp/kfp/output/ml_engine/model_uri.txt
      model_name: /tmp/kfp/output/ml_engine/model_name.txt
      version_name: /tmp/kfp/output/ml_engine/version_name.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# Copyright 2018 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
# Component definition: runs the `train` command of the
# kfp_component.google.ml_engine module inside the container image below.
name: Train a model with Cloud ML Engine
description: |
  Submits a Cloud Machine Learning training job.

# Only `project_id` has no default; every other input falls back to its default.
inputs:
  - name: project_id
    description: 'Required. The ID of the parent project of the job.'
  - name: python_module
    description: 'The Python module name to run after installing the packages.'
    default: ''
  - name: package_uris
    description: 'The Google Cloud Storage location of the packages with the training program and any additional dependencies. The maximum number of package URIs is 100.'
    default: ''
  - name: region
    description: 'The Google Compute Engine region to run the training job in.'
    default: ''
  - name: args
    description: 'Command line arguments to pass to the program.'
    default: ''
  - name: job_dir
    description: 'A Google Cloud Storage path in which to store training outputs and other data needed for training. This path is passed to your TensorFlow program as the `--job-dir` command-line argument. The benefit of specifying this field is that Cloud ML validates the path for use in training.'
    default: ''
  - name: python_version
    description: 'The version of Python used in training. If not set, the default version is `2.7`. Python `3.5` is available when runtimeVersion is set to `1.4` and above.'
    default: ''
  - name: runtime_version
    description: 'The Cloud ML Engine runtime version to use for training. If not set, Cloud ML Engine uses the default stable version, 1.0. '
    default: ''
  - name: master_image_uri
    description: 'The Docker image to run on the master replica. This image must be in Container Registry.'
    default: ''
  - name: worker_image_uri
    description: 'The Docker image to run on the worker replica. This image must be in Container Registry.'
    default: ''
  - name: training_input
    description: 'Input parameters to create a training job.'
    default: ''
  - name: job_id_prefix
    description: 'The prefix of the generated job id.'
    default: ''
  - name: wait_interval
    description: 'Optional wait interval between calls to get job status. Defaults to 30.'
    default: '30'

outputs:
  - name: job_id
    description: 'The ID of the created job.'

implementation:
  container:
    image: gcr.io/ml-pipeline-dogfood/ml-pipeline-gcp:latest
    # Module and command first, then one `--flag` / input-value pair per input.
    args:
      - kfp_component.google.ml_engine
      - train
      - --project_id
      - inputValue: project_id
      - --python_module
      - inputValue: python_module
      - --package_uris
      - inputValue: package_uris
      - --region
      - inputValue: region
      - --args
      - inputValue: args
      - --job_dir
      - inputValue: job_dir
      - --python_version
      - inputValue: python_version
      - --runtime_version
      - inputValue: runtime_version
      - --master_image_uri
      - inputValue: master_image_uri
      - --worker_image_uri
      - inputValue: worker_image_uri
      - --training_input
      - inputValue: training_input
      - --job_id_prefix
      - inputValue: job_id_prefix
      - --wait_interval
      - inputValue: wait_interval
    env:
      # Presumably expanded by the workflow engine to the running pod's name; confirm.
      KFP_POD_NAME: "{{pod.name}}"
    # File, per declared output name, from which the output value is collected.
    fileOutputs:
      job_id: /tmp/kfp/output/ml_engine/job_id.txt