Removed all properties from GCPPath
This will confuse our users and make type checking worse, but Hongye and Ajay requested it.

`s/type: (\{GCSPath:.*?}})(.*)/type: GCPPath$2 # type: $1/g`
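The sed-style substitution above can be reproduced in Python; a minimal sketch (the sample line is the `Schema` input from the tft component):

```python
import re

# The structured {GCSPath: ...} annotation is moved into a trailing
# "# type:" comment and replaced by the plain GCPPath type name,
# mirroring the substitution in the commit message.
GCS_TYPE = re.compile(r'type: (\{GCSPath:.*?}})(.*)')

def rewrite(line: str) -> str:
    return GCS_TYPE.sub(r'type: GCPPath\2 # type: \1', line)

line = "- {name: Schema, type: {GCSPath: {data_type: JSON}}, description: 'GCS json schema file path.'}"
print(rewrite(line))
# - {name: Schema, type: GCPPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}}
```

Lines whose type is not a structured GCSPath (e.g. `type: String`) do not match the pattern and pass through unchanged.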
Ark-kun committed Apr 4, 2019
1 parent 3fce318 commit dd55d25
Showing 8 changed files with 31 additions and 31 deletions.
10 changes: 5 additions & 5 deletions components/dataflow/predict/component.yaml
@@ -3,16 +3,16 @@ description: |
Runs TensorFlow prediction on Google Cloud Dataflow
Input and output data is in GCS
inputs:
-- {name: Data file pattern, type: {GCSPath: {data_type: CSV}}, description: 'GCS or local path of test file patterns.'}
-- {name: Schema, type: {GCSPath: {data_type: TFDV schema JSON}}, description: 'GCS json schema file path.'}
+- {name: Data file pattern, type: GCPPath, description: 'GCS or local path of test file patterns.'} # type: {GCSPath: {data_type: CSV}}
+- {name: Schema, type: GCPPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Target column, type: String, description: 'Name of the column for prediction target.'}
-- {name: Model, type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer export only one model. #TODO: Output single model from trainer
+- {name: Model, type: GCPPath, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer export only one model. #TODO: Output single model from trainer # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}
- {name: Batch size, type: Integer, default: '32', description: 'Batch size used in prediction.'}
- {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".'}
- {name: GCP project, type: GcpProject, description: 'The GCP project to run the dataflow job.'}
-- {name: Predictions dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file
+- {name: Predictions dir, type: GCPPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
outputs:
-- {name: Predictions dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file
+- {name: Predictions dir, type: GCPPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:2c2445df83fa879387a200747cc20f72a7ee9727
10 changes: 5 additions & 5 deletions components/dataflow/tfdv/component.yaml
@@ -6,15 +6,15 @@ description: |
* infer a schema,
* detect data anomalies.
inputs:
-- {name: Inference data, type: {GCSPath: {data_type: CSV}}, description: GCS path of the CSV file from which to infer the schema.}
-- {name: Validation data, type: {GCSPath: {data_type: CSV}}, description: GCS path of the CSV file whose contents should be validated.}
-- {name: Column names, type: {GCSPath: {data_type: JSON}}, description: GCS json file containing a list of column names.}
+- {name: Inference data, type: GCPPath, description: GCS path of the CSV file from which to infer the schema.} # type: {GCSPath: {data_type: CSV}}
+- {name: Validation data, type: GCPPath, description: GCS path of the CSV file whose contents should be validated.} # type: {GCSPath: {data_type: CSV}}
+- {name: Column names, type: GCPPath, description: GCS json file containing a list of column names.} # type: {GCSPath: {data_type: JSON}}
- {name: Key columns, type: String, description: Comma separated list of columns to treat as keys.}
- {name: GCP project, type: GcpProject, default: '', description: The GCP project to run the dataflow job.}
- {name: Run mode, type: String, default: local, description: Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud". }
-- {name: Validation output, type: {GCSPath: {path_type: Directory}}, description: GCS or local directory.}
+- {name: Validation output, type: GCPPath, description: GCS or local directory.} # type: {GCSPath: {path_type: Directory}}
outputs:
-- {name: Schema, type: {GCSPath: {data_type: TFDV schema JSON}}, description: GCS path of the inferred schema JSON.}
+- {name: Schema, type: GCPPath, description: GCS path of the inferred schema JSON.} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Validation result, type: String, description: Indicates whether anomalies were detected or not.}
implementation:
container:
10 changes: 5 additions & 5 deletions components/dataflow/tfma/component.yaml
@@ -6,15 +6,15 @@ description: |
* tracking metrics over time
* model quality performance on different feature slices
inputs:
-- {name: Model, type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}, description: GCS path to the model which will be evaluated.}
-- {name: Evaluation data, type: {GCSPath: {data_type: CSV}}, description: GCS path of eval files.}
-- {name: Schema, type: {GCSPath: {data_type: TFDV schema JSON}}, description: GCS json schema file path.}
+- {name: Model, type: GCPPath, description: GCS path to the model which will be evaluated.} # type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}
+- {name: Evaluation data, type: GCPPath, description: GCS path of eval files.} # type: {GCSPath: {data_type: CSV}}
+- {name: Schema, type: GCPPath, description: GCS json schema file path.} # type: {GCSPath: {data_type: TFDV schema JSON}}
- {name: Run mode, type: String, default: local, description: whether to run the job locally or in Cloud Dataflow.}
- {name: GCP project, type: GcpProject, default: '', description: 'The GCP project to run the dataflow job, if running in the `cloud` mode.'}
- {name: Slice columns, type: String, description: Comma-separated list of columns on which to slice for analysis.}
-- {name: Analysis results dir, type: {GCSPath: {path_type: Directory}}, description: GCS or local directory where the analysis results should be written.}
+- {name: Analysis results dir, type: GCPPath, description: GCS or local directory where the analysis results should be written.} # type: {GCSPath: {path_type: Directory}}
outputs:
-- {name: Analysis results dir, type: {GCSPath: {path_type: Directory}}, description: GCS or local directory where the analysis results should were written.}
+- {name: Analysis results dir, type: GCPPath, description: GCS or local directory where the analysis results should were written.} # type: {GCSPath: {path_type: Directory}}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:2c2445df83fa879387a200747cc20f72a7ee9727
12 changes: 6 additions & 6 deletions components/dataflow/tft/component.yaml
@@ -1,15 +1,15 @@
name: Transform using TF on Dataflow
description: Runs TensorFlow Transform on Google Cloud Dataflow
inputs:
-- {name: Training data file pattern, type: {GCSPath: {data_type: CSV}}, description: 'GCS path of train file patterns.'} #Also supports local CSV
-- {name: Evaluation data file pattern, type: {GCSPath: {data_type: CSV}}, description: 'GCS path of eval file patterns.'} #Also supports local CSV
-- {name: Schema, type: {GCSPath: {data_type: JSON}}, description: 'GCS json schema file path.'}
+- {name: Training data file pattern, type: GCPPath, description: 'GCS path of train file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}}
+- {name: Evaluation data file pattern, type: GCPPath, description: 'GCS path of eval file patterns.'} #Also supports local CSV # type: {GCSPath: {data_type: CSV}}
+- {name: Schema, type: GCPPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}}
- {name: GCP project, type: GcpProject, description: 'The GCP project to run the dataflow job.'}
- {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".' }
-- {name: Preprocessing module, type: {GCSPath: {data_type: Python}}, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'}
-- {name: Transformed data dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory'} #Also supports local paths
+- {name: Preprocessing module, type: GCPPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}}
+- {name: Transformed data dir, type: GCPPath, description: 'GCS or local directory'} #Also supports local paths # type: {GCSPath: {path_type: Directory}}
outputs:
-- {name: Transformed data dir, type: {GCSPath: {path_type: Directory}}}
+- {name: Transformed data dir, type: GCPPath} # type: {GCSPath: {path_type: Directory}}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:2c2445df83fa879387a200747cc20f72a7ee9727
2 changes: 1 addition & 1 deletion components/kubeflow/deployer/component.yaml
@@ -1,7 +1,7 @@
name: Kubeflow - Serve TF model
description: Serve TensorFlow model using Kubeflow TF-serving
inputs:
-- {name: Model dir, type: {GCSPath: {path_type: Directory}}, description: 'Path of GCS directory containing exported Tensorflow model.'}
+- {name: Model dir, type: GCPPath, description: 'Path of GCS directory containing exported Tensorflow model.'} # type: {GCSPath: {path_type: Directory}}
- {name: Cluster name, type: String, default: '', description: 'Kubernetes cluster name where the TS-serving service should be deployed. Uses the current cluster by default.'}
- {name: Namespace, type: String, default: 'kubeflow', description: 'Kubernetes namespace where the TS-serving service should be deployed.'}
- {name: Server name, type: String, default: 'model-server', description: 'TF-serving server name to use when deploying.'}
10 changes: 5 additions & 5 deletions components/kubeflow/dnntrainer/component.yaml
@@ -1,18 +1,18 @@
name: Train FC DNN using TF
description: Trains fully-connected neural network using Tensorflow
inputs:
-- {name: Transformed data dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path containing tf-transformed training and eval data.'}
-- {name: Schema, type: {GCSPath: {data_type: JSON}}, description: 'GCS json schema file path.'}
+- {name: Transformed data dir, type: GCPPath, description: 'GCS path containing tf-transformed training and eval data.'} # type: {GCSPath: {path_type: Directory}}
+- {name: Schema, type: GCPPath, description: 'GCS json schema file path.'} # type: {GCSPath: {data_type: JSON}}
- {name: Learning rate, type: Float, default: '0.1', description: 'Learning rate for training.'}
- {name: Optimizer, type: String, default: 'Adagrad', description: 'Optimizer for training. Valid values are: Adam, SGD, Adagrad. If not provided, tf.estimator default will be used.'}
- {name: Hidden layer size, type: String, default: '100', description: 'Comma-separated hidden layer sizes. For example "200,100,50".'}
- {name: Steps, type: Integer, description: 'Maximum number of training steps to perform. If unspecified, will honor epochs.'}
#- {name: Epochs, type: Integer, default: '', description: 'Maximum number of training data epochs on which to train. If both "steps" and "epochs" are specified, the training job will run for "steps" or "epochs", whichever occurs first.'}
- {name: Target, type: String, description: 'Name of the column for prediction target.'}
-- {name: Preprocessing module, type: {GCSPath: {data_type: Python}}, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'}
-- {name: Training output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'}
+- {name: Preprocessing module, type: GCPPath, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} # type: {GCSPath: {data_type: Python}}
+- {name: Training output dir, type: GCPPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}}
outputs:
-- {name: Training output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'}
+- {name: Training output dir, type: GCPPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:2c2445df83fa879387a200747cc20f72a7ee9727
4 changes: 2 additions & 2 deletions components/local/confusion_matrix/component.yaml
@@ -1,8 +1,8 @@
name: Confusion matrix
description: Calculates confusion matrix
inputs:
-- {name: Predictions, type: {GCSPath: {data_type: CSV}}, description: 'GCS path of prediction file pattern.'}
-- {name: Output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path of the output directory.'}
+- {name: Predictions, type: GCPPath, description: 'GCS path of prediction file pattern.'} # type: {GCSPath: {data_type: CSV}}
+- {name: Output dir, type: GCPPath, description: 'GCS path of the output directory.'} # type: {GCSPath: {path_type: Directory}}
#outputs:
# - {name: UI metadata, type: UI metadata}
# - {name: Metrics, type: Metrics}
4 changes: 2 additions & 2 deletions components/local/roc/component.yaml
@@ -1,11 +1,11 @@
name: ROC curve
description: Calculates Receiver Operating Characteristic curve. See https://en.wikipedia.org/wiki/Receiver_operating_characteristic
inputs:
-- {name: Predictions dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path of prediction file pattern.'} #TODO: Replace dir data + schema files
+- {name: Predictions dir, type: GCPPath, description: 'GCS path of prediction file pattern.'} #TODO: Replace dir data + schema files # type: {GCSPath: {path_type: Directory}}
- {name: True class, type: String, default: 'true', description: 'The true class label for the sample. Default is "true".'}
- {name: True score column, type: String, default: 'true', description: 'The name of the column for positive probability.'}
- {name: Target lambda, type: String, default: '', description: 'Text of Python lambda function which returns boolean value indicating whether the classification result is correct.\nFor example, "lambda x: x[''a''] and x[''b'']". If missing, input must have a "target" column.'}
-- {name: Output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path of the output directory.'} #TODO: Replace dir with single file
+- {name: Output dir, type: GCPPath, description: 'GCS path of the output directory.'} #TODO: Replace dir with single file # type: {GCSPath: {path_type: Directory}}
#outputs:
# - {name: UI metadata, type: UI metadata}
# - {name: Metrics, type: Metrics}
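Because each rewritten line keeps the old structured type in a trailing `# type:` comment, the change is mechanically reversible. A sketch of the inverse substitution (the `restore` helper is illustrative, not part of the commit):

```python
import re

# Move the "# type: {GCSPath: ...}" comment back into the type field.
# The greedy (.*) binds to the *last* " # type:" on the line, so earlier
# inline comments (e.g. "#TODO: ...") are left in place.
RESTORE = re.compile(r'type: GCPPath(.*) # type: (\{GCSPath.*)')

def restore(line: str) -> str:
    return RESTORE.sub(r'type: \2\1', line)

line = ("- {name: Schema, type: GCPPath, description: 'GCS json schema file path.'}"
        " # type: {GCSPath: {data_type: JSON}}")
print(restore(line))
# - {name: Schema, type: {GCSPath: {data_type: JSON}}, description: 'GCS json schema file path.'}
```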
