diff --git a/components/dataflow/predict/component.yaml b/components/dataflow/predict/component.yaml index 2e68d6a97d2..66a0f367c36 100644 --- a/components/dataflow/predict/component.yaml +++ b/components/dataflow/predict/component.yaml @@ -3,16 +3,16 @@ description: | Runs TensorFlow prediction on Google Cloud Dataflow Input and output data is in GCS inputs: - - {name: Data file pattern, type: {GcsPath: {data_type: CSV}}, description: 'GCS or local path of test file patterns.'} - - {name: Schema, type: {GcsPath: {data_type: TFDV schema JSON}}, description: 'GCS json schema file path.'} + - {name: Data file pattern, type: {GCSPath: {data_type: CSV}}, description: 'GCS or local path of test file patterns.'} + - {name: Schema, type: {GCSPath: {data_type: TFDV schema JSON}}, description: 'GCS json schema file path.'} - {name: Target column, type: String, description: 'Name of the column for prediction target.'} - - {name: Model, type: {GcsPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer export only one model. #TODO: Output single model from trainer + - {name: Model, type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}, description: 'GCS or local path of model trained with tft preprocessed data.'} # Models trained with estimator are exported to base/export/export/123456781 directory. # Our trainer exports only one model. #TODO: Output single model from trainer - {name: Batch size, type: Integer, default: '32', description: 'Batch size used in prediction.'} - {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. 
Valid values are "local" and "cloud".'} - {name: GCP project, type: GcpProject, description: 'The GCP project to run the dataflow job.'} - - {name: Predictions dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file + - {name: Predictions dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file outputs: - - {name: Predictions dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file + - {name: Predictions dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:2c2445df83fa879387a200747cc20f72a7ee9727 diff --git a/components/dataflow/tfdv/component.yaml b/components/dataflow/tfdv/component.yaml index f51c3cdfa46..4e9bd8eae80 100644 --- a/components/dataflow/tfdv/component.yaml +++ b/components/dataflow/tfdv/component.yaml @@ -6,15 +6,15 @@ description: | * infer a schema, * detect data anomalies. 
inputs: -- {name: Inference data, type: {GcsPath: {data_type: CSV}}, description: GCS path of the CSV file from which to infer the schema.} -- {name: Validation data, type: {GcsPath: {data_type: CSV}}, description: GCS path of the CSV file whose contents should be validated.} -- {name: Column names, type: {GcsPath: {data_type: JSON}}, description: GCS json file containing a list of column names.} +- {name: Inference data, type: {GCSPath: {data_type: CSV}}, description: GCS path of the CSV file from which to infer the schema.} +- {name: Validation data, type: {GCSPath: {data_type: CSV}}, description: GCS path of the CSV file whose contents should be validated.} +- {name: Column names, type: {GCSPath: {data_type: JSON}}, description: GCS json file containing a list of column names.} - {name: Key columns, type: String, description: Comma separated list of columns to treat as keys.} - {name: GCP project, type: GcpProject, default: '', description: The GCP project to run the dataflow job.} - {name: Run mode, type: String, default: local, description: Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud". 
} -- {name: Validation output, type: {GcsPath: {path_type: Directory}}, description: GCS or local directory.} +- {name: Validation output, type: {GCSPath: {path_type: Directory}}, description: GCS or local directory.} outputs: -- {name: Schema, type: {GcsPath: {data_type: TFDV schema JSON}}, description: GCS path of the inferred schema JSON.} +- {name: Schema, type: {GCSPath: {data_type: TFDV schema JSON}}, description: GCS path of the inferred schema JSON.} - {name: Validation result, type: String, description: Indicates whether anomalies were detected or not.} implementation: container: diff --git a/components/dataflow/tfma/component.yaml b/components/dataflow/tfma/component.yaml index ca9d9d0e9c1..cbd0a7be19e 100644 --- a/components/dataflow/tfma/component.yaml +++ b/components/dataflow/tfma/component.yaml @@ -6,15 +6,15 @@ description: | * tracking metrics over time * model quality performance on different feature slices inputs: -- {name: Model, type: {GcsPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}, description: GCS path to the model which will be evaluated.} -- {name: Evaluation data, type: {GcsPath: {data_type: CSV}}, description: GCS path of eval files.} -- {name: Schema, type: {GcsPath: {data_type: TFDV schema JSON}}, description: GCS json schema file path.} +- {name: Model, type: {GCSPath: {path_type: Directory, data_type: Exported TensorFlow models dir}}, description: GCS path to the model which will be evaluated.} +- {name: Evaluation data, type: {GCSPath: {data_type: CSV}}, description: GCS path of eval files.} +- {name: Schema, type: {GCSPath: {data_type: TFDV schema JSON}}, description: GCS json schema file path.} - {name: Run mode, type: String, default: local, description: whether to run the job locally or in Cloud Dataflow.} - {name: GCP project, type: GcpProject, default: '', description: 'The GCP project to run the dataflow job, if running in the `cloud` mode.'} - {name: Slice columns, type: String, description: 
Comma-separated list of columns on which to slice for analysis.} -- {name: Analysis results dir, type: {GcsPath: {path_type: Directory}}, description: GCS or local directory where the analysis results should be written.} +- {name: Analysis results dir, type: {GCSPath: {path_type: Directory}}, description: GCS or local directory where the analysis results should be written.} outputs: -- {name: Analysis results dir, type: {GcsPath: {path_type: Directory}}, description: GCS or local directory where the analysis results should were written.} +- {name: Analysis results dir, type: {GCSPath: {path_type: Directory}}, description: GCS or local directory where the analysis results were written.} implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:2c2445df83fa879387a200747cc20f72a7ee9727 diff --git a/components/dataflow/tft/component.yaml b/components/dataflow/tft/component.yaml index a4bedbbc977..0a7909e8f64 100644 --- a/components/dataflow/tft/component.yaml +++ b/components/dataflow/tft/component.yaml @@ -1,15 +1,15 @@ name: Transform using TF on Dataflow description: Runs TensorFlow Transform on Google Cloud Dataflow inputs: - - {name: Training data file pattern, type: {GcsPath: {data_type: CSV}}, description: 'GCS path of train file patterns.'} #Also supports local CSV - - {name: Evaluation data file pattern, type: {GcsPath: {data_type: CSV}}, description: 'GCS path of eval file patterns.'} #Also supports local CSV - - {name: Schema, type: {GcsPath: {data_type: JSON}}, description: 'GCS json schema file path.'} + - {name: Training data file pattern, type: {GCSPath: {data_type: CSV}}, description: 'GCS path of train file patterns.'} #Also supports local CSV + - {name: Evaluation data file pattern, type: {GCSPath: {data_type: CSV}}, description: 'GCS path of eval file patterns.'} #Also supports local CSV + - {name: Schema, type: {GCSPath: {data_type: JSON}}, description: 'GCS json schema file path.'} + - {name: GCP project, type: GcpProject, 
description: 'The GCP project to run the dataflow job.'} - {name: Run mode, type: String, default: local, description: 'Whether to run the job locally or in Cloud Dataflow. Valid values are "local" and "cloud".' } - - {name: Preprocessing module, type: {GcsPath: {data_type: Python}}, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} - - {name: Transformed data dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS or local directory'} #Also supports local paths + - {name: Preprocessing module, type: {GCSPath: {data_type: Python}}, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} + - {name: Transformed data dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory'} #Also supports local paths outputs: - - {name: Transformed data dir, type: {GcsPath: {path_type: Directory}}} + - {name: Transformed data dir, type: {GCSPath: {path_type: Directory}}} implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:2c2445df83fa879387a200747cc20f72a7ee9727 diff --git a/components/kubeflow/dnntrainer/component.yaml b/components/kubeflow/dnntrainer/component.yaml index 1a63ea022bb..714f3944874 100644 --- a/components/kubeflow/dnntrainer/component.yaml +++ b/components/kubeflow/dnntrainer/component.yaml @@ -1,18 +1,18 @@ name: Train FC DNN using TF description: Trains fully-connected neural network using Tensorflow inputs: - - {name: Transformed data dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS path containing tf-transformed training and eval data.'} - - {name: Schema, type: {GcsPath: {data_type: JSON}}, description: 'GCS json schema file path.'} + - {name: Transformed data dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path containing tf-transformed training and eval data.'} + - {name: Schema, type: {GCSPath: {data_type: JSON}}, description: 'GCS json schema file 
path.'} - {name: Learning rate, type: Float, default: '0.1', description: 'Learning rate for training.'} - {name: Optimizer, type: String, default: 'Adagrad', description: 'Optimizer for training. Valid values are: Adam, SGD, Adagrad. If not provided, tf.estimator default will be used.'} - {name: Hidden layer size, type: String, default: '100', description: 'Comma-separated hidden layer sizes. For example "200,100,50".'} - {name: Steps, type: Integer, description: 'Maximum number of training steps to perform. If unspecified, will honor epochs.'} #- {name: Epochs, type: Integer, default: '', description: 'Maximum number of training data epochs on which to train. If both "steps" and "epochs" are specified, the training job will run for "steps" or "epochs", whichever occurs first.'} - {name: Target, type: String, description: 'Name of the column for prediction target.'} - - {name: Preprocessing module, type: {GcsPath: {data_type: Python}}, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} - - {name: Training output dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS or local directory.'} + - {name: Preprocessing module, type: {GCSPath: {data_type: Python}}, default: '', description: 'GCS path to a python file defining "preprocess" and "get_feature_columns" functions.'} + - {name: Training output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'} outputs: - - {name: Training output dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS or local directory.'} + - {name: Training output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS or local directory.'} implementation: container: image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:2c2445df83fa879387a200747cc20f72a7ee9727 diff --git a/components/local/confusion_matrix/component.yaml b/components/local/confusion_matrix/component.yaml index b7925d6ce5c..68d54a8df34 100644 --- 
a/components/local/confusion_matrix/component.yaml +++ b/components/local/confusion_matrix/component.yaml @@ -1,8 +1,8 @@ name: Confusion matrix description: Calculates confusion matrix inputs: - - {name: Predictions, type: {GcsPath: {data_type: CSV}}, description: 'GCS path of prediction file pattern.'} - - {name: Output dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS path of the output directory.'} + - {name: Predictions, type: {GCSPath: {data_type: CSV}}, description: 'GCS path of prediction file pattern.'} + - {name: Output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path of the output directory.'} #outputs: # - {name: UI metadata, type: UI metadata} # - {name: Metrics, type: Metrics} diff --git a/components/local/roc/component.yaml b/components/local/roc/component.yaml index 02b8a180ee0..5bcec836a26 100644 --- a/components/local/roc/component.yaml +++ b/components/local/roc/component.yaml @@ -1,11 +1,11 @@ name: ROC curve description: Calculates Receiver Operating Characteristic curve. See https://en.wikipedia.org/wiki/Receiver_operating_characteristic inputs: - - {name: Predictions dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS path of prediction file pattern.'} #TODO: Replace dir data + schema files + - {name: Predictions dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path of prediction file pattern.'} #TODO: Replace dir data + schema files - {name: True class, type: String, default: 'true', description: 'The true class label for the sample. Default is "true".'} - {name: True score column, type: String, default: 'true', description: 'The name of the column for positive probability.'} - {name: Target lambda, type: String, default: '', description: 'Text of Python lambda function which returns boolean value indicating whether the classification result is correct.\nFor example, "lambda x: x[''a''] and x[''b'']". 
If missing, input must have a "target" column.'} - - {name: Output dir, type: {GcsPath: {path_type: Directory}}, description: 'GCS path of the output directory.'} #TODO: Replace dir with single file + - {name: Output dir, type: {GCSPath: {path_type: Directory}}, description: 'GCS path of the output directory.'} #TODO: Replace dir with single file #outputs: # - {name: UI metadata, type: UI metadata} # - {name: Metrics, type: Metrics}