feat(dataflow)!: update the API
BREAKING CHANGE: This release has breaking changes.

#### dataflow:v1b3

The following keys were deleted:
- schemas.StreamingScalingReport.properties.activeBundleCount.description
- schemas.StreamingScalingReport.properties.maximumBytesCount.description
- schemas.StreamingScalingReport.properties.outstandingBytesCount.description

The following keys were added:
- schemas.Base2Exponent.description
- schemas.Base2Exponent.id
- schemas.Base2Exponent.properties.numberOfBuckets.description
- schemas.Base2Exponent.properties.numberOfBuckets.format
- schemas.Base2Exponent.properties.numberOfBuckets.type
- schemas.Base2Exponent.properties.scale.description
- schemas.Base2Exponent.properties.scale.format
- schemas.Base2Exponent.properties.scale.type
- schemas.Base2Exponent.type
- schemas.BucketOptions.description
- schemas.BucketOptions.id
- schemas.BucketOptions.properties.exponential.$ref
- schemas.BucketOptions.properties.exponential.description
- schemas.BucketOptions.properties.linear.$ref
- schemas.BucketOptions.properties.linear.description
- schemas.BucketOptions.type
- schemas.DataflowHistogramValue.description
- schemas.DataflowHistogramValue.id
- schemas.DataflowHistogramValue.properties.bucketCounts.description
- schemas.DataflowHistogramValue.properties.bucketCounts.items.format
- schemas.DataflowHistogramValue.properties.bucketCounts.items.type
- schemas.DataflowHistogramValue.properties.bucketCounts.type
- schemas.DataflowHistogramValue.properties.bucketOptions.$ref
- schemas.DataflowHistogramValue.properties.bucketOptions.description
- schemas.DataflowHistogramValue.properties.count.description
- schemas.DataflowHistogramValue.properties.count.format
- schemas.DataflowHistogramValue.properties.count.type
- schemas.DataflowHistogramValue.properties.outlierStats.$ref
- schemas.DataflowHistogramValue.properties.outlierStats.description
- schemas.DataflowHistogramValue.type
- schemas.FlexTemplateRuntimeEnvironment.properties.streamingMode.description
- schemas.FlexTemplateRuntimeEnvironment.properties.streamingMode.enum
- schemas.FlexTemplateRuntimeEnvironment.properties.streamingMode.enumDescriptions
- schemas.FlexTemplateRuntimeEnvironment.properties.streamingMode.type
- schemas.Linear.description
- schemas.Linear.id
- schemas.Linear.properties.numberOfBuckets.description
- schemas.Linear.properties.numberOfBuckets.format
- schemas.Linear.properties.numberOfBuckets.type
- schemas.Linear.properties.start.description
- schemas.Linear.properties.start.format
- schemas.Linear.properties.start.type
- schemas.Linear.properties.width.description
- schemas.Linear.properties.width.format
- schemas.Linear.properties.width.type
- schemas.Linear.type
- schemas.MetricValue.description
- schemas.MetricValue.id
- schemas.MetricValue.properties.metric.description
- schemas.MetricValue.properties.metric.type
- schemas.MetricValue.properties.metricLabels.additionalProperties.type
- schemas.MetricValue.properties.metricLabels.description
- schemas.MetricValue.properties.metricLabels.type
- schemas.MetricValue.properties.valueHistogram.$ref
- schemas.MetricValue.properties.valueHistogram.description
- schemas.MetricValue.properties.valueInt64.description
- schemas.MetricValue.properties.valueInt64.format
- schemas.MetricValue.properties.valueInt64.type
- schemas.MetricValue.type
- schemas.OutlierStats.description
- schemas.OutlierStats.id
- schemas.OutlierStats.properties.overflowCount.description
- schemas.OutlierStats.properties.overflowCount.format
- schemas.OutlierStats.properties.overflowCount.type
- schemas.OutlierStats.properties.overflowMean.description
- schemas.OutlierStats.properties.overflowMean.format
- schemas.OutlierStats.properties.overflowMean.type
- schemas.OutlierStats.properties.underflowCount.description
- schemas.OutlierStats.properties.underflowCount.format
- schemas.OutlierStats.properties.underflowCount.type
- schemas.OutlierStats.properties.underflowMean.description
- schemas.OutlierStats.properties.underflowMean.format
- schemas.OutlierStats.properties.underflowMean.type
- schemas.OutlierStats.type
- schemas.ParameterMetadata.properties.hiddenUi.description
- schemas.ParameterMetadata.properties.hiddenUi.type
- schemas.PerStepNamespaceMetrics.description
- schemas.PerStepNamespaceMetrics.id
- schemas.PerStepNamespaceMetrics.properties.metricValues.description
- schemas.PerStepNamespaceMetrics.properties.metricValues.items.$ref
- schemas.PerStepNamespaceMetrics.properties.metricValues.type
- schemas.PerStepNamespaceMetrics.properties.metricsNamespace.description
- schemas.PerStepNamespaceMetrics.properties.metricsNamespace.type
- schemas.PerStepNamespaceMetrics.properties.originalStep.description
- schemas.PerStepNamespaceMetrics.properties.originalStep.type
- schemas.PerStepNamespaceMetrics.type
- schemas.PerWorkerMetrics.description
- schemas.PerWorkerMetrics.id
- schemas.PerWorkerMetrics.properties.perStepNamespaceMetrics.description
- schemas.PerWorkerMetrics.properties.perStepNamespaceMetrics.items.$ref
- schemas.PerWorkerMetrics.properties.perStepNamespaceMetrics.type
- schemas.PerWorkerMetrics.type
- schemas.RuntimeEnvironment.properties.streamingMode.description
- schemas.RuntimeEnvironment.properties.streamingMode.enum
- schemas.RuntimeEnvironment.properties.streamingMode.enumDescriptions
- schemas.RuntimeEnvironment.properties.streamingMode.type
- schemas.StreamingScalingReport.properties.activeBundleCount.deprecated
- schemas.StreamingScalingReport.properties.maximumBytes.description
- schemas.StreamingScalingReport.properties.maximumBytes.format
- schemas.StreamingScalingReport.properties.maximumBytes.type
- schemas.StreamingScalingReport.properties.maximumBytesCount.deprecated
- schemas.StreamingScalingReport.properties.outstandingBundleCount.description
- schemas.StreamingScalingReport.properties.outstandingBundleCount.format
- schemas.StreamingScalingReport.properties.outstandingBundleCount.type
- schemas.StreamingScalingReport.properties.outstandingBytes.description
- schemas.StreamingScalingReport.properties.outstandingBytes.format
- schemas.StreamingScalingReport.properties.outstandingBytes.type
- schemas.StreamingScalingReport.properties.outstandingBytesCount.deprecated
- schemas.TemplateMetadata.properties.streaming.description
- schemas.TemplateMetadata.properties.streaming.type
- schemas.TemplateMetadata.properties.supportsAtLeastOnce.description
- schemas.TemplateMetadata.properties.supportsAtLeastOnce.type
- schemas.TemplateMetadata.properties.supportsExactlyOnce.description
- schemas.TemplateMetadata.properties.supportsExactlyOnce.type
- schemas.WorkerMessage.properties.perWorkerMetrics.$ref
- schemas.WorkerMessage.properties.perWorkerMetrics.description

The following keys were changed:
- schemas.StreamingScalingReport.properties.maximumBundleCount.description
yoshi-automation authored and bcoe committed Feb 2, 2024
1 parent 6f7a6cc commit 83ba2b6
Showing 2 changed files with 409 additions and 11 deletions.
245 changes: 240 additions & 5 deletions discovery/dataflow-v1b3.json
@@ -2221,7 +2221,7 @@
}
}
},
"revision": "20231217",
"revision": "20240121",
"rootUrl": "https://dataflow.googleapis.com/",
"schemas": {
"ApproximateProgress": {
@@ -2367,6 +2367,23 @@
},
"type": "object"
},
"Base2Exponent": {
"description": "Exponential buckets where the growth factor between buckets is `2**(2**-scale)`. e.g. for `scale=1` growth factor is `2**(2**(-1))=sqrt(2)`. `n` buckets will have the following boundaries. - 0th: [0, gf) - i in [1, n-1]: [gf^(i), gf^(i+1))",
"id": "Base2Exponent",
"properties": {
"numberOfBuckets": {
"description": "Must be greater than 0.",
"format": "int32",
"type": "integer"
},
"scale": {
"description": "Must be between -3 and 3. This forces the growth factor of the bucket boundaries to be between `2^(1/8)` and `256`.",
"format": "int32",
"type": "integer"
}
},
"type": "object"
},
"BigQueryIODetails": {
"description": "Metadata for a BigQuery connector used by the job.",
"id": "BigQueryIODetails",
@@ -2409,6 +2426,21 @@
},
"type": "object"
},
"BucketOptions": {
"description": "`BucketOptions` describes the bucket boundaries used in the histogram.",
"id": "BucketOptions",
"properties": {
"exponential": {
"$ref": "Base2Exponent",
"description": "Bucket boundaries grow exponentially."
},
"linear": {
"$ref": "Linear",
"description": "Bucket boundaries grow linearly."
}
},
"type": "object"
},
"CPUTime": {
"description": "Modeled after information exposed by /proc/stat.",
"id": "CPUTime",
@@ -2907,6 +2939,34 @@
},
"type": "object"
},
"DataflowHistogramValue": {
"description": "Summary statistics for a population of values. HistogramValue contains a sequence of buckets and gives a count of values that fall into each bucket. Bucket boundares are defined by a formula and bucket widths are either fixed or exponentially increasing.",
"id": "DataflowHistogramValue",
"properties": {
"bucketCounts": {
"description": "Optional. The number of values in each bucket of the histogram, as described in `bucket_options`. `bucket_counts` should contain N values, where N is the number of buckets specified in `bucket_options`. If `bucket_counts` has fewer than N values, the remaining values are assumed to be 0.",
"items": {
"format": "int64",
"type": "string"
},
"type": "array"
},
"bucketOptions": {
"$ref": "BucketOptions",
"description": "Describes the bucket boundaries used in the histogram."
},
"count": {
"description": "Number of values recorded in this histogram.",
"format": "int64",
"type": "string"
},
"outlierStats": {
"$ref": "OutlierStats",
"description": "Statistics on the values recorded in the histogram that fall out of the bucket boundaries."
}
},
"type": "object"
},
"DatastoreIODetails": {
"description": "Metadata for a Datastore connector used by the job.",
"id": "DatastoreIODetails",
@@ -3517,6 +3577,20 @@
"description": "The Cloud Storage path for staging local files. Must be a valid Cloud Storage URL, beginning with `gs://`.",
"type": "string"
},
"streamingMode": {
"description": "Optional. Specifies the Streaming Engine message processing guarantees. Reduces cost and latency but might result in duplicate messages committed to storage. Designed to run simple mapping streaming ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to BigQuery is a canonical use case.",
"enum": [
"STREAMING_MODE_UNSPECIFIED",
"STREAMING_MODE_EXACTLY_ONCE",
"STREAMING_MODE_AT_LEAST_ONCE"
],
"enumDescriptions": [
"Run in the default mode.",
"In this mode, message deduplication is performed against persistent state to make sure each message is processed and committed to storage exactly once.",
"Message deduplication is not performed. Messages might be processed multiple times, and the results are applied multiple times. Note: Setting this value also enables Streaming Engine and Streaming Engine resource-based billing."
],
"type": "string"
},
"subnetwork": {
"description": "Subnetwork to which VMs will be assigned, if desired. You can specify a subnetwork using either a complete URL or an abbreviated path. Expected to be of the form \"https://www.googleapis.com/compute/v1/projects/HOST_PROJECT_ID/regions/REGION/subnetworks/SUBNETWORK\" or \"regions/REGION/subnetworks/SUBNETWORK\". If the subnetwork is located in a Shared VPC network, you must use the complete URL.",
"type": "string"
@@ -4408,6 +4482,28 @@
},
"type": "object"
},
"Linear": {
"description": "Linear buckets with the following boundaries for indices in 0 to n-1. - i in [0, n-1]: [start + (i)*width, start + (i+1)*width)",
"id": "Linear",
"properties": {
"numberOfBuckets": {
"description": "Must be greater than 0.",
"format": "int32",
"type": "integer"
},
"start": {
"description": "Lower bound of the first bucket.",
"format": "double",
"type": "number"
},
"width": {
"description": "Distance between bucket boundaries. Must be greater than 0.",
"format": "double",
"type": "number"
}
},
"type": "object"
},
"ListJobMessagesResponse": {
"description": "Response to a request to list job messages.",
"id": "ListJobMessagesResponse",
@@ -4621,6 +4717,33 @@
},
"type": "object"
},
"MetricValue": {
"description": "The value of a metric along with its name and labels.",
"id": "MetricValue",
"properties": {
"metric": {
"description": "Base name for this metric.",
"type": "string"
},
"metricLabels": {
"additionalProperties": {
"type": "string"
},
"description": "Optional. Set of metric labels for this metric.",
"type": "object"
},
"valueHistogram": {
"$ref": "DataflowHistogramValue",
"description": "Histogram value of this metric."
},
"valueInt64": {
"description": "Integer value of this metric.",
"format": "int64",
"type": "string"
}
},
"type": "object"
},
"MountedDataDisk": {
"description": "Describes mounted data disk.",
"id": "MountedDataDisk",
@@ -4682,6 +4805,33 @@
},
"type": "object"
},
"OutlierStats": {
"description": "Statistics for the underflow and overflow bucket.",
"id": "OutlierStats",
"properties": {
"overflowCount": {
"description": "Number of values that are larger than the upper bound of the largest bucket.",
"format": "int64",
"type": "string"
},
"overflowMean": {
"description": "Mean of values in the overflow bucket.",
"format": "double",
"type": "number"
},
"underflowCount": {
"description": "Number of values that are smaller than the lower bound of the smallest bucket.",
"format": "int64",
"type": "string"
},
"underflowMean": {
"description": "Mean of values in the undeflow bucket.",
"format": "double",
"type": "number"
}
},
"type": "object"
},
"Package": {
"description": "The packages that must be installed in order for a worker to run the steps of the Cloud Dataflow job that will be assigned to its worker pool. This is the mechanism by which the Cloud Dataflow SDK causes code to be loaded onto the workers. For example, the Cloud Dataflow Java SDK might use this to install jars containing the user's code and all of the various dependencies (libraries, data files, etc.) required in order for that code to run.",
"id": "Package",
@@ -4826,6 +4976,10 @@
"description": "Required. The help text to display for the parameter.",
"type": "string"
},
"hiddenUi": {
"description": "Optional. Whether the parameter should be hidden in the UI.",
"type": "boolean"
},
"isOptional": {
"description": "Optional. Whether the parameter is optional. Defaults to false.",
"type": "boolean"
@@ -4968,6 +5122,42 @@
},
"type": "object"
},
"PerStepNamespaceMetrics": {
"description": "Metrics for a particular unfused step and namespace. A metric is uniquely identified by the `metrics_namespace`, `original_step`, `metric name` and `metric_labels`.",
"id": "PerStepNamespaceMetrics",
"properties": {
"metricValues": {
"description": "Optional. Metrics that are recorded for this namespace and unfused step.",
"items": {
"$ref": "MetricValue"
},
"type": "array"
},
"metricsNamespace": {
"description": "The namespace of these metrics on the worker.",
"type": "string"
},
"originalStep": {
"description": "The original system name of the unfused step that these metrics are reported from.",
"type": "string"
}
},
"type": "object"
},
"PerWorkerMetrics": {
"description": "Per worker metrics.",
"id": "PerWorkerMetrics",
"properties": {
"perStepNamespaceMetrics": {
"description": "Optional. Metrics for a particular unfused step and namespace.",
"items": {
"$ref": "PerStepNamespaceMetrics"
},
"type": "array"
}
},
"type": "object"
},
"PipelineDescription": {
"description": "A descriptive representation of submitted pipeline as well as the executed form. This data is provided by the Dataflow service for ease of visualizing the pipeline and interpreting Dataflow provided metrics.",
"id": "PipelineDescription",
@@ -5332,6 +5522,20 @@
"description": "Optional. The email address of the service account to run the job as.",
"type": "string"
},
"streamingMode": {
"description": "Optional. Specifies the Streaming Engine message processing guarantees. Reduces cost and latency but might result in duplicate messages committed to storage. Designed to run simple mapping streaming ETL jobs at the lowest cost. For example, Change Data Capture (CDC) to BigQuery is a canonical use case.",
"enum": [
"STREAMING_MODE_UNSPECIFIED",
"STREAMING_MODE_EXACTLY_ONCE",
"STREAMING_MODE_AT_LEAST_ONCE"
],
"enumDescriptions": [
"Run in the default mode.",
"In this mode, message deduplication is performed against persistent state to make sure each message is processed and committed to storage exactly once.",
"Message deduplication is not performed. Messages might be processed multiple times, and the results are applied multiple times. Note: Setting this value also enables Streaming Engine and Streaming Engine resource-based billing."
],
"type": "string"
},
"subnetwork": {
"description": "Optional. Subnetwork to which VMs will be assigned, if desired. You can specify a subnetwork using either a complete URL or an abbreviated path. Expected to be of the form \"https://www.googleapis.com/compute/v1/projects/HOST_PROJECT_ID/regions/REGION/subnetworks/SUBNETWORK\" or \"regions/REGION/subnetworks/SUBNETWORK\". If the subnetwork is located in a Shared VPC network, you must use the complete URL.",
"type": "string"
@@ -6522,7 +6726,7 @@
"id": "StreamingScalingReport",
"properties": {
"activeBundleCount": {
"description": "Current acive bundle count.",
"deprecated": true,
"format": "int32",
"type": "integer"
},
@@ -6532,12 +6736,17 @@
"type": "integer"
},
"maximumBundleCount": {
"description": "Maximum bundle count limit.",
"description": "Maximum bundle count.",
"format": "int32",
"type": "integer"
},
"maximumBytes": {
"description": "Maximum bytes.",
"format": "int64",
"type": "string"
},
"maximumBytesCount": {
"description": "Maximum bytes count limit.",
"deprecated": true,
"format": "int32",
"type": "integer"
},
@@ -6546,8 +6755,18 @@
"format": "int32",
"type": "integer"
},
"outstandingBundleCount": {
"description": "Current outstanding bundle count.",
"format": "int32",
"type": "integer"
},
"outstandingBytes": {
"description": "Current outstanding bytes.",
"format": "int64",
"type": "string"
},
"outstandingBytesCount": {
"description": "Current outstanding bytes count.",
"deprecated": true,
"format": "int32",
"type": "integer"
}
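
In the `StreamingScalingReport` changes above, the deprecated `*BytesCount` fields were int32 integers, while their replacements (`maximumBytes`, `outstandingBytes`) are int64 strings, presumably so byte totals cannot overflow; that motivation is an inference, not stated in the diff. A sketch of reading across both shapes:

```ts
// Prefer the new int64-as-string field; fall back to the deprecated int32.
function outstandingBytes(report: {
  outstandingBytes?: string | null;      // new, format int64
  outstandingBytesCount?: number | null; // deprecated, format int32
}): number {
  return report.outstandingBytes != null
    ? Number(report.outstandingBytes)
    : report.outstandingBytesCount ?? 0;
}
```
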
@@ -6780,6 +6999,18 @@
"$ref": "ParameterMetadata"
},
"type": "array"
},
"streaming": {
"description": "Optional. Indicates if the template is streaming or not.",
"type": "boolean"
},
"supportsAtLeastOnce": {
"description": "Optional. Indicates if the streaming template supports at least once mode.",
"type": "boolean"
},
"supportsExactlyOnce": {
"description": "Optional. Indicates if the streaming template supports exactly once mode.",
"type": "boolean"
}
},
"type": "object"
@@ -7287,6 +7518,10 @@
"description": "Labels are used to group WorkerMessages. For example, a worker_message about a particular container might have the labels: { \"JOB_ID\": \"2015-04-22\", \"WORKER_ID\": \"wordcount-vm-2015…\" \"CONTAINER_TYPE\": \"worker\", \"CONTAINER_ID\": \"ac1234def\"} Label tags typically correspond to Label enum values. However, for ease of development other strings can be used as tags. LABEL_UNSPECIFIED should not be used here.",
"type": "object"
},
"perWorkerMetrics": {
"$ref": "PerWorkerMetrics",
"description": "System defined metrics for this worker."
},
"streamingScalingReport": {
"$ref": "StreamingScalingReport",
"description": "Contains per-user worker telemetry used in streaming autoscaling."