From 92acbae01f3f83b50d020985269112e68a323717 Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Thu, 28 Apr 2016 11:27:56 -0700 Subject: [PATCH 001/109] Add XMPP Sample --- samples/snippets/README.md | 0 samples/snippets/create_cluster.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 samples/snippets/README.md create mode 100644 samples/snippets/create_cluster.py diff --git a/samples/snippets/README.md b/samples/snippets/README.md new file mode 100644 index 00000000..e69de29b diff --git a/samples/snippets/create_cluster.py b/samples/snippets/create_cluster.py new file mode 100644 index 00000000..e69de29b From a106d1e5a93f67bb9cd5616073932a6585558d2f Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Fri, 29 Apr 2016 13:37:44 -0700 Subject: [PATCH 002/109] Add Dataproc Sample --- samples/snippets/README.md | 63 +++++ samples/snippets/create_cluster.py | 0 .../snippets/create_cluster_and_submit_job.py | 235 ++++++++++++++++++ samples/snippets/dataproc_e2e_test.py | 30 +++ samples/snippets/list_clusters.py | 61 +++++ samples/snippets/pyspark_sort.py | 28 +++ samples/snippets/requirements.txt | 2 + 7 files changed, 419 insertions(+) delete mode 100644 samples/snippets/create_cluster.py create mode 100644 samples/snippets/create_cluster_and_submit_job.py create mode 100644 samples/snippets/dataproc_e2e_test.py create mode 100644 samples/snippets/list_clusters.py create mode 100644 samples/snippets/pyspark_sort.py create mode 100644 samples/snippets/requirements.txt diff --git a/samples/snippets/README.md b/samples/snippets/README.md index e69de29b..16beb58e 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -0,0 +1,63 @@ +# Cloud Dataproc API Example + +Sample command-line programs for interacting with the Cloud Dataproc API. + +Note that while this sample demonstrates interacting with Dataproc via the API, the functionality +demonstrated here could also be accomplished using the Cloud Console or the gcloud CLI. + +`list_clusters.py` is a simple command-line program to demonstrate connecting to the +Dataproc API and listing the clusters in a ergion + +`create_cluster_and_submit_jbo.py` demonstrates how to create a cluster, submit the +`pyspark_sort.py` job, download the output from Google Cloud Storage, and output the result. + +## Prerequisites to run locally: + +* [pip](https://pypi.python.org/pypi/pip) + +Go to the [Google Cloud Console](https://console.cloud.google.com). + +Under API Manager, search for the Google Cloud Dataproc API and enable it. + + +# Set Up Your Local Dev Environment +To install, run the following commands. If you want to use [virtualenv](https://virtualenv.readthedocs.org/en/latest/) +(recommended), run the commands within a virtualenv. + + * pip install -r requirements.txt + +Create local credentials by running the following command and following the oauth2 flow: + + gcloud beta auth application-default login + +To run list_clusters.py: + + python list_clusters.py --project_id= --zone=us-central1-b + + +To run create_cluster_and_submit_job, first create a GCS bucket, from the Cloud Console or with +gsutil: + + gsutil mb gs:// + +Then run: + + python create_cluster_and_submit_job.py --project_id= --zone=us-central1-b --cluster_name=testcluster --gcs_bucket= + +This will setup a cluster, upload the PySpark file, submit the job, print the result, then +delete the cluster. + +You can optionally specify a `--pyspark_file` argument to change from the default +`pyspark_sort.py` included in this script to a new script. 
+ +## Running on GCE, GAE, or other environments + +On Google App Engine, the credentials should be found automatically. + +On Google Compute Engine, the credentials should be found automatically, but require that +you create the instance with the correct scopes. + + gcloud compute instances create --scopes="https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/compute,https://www.googleapis.com/auth/compute.readonly" test-instance + +If you did not create the instance with the right scopes, you can still upload a JSON service +account and set GOOGLE_APPLICATION_CREDENTIALS as described below. diff --git a/samples/snippets/create_cluster.py b/samples/snippets/create_cluster.py deleted file mode 100644 index e69de29b..00000000 diff --git a/samples/snippets/create_cluster_and_submit_job.py b/samples/snippets/create_cluster_and_submit_job.py new file mode 100644 index 00000000..e6b0c5a7 --- /dev/null +++ b/samples/snippets/create_cluster_and_submit_job.py @@ -0,0 +1,235 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Sample command-line program for listing Google Dataproc Clusters""" + +import argparse +import os + +from apiclient import discovery +from gcloud import storage +from oauth2client.client import GoogleCredentials + +# Currently only the "global" region is supported +REGION = 'global' +DEFAULT_FILENAME = 'pyspark_sort.py' + + +def get_default_pyspark_file(): + """Gets the PySpark file from this directory""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + f = open(os.path.join(current_dir, DEFAULT_FILENAME), 'r') + return f, DEFAULT_FILENAME + + +def get_pyspark_file(filename): + f = open(filename, 'r') + return f, os.path.basename(filename) + + +def upload_pyspark_file(project_id, bucket_name, filename, file): + """Uploads the PySpark file in this directory to the configured + input bucket.""" + print('Uploading pyspark file to GCS') + client = storage.Client(project=project_id) + bucket = client.get_bucket(bucket_name) + blob = bucket.blob(filename) + blob.upload_from_file(file) + + +def download_output(project_id, cluster_id, output_bucket, job_id): + """Downloads the output file from Cloud Storage and returns it as a + string.""" + print('Downloading output file') + client = storage.Client(project=project_id) + bucket = client.get_bucket(output_bucket) + output_blob = ( + 'google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000' + .format(cluster_id, job_id)) + return bucket.blob(output_blob).download_as_string() + + +# [START create_cluster] +def create_cluster(dataproc, project, cluster_name, zone): + print('Creating cluster.') + zone_uri = \ + 'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format( + project, zone) + cluster_data = { + 'projectId': project, + 'clusterName': cluster_name, + 'config': { + 'gceClusterConfig': { + 'zoneUri': zone_uri + } + } + } + result = dataproc.projects().regions().clusters().create( + projectId=project, + region=REGION, + body=cluster_data).execute() + 
return result +# [END create_cluster] + + +def wait_for_cluster_creation(dataproc, project_id, cluster_name, zone): + print('Waiting for cluster creation') + + while True: + result = dataproc.projects().regions().clusters().list( + projectId=project_id, + region=REGION).execute() + cluster_list = result['clusters'] + cluster = [c + for c in cluster_list + if c['clusterName'] == cluster_name][0] + if cluster['status']['state'] == 'ERROR': + raise Exception(result['status']['details']) + if cluster['status']['state'] == 'RUNNING': + print("Cluster created.") + break + + +# [START list_clusters_with_detail] +def list_clusters_with_details(dataproc, project): + result = dataproc.projects().regions().clusters().list( + projectId=project, + region=REGION).execute() + cluster_list = result['clusters'] + for cluster in cluster_list: + print("{} - {}" + .format(cluster['clusterName'], cluster['status']['state'])) + return result +# [END list_clusters_with_detail] + + +def get_cluster_id_by_name(cluster_list, cluster_name): + """Helper function to retrieve the ID and output bucket of a cluster by + name.""" + cluster = [c for c in cluster_list if c['clusterName'] == cluster_name][0] + return cluster['clusterUuid'], cluster['config']['configBucket'] + + +# [START submit_pyspark_job] +def submit_pyspark_job(dataproc, project, cluster_name, bucket_name, filename): + """Submits the Pyspark job to the cluster, assuming `filename` has + already been uploaded to `bucket_name`""" + job_details = { + 'projectId': project, + 'job': { + 'placement': { + 'clusterName': cluster_name + }, + 'pysparkJob': { + 'mainPythonFileUri': 'gs://{}/{}'.format(bucket_name, filename) + } + } + } + result = dataproc.projects().regions().jobs().submit( + projectId=project, + region=REGION, + body=job_details).execute() + job_id = result['reference']['jobId'] + print('Submitted job ID {}'.format(job_id)) + return job_id +# [END submit_pyspark_job] + + +# [START delete] +def delete_cluster(dataproc, project, cluster): + print('Tearing down cluster') + result = dataproc.projects().regions().clusters().delete( + projectId=project, + region=REGION, + clusterName=cluster).execute() + return result +# [END delete] + + +# [START wait] +def wait_for_job(dataproc, project, job_id): + print('Waiting for job to finish...') + while True: + result = dataproc.projects().regions().jobs().get( + projectId=project, + region=REGION, + jobId=job_id).execute() + # Handle exceptions + if result['status']['state'] == 'ERROR': + print(result) + raise Exception(result['status']['details']) + elif result['status']['state'] == 'DONE': + print('Job finished') + return result +# [END wait] + + +# [START get_client] +def get_client(): + """Builds an http client authenticated with the service account + credentials.""" + credentials = GoogleCredentials.get_application_default() + dataproc = discovery.build('dataproc', 'v1', credentials=credentials) + return dataproc +# [END get_client] + + +def main(project_id, zone, cluster_name, bucket_name, pyspark_file=None): + dataproc = get_client() + try: + if pyspark_file: + spark_file, spark_filename = get_pyspark_file(pyspark_file) + else: + spark_file, spark_filename = get_default_pyspark_file() + + create_cluster(dataproc, project_id, cluster_name, zone) + wait_for_cluster_creation(dataproc, project_id, cluster_name, zone) + upload_pyspark_file(project_id, bucket_name, + spark_filename, spark_file) + cluster_list = list_clusters_with_details( + dataproc, project_id)['clusters'] + + (cluster_id, output_bucket) = ( 
+ get_cluster_id_by_name(cluster_list, cluster_name)) + job_id = submit_pyspark_job( + dataproc, project_id, cluster_name, bucket_name, spark_filename) + wait_for_job(dataproc, project_id, job_id) + + output = download_output(project_id, cluster_id, output_bucket, job_id) + print('Received job output {}'.format(output)) + return output + finally: + delete_cluster(dataproc, project_id, cluster_name) + spark_file.close() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + '--project_id', help='Project ID you want to access.', required=True), + parser.add_argument( + '--zone', help='Region to create clusters in', required=True) + parser.add_argument( + '--cluster_name', help='Region to create clusters in', required=True) + parser.add_argument( + '--gcs_bucket', help='Bucket to upload Pyspark file to', required=True) + parser.add_argument( + '--pyspark_file', help='Pyspark filename. Defaults to pyspark_sort.py') + + args = parser.parse_args() + main( + args.project_id, args.zone, + args.cluster_name, args.gcs_bucket, args.pyspark_file) diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_test.py new file mode 100644 index 00000000..c624b8c8 --- /dev/null +++ b/samples/snippets/dataproc_e2e_test.py @@ -0,0 +1,30 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Integration tests for Dataproc samples. + +Creates a Dataproc cluster, uploads a pyspark file to Google Cloud Storage, +submits a job to Dataproc that runs the pyspark file, then downloads +the output logs from Cloud Storage and verifies the expected output.""" + +import create_cluster_and_submit_job +from gcp.testing.flaky import flaky + +CLUSTER_NAME = 'testcluster2' +ZONE = 'us-central1-b' + + +@flaky +def test_e2e(cloud_config): + output = create_cluster_and_submit_job.main( + cloud_config.project, ZONE, CLUSTER_NAME, cloud_config.storage_bucket) + assert "['Hello,', 'dog', 'elephant', 'panther', 'world!']" in output diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py new file mode 100644 index 00000000..e08ee889 --- /dev/null +++ b/samples/snippets/list_clusters.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" Sample command-line program for listing Google Dataproc Clusters +""" + +import argparse + +from apiclient import discovery +from oauth2client.client import GoogleCredentials + +# Currently only the "global" region is supported +REGION = 'global' + + +# [START list_clusters] +def list_clusters(dataproc, project): + result = dataproc.projects().regions().clusters().list( + projectId=project, + region=REGION).execute() + return result +# [END list_clusters] + + +# [START get_client] +def get_client(): + """Builds an http client authenticated with the service account + credentials.""" + credentials = GoogleCredentials.get_application_default() + dataproc = discovery.build('dataproc', 'v1', credentials=credentials) + return dataproc +# [END get_client] + + +def main(project_id, zone): + dataproc = get_client() + result = list_clusters(dataproc, project_id) + print(result) + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + 'project_id', help='Project ID you want to access.'), + parser.add_argument( + 'zone', help='Region to create clusters in') + + args = parser.parse_args() + main(args.project_id, args.zone) diff --git a/samples/snippets/pyspark_sort.py b/samples/snippets/pyspark_sort.py new file mode 100644 index 00000000..14b66995 --- /dev/null +++ b/samples/snippets/pyspark_sort.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Sample pyspark script to be uploaded to Cloud Storage and run on +Cloud Dataproc. + +Note this file is not intended to be run directly, but run inside a PySpark +environment. 
+""" + +# [START pyspark] +import pyspark + +sc = pyspark.SparkContext() +rdd = sc.parallelize(['Hello,', 'world!', 'dog', 'elephant', 'panther']) +words = sorted(rdd.collect()) +print words +# [END pyspark] diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt new file mode 100644 index 00000000..50278dbe --- /dev/null +++ b/samples/snippets/requirements.txt @@ -0,0 +1,2 @@ +google-api-python-client==1.5.0 +gcloud==0.13.0 From 02b747c3bbbc85d300749971f01b0c070ab97542 Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Fri, 29 Apr 2016 15:45:47 -0700 Subject: [PATCH 003/109] Add more region tags --- samples/snippets/create_cluster_and_submit_job.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samples/snippets/create_cluster_and_submit_job.py b/samples/snippets/create_cluster_and_submit_job.py index e6b0c5a7..9077f926 100644 --- a/samples/snippets/create_cluster_and_submit_job.py +++ b/samples/snippets/create_cluster_and_submit_job.py @@ -201,8 +201,10 @@ def main(project_id, zone, cluster_name, bucket_name, pyspark_file=None): (cluster_id, output_bucket) = ( get_cluster_id_by_name(cluster_list, cluster_name)) + # [START call_submit_pyspark_job] job_id = submit_pyspark_job( dataproc, project_id, cluster_name, bucket_name, spark_filename) + # [END call_submit_pyspark_job] wait_for_job(dataproc, project_id, job_id) output = download_output(project_id, cluster_id, output_bucket, job_id) From e8b3fb5641ee9adfa49b9036b711343380b0615b Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Fri, 29 Apr 2016 16:52:47 -0700 Subject: [PATCH 004/109] Minor dataproc fixes --- samples/snippets/README.md | 2 +- samples/snippets/create_cluster_and_submit_job.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/snippets/README.md b/samples/snippets/README.md index 16beb58e..8fe04973 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -8,7 +8,7 @@ demonstrated here could also be accomplished using the Cloud Console or the gclo `list_clusters.py` is a simple command-line program to demonstrate connecting to the Dataproc API and listing the clusters in a ergion -`create_cluster_and_submit_jbo.py` demonstrates how to create a cluster, submit the +`create_cluster_and_submit_job.py` demonstrates how to create a cluster, submit the `pyspark_sort.py` job, download the output from Google Cloud Storage, and output the result. 
## Prerequisites to run locally: diff --git a/samples/snippets/create_cluster_and_submit_job.py b/samples/snippets/create_cluster_and_submit_job.py index 9077f926..0cf58ad7 100644 --- a/samples/snippets/create_cluster_and_submit_job.py +++ b/samples/snippets/create_cluster_and_submit_job.py @@ -166,7 +166,6 @@ def wait_for_job(dataproc, project, job_id): jobId=job_id).execute() # Handle exceptions if result['status']['state'] == 'ERROR': - print(result) raise Exception(result['status']['details']) elif result['status']['state'] == 'DONE': print('Job finished') From 359264112bdd35d74513630ec520714cd43bb73c Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Tue, 3 May 2016 16:15:15 -0700 Subject: [PATCH 005/109] Fix Dataproc e2e for Python 3 --- samples/snippets/dataproc_e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_test.py index c624b8c8..56db9b11 100644 --- a/samples/snippets/dataproc_e2e_test.py +++ b/samples/snippets/dataproc_e2e_test.py @@ -27,4 +27,4 @@ def test_e2e(cloud_config): output = create_cluster_and_submit_job.main( cloud_config.project, ZONE, CLUSTER_NAME, cloud_config.storage_bucket) - assert "['Hello,', 'dog', 'elephant', 'panther', 'world!']" in output + assert b"['Hello,', 'dog', 'elephant', 'panther', 'world!']" in output From 2e843c5cea22aebf3a600bcdd89ce4005120f52e Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Tue, 17 May 2016 16:05:39 -0700 Subject: [PATCH 006/109] Update reqs --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 50278dbe..f062fdab 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.0 -gcloud==0.13.0 +gcloud==0.14.0 From afc618e64a76a0325b02eec9a3fcd9ccab89031b Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 23 May 2016 13:53:01 -0700 Subject: [PATCH 007/109] updating requirements [(#358)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/358) Change-Id: I6177a17fad021e26ed76679d9db34848c17b62a8 --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f062fdab..20007ba1 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.5.0 +google-api-python-client==1.5.1 gcloud==0.14.0 From fbe4587dc673da0997eccc5f08ea0565f8248763 Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Mon, 27 Jun 2016 15:21:01 -0700 Subject: [PATCH 008/109] Update Reqs --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 20007ba1..d29e5ef5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.1 -gcloud==0.14.0 +gcloud==0.17.0 From 88f8066e13d5fc8b20d5b32a78a0bb79c6b9bfc9 Mon Sep 17 00:00:00 2001 From: Eran Kampf Date: Fri, 8 Jul 2016 17:20:57 -0700 Subject: [PATCH 009/109] Wrong arg description --- samples/snippets/create_cluster_and_submit_job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/create_cluster_and_submit_job.py b/samples/snippets/create_cluster_and_submit_job.py index 0cf58ad7..9275f1f9 100644 --- 
a/samples/snippets/create_cluster_and_submit_job.py +++ b/samples/snippets/create_cluster_and_submit_job.py @@ -224,7 +224,7 @@ def main(project_id, zone, cluster_name, bucket_name, pyspark_file=None): parser.add_argument( '--zone', help='Region to create clusters in', required=True) parser.add_argument( - '--cluster_name', help='Region to create clusters in', required=True) + '--cluster_name', help='Name of the cluster to create', required=True) parser.add_argument( '--gcs_bucket', help='Bucket to upload Pyspark file to', required=True) parser.add_argument( From 1972be307efa946821c2a25a06d8c5329e196ff7 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 16 Aug 2016 13:32:42 -0700 Subject: [PATCH 010/109] Auto-update dependencies. [(#456)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/456) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d29e5ef5..fb16597c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.1 -gcloud==0.17.0 +gcloud==0.18.1 From da093c32c3c0101e27258514f5bf7555f16dd418 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 17 Aug 2016 09:34:47 -0700 Subject: [PATCH 011/109] Auto-update dependencies. [(#459)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/459) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fb16597c..8eed69bb 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.5.1 +google-api-python-client==1.5.2 gcloud==0.18.1 From e2a59e9a72675595dfb8a03f3102380d3f9f9eb4 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 19 Aug 2016 13:56:28 -0700 Subject: [PATCH 012/109] Fix import order lint errors Change-Id: Ieaf7237fc6f925daec46a07d2e81a452b841198a --- samples/snippets/dataproc_e2e_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_test.py index 56db9b11..c69726ce 100644 --- a/samples/snippets/dataproc_e2e_test.py +++ b/samples/snippets/dataproc_e2e_test.py @@ -16,9 +16,10 @@ submits a job to Dataproc that runs the pyspark file, then downloads the output logs from Cloud Storage and verifies the expected output.""" -import create_cluster_and_submit_job from gcp.testing.flaky import flaky +import create_cluster_and_submit_job + CLUSTER_NAME = 'testcluster2' ZONE = 'us-central1-b' From a88094f9f4f0acb6448f96880a87cd9c9851d6fc Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Fri, 19 Aug 2016 14:13:21 -0700 Subject: [PATCH 013/109] bump Change-Id: I02e7767d13ba267ee9fc72c5b68a57013bb8b8d3 From 981141b6d836e9a03d6ce7b509139af8bf92dc27 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 30 Aug 2016 10:08:32 -0700 Subject: [PATCH 014/109] Auto-update dependencies. 
[(#486)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/486) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8eed69bb..4e9158a5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.5.2 +google-api-python-client==1.5.3 gcloud==0.18.1 From 63273847c82902a39f4ecfc65942a79143f25dbb Mon Sep 17 00:00:00 2001 From: DPE bot Date: Fri, 23 Sep 2016 09:48:46 -0700 Subject: [PATCH 015/109] Auto-update dependencies. [(#540)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/540) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4e9158a5..a60ee53c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.3 -gcloud==0.18.1 +gcloud==0.18.2 From 813a8b8f7385904e4fd2386c4daf5f4bddc69189 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 26 Sep 2016 11:34:45 -0700 Subject: [PATCH 016/109] Auto-update dependencies. [(#542)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/542) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a60ee53c..8d2c52ac 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.3 -gcloud==0.18.2 +gcloud==0.18.3 From 80f6cadd4591c698b71f819d2044f8fff5e77bad Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 29 Sep 2016 20:51:47 -0700 Subject: [PATCH 017/109] Move to google-cloud [(#544)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/544) --- samples/snippets/create_cluster_and_submit_job.py | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/create_cluster_and_submit_job.py b/samples/snippets/create_cluster_and_submit_job.py index 9275f1f9..426a73e5 100644 --- a/samples/snippets/create_cluster_and_submit_job.py +++ b/samples/snippets/create_cluster_and_submit_job.py @@ -17,7 +17,7 @@ import os from apiclient import discovery -from gcloud import storage +from google.cloud import storage from oauth2client.client import GoogleCredentials # Currently only the "global" region is supported diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8d2c52ac..87da6882 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.3 -gcloud==0.18.3 +google-cloud==0.20.0 From 318967ad2b8adf0c244462b502fbb394c06cad22 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 18 Oct 2016 13:41:00 -0700 Subject: [PATCH 018/109] Auto-update dependencies. 
[(#584)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/584) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 87da6882..2cc56833 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.5.3 +google-api-python-client==1.5.4 google-cloud==0.20.0 From 9902564cee78a3abc59b7170f73ec3607df1b4de Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 1 Nov 2016 23:10:14 -0700 Subject: [PATCH 019/109] Auto-update dependencies. [(#629)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/629) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2cc56833..6ac114bc 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.5.4 +google-api-python-client==1.5.5 google-cloud==0.20.0 From 34145a4e69faf4ae91664ad1e6645dce43fae6e5 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 15 Nov 2016 14:58:27 -0800 Subject: [PATCH 020/109] Update samples to support latest Google Cloud Python [(#656)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/656) --- samples/snippets/list_clusters.py | 1 + samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py index e08ee889..38e6dd27 100644 --- a/samples/snippets/list_clusters.py +++ b/samples/snippets/list_clusters.py @@ -47,6 +47,7 @@ def main(project_id, zone): result = list_clusters(dataproc, project_id) print(result) + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6ac114bc..d291ebf7 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.5 -google-cloud==0.20.0 +google-cloud==0.21.0 From 02efe78940e0409caa301af21a2f603cfd06108d Mon Sep 17 00:00:00 2001 From: aman-ebay Date: Wed, 30 Nov 2016 10:02:18 -0800 Subject: [PATCH 021/109] Update README.md [(#691)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/691) --- samples/snippets/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/README.md b/samples/snippets/README.md index 8fe04973..32bde698 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -60,4 +60,4 @@ you create the instance with the correct scopes. gcloud compute instances create --scopes="https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/compute,https://www.googleapis.com/auth/compute.readonly" test-instance If you did not create the instance with the right scopes, you can still upload a JSON service -account and set GOOGLE_APPLICATION_CREDENTIALS as described below. +account and set `GOOGLE_APPLICATION_CREDENTIALS`. See [Google Application Default Credentials](https://developers.google.com/identity/protocols/application-default-credentials) for more details. From 46d9375799416ed3879ed69ce368c2745ed5e16d Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 13 Dec 2016 09:54:02 -0800 Subject: [PATCH 022/109] Auto-update dependencies. 
[(#715)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/715) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d291ebf7..57d7f6e6 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.5.5 -google-cloud==0.21.0 +google-cloud==0.22.0 From 9724c9e9625840e88c3baf547749bc988c708bb3 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 12 Jan 2017 12:01:20 -0800 Subject: [PATCH 023/109] Auto-update dependencies. [(#735)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/735) * Auto-update dependencies. * Fix language OCR sample * Remove unused import --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 57d7f6e6..f2711b1c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.5.5 +google-api-python-client==1.6.1 google-cloud==0.22.0 From 514cafda269644e69d4913602327f879c56c8447 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 9 Feb 2017 08:59:42 -0800 Subject: [PATCH 024/109] Auto-update dependencies. [(#790)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/790) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f2711b1c..40732089 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.6.1 +google-api-python-client==1.6.2 google-cloud==0.22.0 From f3dc0f3cacba4725497ee0abba12555ce3458a9f Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Thu, 16 Feb 2017 17:07:45 -0800 Subject: [PATCH 025/109] Remove usage of GoogleCredentials [(#810)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/810) --- samples/snippets/create_cluster_and_submit_job.py | 6 ++---- samples/snippets/list_clusters.py | 9 +++------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/samples/snippets/create_cluster_and_submit_job.py b/samples/snippets/create_cluster_and_submit_job.py index 426a73e5..f165e145 100644 --- a/samples/snippets/create_cluster_and_submit_job.py +++ b/samples/snippets/create_cluster_and_submit_job.py @@ -16,9 +16,8 @@ import argparse import os -from apiclient import discovery from google.cloud import storage -from oauth2client.client import GoogleCredentials +import googleapiclient.discovery # Currently only the "global" region is supported REGION = 'global' @@ -177,8 +176,7 @@ def wait_for_job(dataproc, project, job_id): def get_client(): """Builds an http client authenticated with the service account credentials.""" - credentials = GoogleCredentials.get_application_default() - dataproc = discovery.build('dataproc', 'v1', credentials=credentials) + dataproc = googleapiclient.discovery.build('dataproc', 'v1') return dataproc # [END get_client] diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py index 38e6dd27..89cf0c34 100644 --- a/samples/snippets/list_clusters.py +++ b/samples/snippets/list_clusters.py @@ -16,8 +16,7 @@ import argparse -from apiclient import discovery -from oauth2client.client import GoogleCredentials +import googleapiclient.discovery # Currently only the "global" region 
is supported REGION = 'global' @@ -34,10 +33,8 @@ def list_clusters(dataproc, project): # [START get_client] def get_client(): - """Builds an http client authenticated with the service account - credentials.""" - credentials = GoogleCredentials.get_application_default() - dataproc = discovery.build('dataproc', 'v1', credentials=credentials) + """Builds a client to the dataproc API.""" + dataproc = googleapiclient.discovery.build('dataproc', 'v1') return dataproc # [END get_client] From 21a7875a6af69d46194923930cc4b0a0e937be15 Mon Sep 17 00:00:00 2001 From: Martial Hue Date: Fri, 17 Feb 2017 17:56:04 +0100 Subject: [PATCH 026/109] Fix a typo [(#813)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/813) --- samples/snippets/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/README.md b/samples/snippets/README.md index 32bde698..8cbb3f68 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -6,7 +6,7 @@ Note that while this sample demonstrates interacting with Dataproc via the API, demonstrated here could also be accomplished using the Cloud Console or the gcloud CLI. `list_clusters.py` is a simple command-line program to demonstrate connecting to the -Dataproc API and listing the clusters in a ergion +Dataproc API and listing the clusters in a region `create_cluster_and_submit_job.py` demonstrates how to create a cluster, submit the `pyspark_sort.py` job, download the output from Google Cloud Storage, and output the result. From 81646acdd4ccf803ff6825ce995cfd7cb5be9f0e Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Tue, 4 Apr 2017 16:08:30 -0700 Subject: [PATCH 027/109] Remove cloud config fixture [(#887)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/887) * Remove cloud config fixture * Fix client secrets * Fix bigtable instance --- samples/snippets/dataproc_e2e_test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_test.py index c69726ce..4e86fdcb 100644 --- a/samples/snippets/dataproc_e2e_test.py +++ b/samples/snippets/dataproc_e2e_test.py @@ -16,16 +16,20 @@ submits a job to Dataproc that runs the pyspark file, then downloads the output logs from Cloud Storage and verifies the expected output.""" +import os + from gcp.testing.flaky import flaky import create_cluster_and_submit_job +PROJECT = os.environ['GCLOUD_PROJECT'] +BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] CLUSTER_NAME = 'testcluster2' ZONE = 'us-central1-b' @flaky -def test_e2e(cloud_config): +def test_e2e(): output = create_cluster_and_submit_job.main( - cloud_config.project, ZONE, CLUSTER_NAME, cloud_config.storage_bucket) + PROJECT, ZONE, CLUSTER_NAME, BUCKET) assert b"['Hello,', 'dog', 'elephant', 'panther', 'world!']" in output From 695f3ac79c960c2d63924b2eb0a30511b8a40dd3 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Wed, 12 Apr 2017 15:14:35 -0700 Subject: [PATCH 028/109] Fix reference to our testing tools --- samples/snippets/dataproc_e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_test.py index 4e86fdcb..dcb836c3 100644 --- a/samples/snippets/dataproc_e2e_test.py +++ b/samples/snippets/dataproc_e2e_test.py @@ -18,7 +18,7 @@ import os -from gcp.testing.flaky import flaky +from gcp_devrel.testing.flaky import flaky import create_cluster_and_submit_job From add1f4908d31e445fa7bd030fce593c16a879fc4 Mon Sep 17 
00:00:00 2001 From: DPE bot Date: Mon, 24 Apr 2017 13:12:09 -0700 Subject: [PATCH 029/109] Auto-update dependencies. [(#914)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/914) * Auto-update dependencies. * xfail the error reporting test * Fix lint --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 40732089..0b63cfeb 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.6.2 -google-cloud==0.22.0 +google-cloud==0.24.0 From a8e0c4924c48254c8c8e41e03eb9382a876e0321 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 1 May 2017 10:49:29 -0700 Subject: [PATCH 030/109] Auto-update dependencies. [(#922)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/922) * Auto-update dependencies. * Fix pubsub iam samples --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0b63cfeb..6ae3a1eb 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.6.2 -google-cloud==0.24.0 +google-cloud==0.25.0 From 31fcf192e3087646f5c33537df5375ea7f15db48 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 28 Jun 2017 09:26:33 -0700 Subject: [PATCH 031/109] Auto-update dependencies. [(#1005)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1005) * Auto-update dependencies. * Fix bigtable lint * Fix IOT iam interaction --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6ae3a1eb..1219a362 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.6.2 -google-cloud==0.25.0 +google-cloud==0.26.0 From 668510b08486c81ab968b54fe59328f89f9392fb Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 13 Jul 2017 09:12:37 -0700 Subject: [PATCH 032/109] Auto-update dependencies. [(#1011)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1011) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1219a362..23432f79 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.6.2 -google-cloud==0.26.0 +google-cloud==0.26.1 From 86abc221236851ea56566a55560a265c63ccc810 Mon Sep 17 00:00:00 2001 From: Gioia Ballin Date: Fri, 21 Jul 2017 19:28:49 +0200 Subject: [PATCH 033/109] Properly forwarding the "region" parameter provided as an input argument. 
[(#1029)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1029) --- samples/snippets/README.md | 2 +- samples/snippets/list_clusters.py | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/samples/snippets/README.md b/samples/snippets/README.md index 8cbb3f68..6f0a0390 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -32,7 +32,7 @@ Create local credentials by running the following command and following the oaut To run list_clusters.py: - python list_clusters.py --project_id= --zone=us-central1-b + python list_clusters.py --region=us-central1 To run create_cluster_and_submit_job, first create a GCS bucket, from the Cloud Console or with diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py index 89cf0c34..14a2ba81 100644 --- a/samples/snippets/list_clusters.py +++ b/samples/snippets/list_clusters.py @@ -18,15 +18,12 @@ import googleapiclient.discovery -# Currently only the "global" region is supported -REGION = 'global' - # [START list_clusters] -def list_clusters(dataproc, project): +def list_clusters(dataproc, project, region): result = dataproc.projects().regions().clusters().list( projectId=project, - region=REGION).execute() + region=region).execute() return result # [END list_clusters] @@ -39,9 +36,9 @@ def get_client(): # [END get_client] -def main(project_id, zone): +def main(project_id, region): dataproc = get_client() - result = list_clusters(dataproc, project_id) + result = list_clusters(dataproc, project_id, region) print(result) @@ -52,8 +49,10 @@ def main(project_id, zone): ) parser.add_argument( 'project_id', help='Project ID you want to access.'), + # Sets the region to "global" if it's not provided + # Note: sub-regions (e.g.: us-central1-a/b) are currently not supported parser.add_argument( - 'zone', help='Region to create clusters in') + '--region', default='global', help='Region to create clusters in') args = parser.parse_args() - main(args.project_id, args.zone) + main(args.project_id, args.region) From 93cc5077b4d4cf7329ca234291a1e16522f2f8ff Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 7 Aug 2017 10:04:55 -0700 Subject: [PATCH 034/109] Auto-update dependencies. [(#1055)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1055) * Auto-update dependencies. * Explicitly use latest bigtable client Change-Id: Id71e9e768f020730e4ca9514a0d7ebaa794e7d9e * Revert language update for now Change-Id: I8867f154e9a5aae00d0047c9caf880e5e8f50c53 * Remove pdb. smh Change-Id: I5ff905fadc026eebbcd45512d4e76e003e3b2b43 --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 23432f79..2059ddef 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ google-api-python-client==1.6.2 -google-cloud==0.26.1 +google-cloud==0.27.0 From d71a8951e890b3a6eef83386d420ad46d2ca87d5 Mon Sep 17 00:00:00 2001 From: Gioia Ballin Date: Tue, 8 Aug 2017 00:36:41 +0200 Subject: [PATCH 035/109] Fix region handling and allow to use an existing cluster. 
[(#1053)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1053) --- samples/snippets/README.md | 11 ++- samples/snippets/dataproc_e2e_test.py | 4 +- ...submit_job.py => submit_job_to_cluster.py} | 81 ++++++++++++------- 3 files changed, 61 insertions(+), 35 deletions(-) rename samples/snippets/{create_cluster_and_submit_job.py => submit_job_to_cluster.py} (74%) diff --git a/samples/snippets/README.md b/samples/snippets/README.md index 6f0a0390..6bf819cc 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -35,14 +35,19 @@ To run list_clusters.py: python list_clusters.py --region=us-central1 -To run create_cluster_and_submit_job, first create a GCS bucket, from the Cloud Console or with +To run submit_job_to_cluster.py, first create a GCS bucket, from the Cloud Console or with gsutil: gsutil mb gs:// -Then run: +Then, if you want to rely on an existing cluster, run: - python create_cluster_and_submit_job.py --project_id= --zone=us-central1-b --cluster_name=testcluster --gcs_bucket= + python submit_job_to_cluster.py --project_id= --zone=us-central1-b --cluster_name=testcluster --gcs_bucket= + +Otherwise, if you want the script to create a new cluster for you: + + python submit_job_to_cluster.py --project_id= --zone=us-central1-b --cluster_name=testcluster --gcs_bucket= --create_new_cluster + This will setup a cluster, upload the PySpark file, submit the job, print the result, then delete the cluster. diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_test.py index dcb836c3..d7e9c522 100644 --- a/samples/snippets/dataproc_e2e_test.py +++ b/samples/snippets/dataproc_e2e_test.py @@ -20,7 +20,7 @@ from gcp_devrel.testing.flaky import flaky -import create_cluster_and_submit_job +import submit_job_to_cluster PROJECT = os.environ['GCLOUD_PROJECT'] BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] @@ -30,6 +30,6 @@ @flaky def test_e2e(): - output = create_cluster_and_submit_job.main( + output = submit_job_to_cluster.main( PROJECT, ZONE, CLUSTER_NAME, BUCKET) assert b"['Hello,', 'dog', 'elephant', 'panther', 'world!']" in output diff --git a/samples/snippets/create_cluster_and_submit_job.py b/samples/snippets/submit_job_to_cluster.py similarity index 74% rename from samples/snippets/create_cluster_and_submit_job.py rename to samples/snippets/submit_job_to_cluster.py index f165e145..3ffde240 100644 --- a/samples/snippets/create_cluster_and_submit_job.py +++ b/samples/snippets/submit_job_to_cluster.py @@ -19,8 +19,6 @@ from google.cloud import storage import googleapiclient.discovery -# Currently only the "global" region is supported -REGION = 'global' DEFAULT_FILENAME = 'pyspark_sort.py' @@ -36,6 +34,14 @@ def get_pyspark_file(filename): return f, os.path.basename(filename) +def get_region_from_zone(zone): + try: + region_as_list = zone.split('-')[:-1] + return '-'.join(region_as_list) + except (AttributeError, IndexError, ValueError): + raise ValueError('Invalid zone provided, please check your input.') + + def upload_pyspark_file(project_id, bucket_name, filename, file): """Uploads the PySpark file in this directory to the configured input bucket.""" @@ -59,8 +65,8 @@ def download_output(project_id, cluster_id, output_bucket, job_id): # [START create_cluster] -def create_cluster(dataproc, project, cluster_name, zone): - print('Creating cluster.') +def create_cluster(dataproc, project, zone, region, cluster_name): + print('Creating cluster...') zone_uri = \ 'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format( project, 
zone) @@ -75,19 +81,19 @@ def create_cluster(dataproc, project, cluster_name, zone): } result = dataproc.projects().regions().clusters().create( projectId=project, - region=REGION, + region=region, body=cluster_data).execute() return result # [END create_cluster] -def wait_for_cluster_creation(dataproc, project_id, cluster_name, zone): - print('Waiting for cluster creation') +def wait_for_cluster_creation(dataproc, project_id, region, cluster_name): + print('Waiting for cluster creation...') while True: result = dataproc.projects().regions().clusters().list( projectId=project_id, - region=REGION).execute() + region=region).execute() cluster_list = result['clusters'] cluster = [c for c in cluster_list @@ -100,10 +106,10 @@ def wait_for_cluster_creation(dataproc, project_id, cluster_name, zone): # [START list_clusters_with_detail] -def list_clusters_with_details(dataproc, project): +def list_clusters_with_details(dataproc, project, region): result = dataproc.projects().regions().clusters().list( projectId=project, - region=REGION).execute() + region=region).execute() cluster_list = result['clusters'] for cluster in cluster_list: print("{} - {}" @@ -120,7 +126,8 @@ def get_cluster_id_by_name(cluster_list, cluster_name): # [START submit_pyspark_job] -def submit_pyspark_job(dataproc, project, cluster_name, bucket_name, filename): +def submit_pyspark_job(dataproc, project, region, + cluster_name, bucket_name, filename): """Submits the Pyspark job to the cluster, assuming `filename` has already been uploaded to `bucket_name`""" job_details = { @@ -136,7 +143,7 @@ def submit_pyspark_job(dataproc, project, cluster_name, bucket_name, filename): } result = dataproc.projects().regions().jobs().submit( projectId=project, - region=REGION, + region=region, body=job_details).execute() job_id = result['reference']['jobId'] print('Submitted job ID {}'.format(job_id)) @@ -145,29 +152,29 @@ def submit_pyspark_job(dataproc, project, cluster_name, bucket_name, filename): # [START delete] -def delete_cluster(dataproc, project, cluster): +def delete_cluster(dataproc, project, region, cluster): print('Tearing down cluster') result = dataproc.projects().regions().clusters().delete( projectId=project, - region=REGION, + region=region, clusterName=cluster).execute() return result # [END delete] # [START wait] -def wait_for_job(dataproc, project, job_id): +def wait_for_job(dataproc, project, region, job_id): print('Waiting for job to finish...') while True: result = dataproc.projects().regions().jobs().get( projectId=project, - region=REGION, + region=region, jobId=job_id).execute() # Handle exceptions if result['status']['state'] == 'ERROR': raise Exception(result['status']['details']) elif result['status']['state'] == 'DONE': - print('Job finished') + print('Job finished.') return result # [END wait] @@ -181,34 +188,44 @@ def get_client(): # [END get_client] -def main(project_id, zone, cluster_name, bucket_name, pyspark_file=None): +def main(project_id, zone, cluster_name, bucket_name, + pyspark_file=None, create_new_cluster=True): dataproc = get_client() + region = get_region_from_zone(zone) try: if pyspark_file: spark_file, spark_filename = get_pyspark_file(pyspark_file) else: spark_file, spark_filename = get_default_pyspark_file() - create_cluster(dataproc, project_id, cluster_name, zone) - wait_for_cluster_creation(dataproc, project_id, cluster_name, zone) - upload_pyspark_file(project_id, bucket_name, - spark_filename, spark_file) + if create_new_cluster: + create_cluster( + dataproc, project_id, zone, region, 
cluster_name) + wait_for_cluster_creation( + dataproc, project_id, region, cluster_name) + + upload_pyspark_file( + project_id, bucket_name, spark_filename, spark_file) + cluster_list = list_clusters_with_details( - dataproc, project_id)['clusters'] + dataproc, project_id, region)['clusters'] (cluster_id, output_bucket) = ( get_cluster_id_by_name(cluster_list, cluster_name)) + # [START call_submit_pyspark_job] job_id = submit_pyspark_job( - dataproc, project_id, cluster_name, bucket_name, spark_filename) + dataproc, project_id, region, + cluster_name, bucket_name, spark_filename) # [END call_submit_pyspark_job] - wait_for_job(dataproc, project_id, job_id) + wait_for_job(dataproc, project_id, region, job_id) output = download_output(project_id, cluster_id, output_bucket, job_id) print('Received job output {}'.format(output)) return output finally: - delete_cluster(dataproc, project_id, cluster_name) + if create_new_cluster: + delete_cluster(dataproc, project_id, region, cluster_name) spark_file.close() @@ -220,15 +237,19 @@ def main(project_id, zone, cluster_name, bucket_name, pyspark_file=None): parser.add_argument( '--project_id', help='Project ID you want to access.', required=True), parser.add_argument( - '--zone', help='Region to create clusters in', required=True) + '--zone', help='Zone to create clusters in/connect to', required=True) parser.add_argument( - '--cluster_name', help='Name of the cluster to create', required=True) + '--cluster_name', + help='Name of the cluster to create/connect to', required=True) parser.add_argument( '--gcs_bucket', help='Bucket to upload Pyspark file to', required=True) parser.add_argument( '--pyspark_file', help='Pyspark filename. Defaults to pyspark_sort.py') + parser.add_argument( + '--create_new_cluster', + action='store_true', help='States if the cluster should be created') args = parser.parse_args() main( - args.project_id, args.zone, - args.cluster_name, args.gcs_bucket, args.pyspark_file) + args.project_id, args.zone, args.cluster_name, + args.gcs_bucket, args.pyspark_file, args.create_new_cluster) From 52696f553f0348217d6a6bd5f6e5bc7cfd51d670 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 30 Aug 2017 10:15:58 -0700 Subject: [PATCH 036/109] Auto-update dependencies. [(#1094)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1094) * Auto-update dependencies. * Relax assertions in the ocr_nl sample Change-Id: I6d37e5846a8d6dd52429cb30d501f448c52cbba1 * Drop unused logging apiary samples Change-Id: I545718283773cb729a5e0def8a76ebfa40829d51 --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2059ddef..969c0bbd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-api-python-client==1.6.2 +google-api-python-client==1.6.3 google-cloud==0.27.0 From aac52a1b0f9b27d6e93b25d01ca20393cba9b61c Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 21 Sep 2017 13:40:34 -0700 Subject: [PATCH 037/109] Auto-update dependencies. [(#1133)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1133) * Auto-update dependencies. 
* Fix missing http library Change-Id: I99faa600f2f3f1f50f57694fc9835d7f35bda250 --- samples/snippets/requirements.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 969c0bbd..7910d657 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,4 @@ -google-api-python-client==1.6.3 +google-api-python-client==1.6.4 +google-auth==1.1.1 +google-auth-httplib2==0.0.2 google-cloud==0.27.0 From 15c7503c6a0ca8b3f687797611ce53c897d21ede Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 1 Nov 2017 12:30:10 -0700 Subject: [PATCH 038/109] Auto-update dependencies. [(#1186)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1186) --- samples/snippets/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7910d657..8acd9c54 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.4 -google-auth==1.1.1 +google-auth==1.2.0 google-auth-httplib2==0.0.2 -google-cloud==0.27.0 +google-cloud==0.28.0 From 9e21f7e3624cfe77c82a15fdd7dd925a965e287a Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 6 Nov 2017 10:44:14 -0800 Subject: [PATCH 039/109] Auto-update dependencies. [(#1199)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1199) * Auto-update dependencies. * Fix iot lint Change-Id: I6289e093bdb35e38f9e9bfc3fbc3df3660f9a67e --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8acd9c54..95506d5e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.4 google-auth==1.2.0 google-auth-httplib2==0.0.2 -google-cloud==0.28.0 +google-cloud==0.29.0 From 4eb7dae7c0e912726550568d5e2206ef517c77f2 Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Wed, 8 Nov 2017 12:17:34 -0800 Subject: [PATCH 040/109] Fixed Failed Kokoro Test (Dataproc) [(#1203)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1203) * Fixed Failed Kokoro Test (Dataproc) * Fixed Lint Error * Update dataproc_e2e_test.py * Update dataproc_e2e_test.py * Fixing More Lint Errors * Fixed b/65407087 * Revert "Merge branch 'master' of https://github.com/michaelawyu/python-docs-samples" This reverts commit 1614c7d3ef33630a8ab095792b27fc25fd91f0ad, reversing changes made to cd1dbfd25997a154a8a85cc754cc2a85b18a63c4. * Revert "Fixed b/65407087" This reverts commit cd1dbfd25997a154a8a85cc754cc2a85b18a63c4. 
* Fixed Lint Error * Fixed Lint Error --- samples/snippets/dataproc_e2e_test.py | 5 +---- samples/snippets/submit_job_to_cluster.py | 12 ++++++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_test.py index d7e9c522..0a45d080 100644 --- a/samples/snippets/dataproc_e2e_test.py +++ b/samples/snippets/dataproc_e2e_test.py @@ -18,17 +18,14 @@ import os -from gcp_devrel.testing.flaky import flaky - import submit_job_to_cluster PROJECT = os.environ['GCLOUD_PROJECT'] BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] -CLUSTER_NAME = 'testcluster2' +CLUSTER_NAME = 'testcluster3' ZONE = 'us-central1-b' -@flaky def test_e2e(): output = submit_job_to_cluster.main( PROJECT, ZONE, CLUSTER_NAME, BUCKET) diff --git a/samples/snippets/submit_job_to_cluster.py b/samples/snippets/submit_job_to_cluster.py index 3ffde240..18150782 100644 --- a/samples/snippets/submit_job_to_cluster.py +++ b/samples/snippets/submit_job_to_cluster.py @@ -25,12 +25,12 @@ def get_default_pyspark_file(): """Gets the PySpark file from this directory""" current_dir = os.path.dirname(os.path.abspath(__file__)) - f = open(os.path.join(current_dir, DEFAULT_FILENAME), 'r') + f = open(os.path.join(current_dir, DEFAULT_FILENAME), 'rb') return f, DEFAULT_FILENAME def get_pyspark_file(filename): - f = open(filename, 'r') + f = open(filename, 'rb') return f, os.path.basename(filename) @@ -76,6 +76,14 @@ def create_cluster(dataproc, project, zone, region, cluster_name): 'config': { 'gceClusterConfig': { 'zoneUri': zone_uri + }, + 'masterConfig': { + 'numInstances': 1, + 'machineTypeUri': 'n1-standard-1' + }, + 'workerConfig': { + 'numInstances': 2, + 'machineTypeUri': 'n1-standard-1' } } } From 3b59b755329438a254d899489c083ecb4f651112 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 9 Nov 2017 14:45:13 -0800 Subject: [PATCH 041/109] Auto-update dependencies. [(#1208)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1208) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 95506d5e..2afbeb94 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.4 google-auth==1.2.0 google-auth-httplib2==0.0.2 -google-cloud==0.29.0 +google-cloud==0.30.0 From ccf7fb05cdd150d032f29aa2c3928f24bb78ca3b Mon Sep 17 00:00:00 2001 From: Bill Prin Date: Wed, 15 Nov 2017 11:06:43 -0800 Subject: [PATCH 042/109] Dataproc GCS sample plus doc touchups [(#1151)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1151) --- samples/snippets/README.md | 69 +++++++++++++++++----------- samples/snippets/pyspark_sort.py | 2 +- samples/snippets/pyspark_sort_gcs.py | 30 ++++++++++++ 3 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 samples/snippets/pyspark_sort_gcs.py diff --git a/samples/snippets/README.md b/samples/snippets/README.md index 6bf819cc..ab38fbac 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -2,15 +2,23 @@ Sample command-line programs for interacting with the Cloud Dataproc API. + +Please see [the tutorial on the using the Dataproc API with the Python client +library](https://cloud.google.com/dataproc/docs/tutorials/python-library-example) +for more information. 
+ Note that while this sample demonstrates interacting with Dataproc via the API, the functionality demonstrated here could also be accomplished using the Cloud Console or the gcloud CLI. `list_clusters.py` is a simple command-line program to demonstrate connecting to the Dataproc API and listing the clusters in a region -`create_cluster_and_submit_job.py` demonstrates how to create a cluster, submit the +`create_cluster_and_submit_job.py` demonstrates how to create a cluster, submit the `pyspark_sort.py` job, download the output from Google Cloud Storage, and output the result. +`pyspark_sort.py_gcs` is the asme as `pyspark_sort.py` but demonstrates + reading from a GCS bucket. + ## Prerequisites to run locally: * [pip](https://pypi.python.org/pypi/pip) @@ -19,50 +27,59 @@ Go to the [Google Cloud Console](https://console.cloud.google.com). Under API Manager, search for the Google Cloud Dataproc API and enable it. +## Set Up Your Local Dev Environment -# Set Up Your Local Dev Environment To install, run the following commands. If you want to use [virtualenv](https://virtualenv.readthedocs.org/en/latest/) (recommended), run the commands within a virtualenv. * pip install -r requirements.txt -Create local credentials by running the following command and following the oauth2 flow: +## Authentication + +Please see the [Google cloud authentication guide](https://cloud.google.com/docs/authentication/). +The recommended approach to running these samples is a Service Account with a JSON key. + +## Environment Variables - gcloud beta auth application-default login +Set the following environment variables: + + GOOGLE_CLOUD_PROJECT=your-project-id + REGION=us-central1 # or your region + CLUSTER_NAME=waprin-spark7 + ZONE=us-central1-b + +## Running the samples To run list_clusters.py: - python list_clusters.py --region=us-central1 + python list_clusters.py $GOOGLE_CLOUD_PROJECT --region=$REGION +`submit_job_to_cluster.py` can create the Dataproc cluster, or use an existing one. +If you'd like to create a cluster ahead of time, either use the +[Cloud Console](console.cloud.google.com) or run: -To run submit_job_to_cluster.py, first create a GCS bucket, from the Cloud Console or with -gsutil: + gcloud dataproc clusters create your-cluster-name - gsutil mb gs:// - -Then, if you want to rely on an existing cluster, run: - - python submit_job_to_cluster.py --project_id= --zone=us-central1-b --cluster_name=testcluster --gcs_bucket= - -Otherwise, if you want the script to create a new cluster for you: +To run submit_job_to_cluster.py, first create a GCS bucket for Dataproc to stage files, from the Cloud Console or with +gsutil: - python submit_job_to_cluster.py --project_id= --zone=us-central1-b --cluster_name=testcluster --gcs_bucket= --create_new_cluster + gsutil mb gs:// +Set the environment variable's name: -This will setup a cluster, upload the PySpark file, submit the job, print the result, then -delete the cluster. + BUCKET=your-staging-bucket + CLUSTER=your-cluster-name -You can optionally specify a `--pyspark_file` argument to change from the default -`pyspark_sort.py` included in this script to a new script. +Then, if you want to rely on an existing cluster, run: -## Running on GCE, GAE, or other environments + python submit_job_to_cluster.py --project_id=$GOOGLE_CLOUD_PROJECT --zone=us-central1-b --cluster_name=$CLUSTER --gcs_bucket=$BUCKET -On Google App Engine, the credentials should be found automatically. 
+Otherwise, if you want the script to create a new cluster for you: -On Google Compute Engine, the credentials should be found automatically, but require that -you create the instance with the correct scopes. + python submit_job_to_cluster.py --project_id=$GOOGLE_CLOUD_PROJECT --zone=us-central1-b --cluster_name=$CLUSTER --gcs_bucket=$BUCKET --create_new_cluster - gcloud compute instances create --scopes="https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/compute,https://www.googleapis.com/auth/compute.readonly" test-instance +This will setup a cluster, upload the PySpark file, submit the job, print the result, then +delete the cluster. -If you did not create the instance with the right scopes, you can still upload a JSON service -account and set `GOOGLE_APPLICATION_CREDENTIALS`. See [Google Application Default Credentials](https://developers.google.com/identity/protocols/application-default-credentials) for more details. +You can optionally specify a `--pyspark_file` argument to change from the default +`pyspark_sort.py` included in this script to a new script. diff --git a/samples/snippets/pyspark_sort.py b/samples/snippets/pyspark_sort.py index 14b66995..518e906e 100644 --- a/samples/snippets/pyspark_sort.py +++ b/samples/snippets/pyspark_sort.py @@ -24,5 +24,5 @@ sc = pyspark.SparkContext() rdd = sc.parallelize(['Hello,', 'world!', 'dog', 'elephant', 'panther']) words = sorted(rdd.collect()) -print words +print(words) # [END pyspark] diff --git a/samples/snippets/pyspark_sort_gcs.py b/samples/snippets/pyspark_sort_gcs.py new file mode 100644 index 00000000..70f77d8d --- /dev/null +++ b/samples/snippets/pyspark_sort_gcs.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Sample pyspark script to be uploaded to Cloud Storage and run on +Cloud Dataproc. + +Note this file is not intended to be run directly, but run inside a PySpark +environment. + +This file demonstrates how to read from a GCS bucket. See README.md for more +information. +""" + +# [START pyspark] +import pyspark + +sc = pyspark.SparkContext() +rdd = sc.textFile('gs://path-to-your-GCS-file') +print(sorted(rdd.collect())) +# [END pyspark] From f113e3346501d151b2858a18fa63d819a5fd33a3 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 15 Nov 2017 12:18:33 -0800 Subject: [PATCH 043/109] Auto-update dependencies. 
[(#1217)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1217) --- samples/snippets/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2afbeb94..9d03d5ba 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.4 -google-auth==1.2.0 -google-auth-httplib2==0.0.2 +google-auth==1.2.1 +google-auth-httplib2==0.0.3 google-cloud==0.30.0 From 1c4629978a20e2bd02a87fa13ee7a250a86775ff Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 30 Nov 2017 10:25:03 -0800 Subject: [PATCH 044/109] Auto-update dependencies. [(#1239)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1239) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9d03d5ba..10701cfc 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.4 google-auth==1.2.1 google-auth-httplib2==0.0.3 -google-cloud==0.30.0 +google-cloud==0.31.0 From 5d195d269281c0304d0dfbbb570968415f45ca9b Mon Sep 17 00:00:00 2001 From: michaelawyu Date: Thu, 7 Dec 2017 10:34:29 -0800 Subject: [PATCH 045/109] Added "Open in Cloud Shell" buttons to README files [(#1254)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1254) --- samples/snippets/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/snippets/README.md b/samples/snippets/README.md index ab38fbac..1d919e46 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -1,5 +1,10 @@ # Cloud Dataproc API Example +[![Open in Cloud Shell][shell_img]][shell_link] + +[shell_img]: http://gstatic.com/cloudssh/images/open-btn.png +[shell_link]: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dataproc/README.md + Sample command-line programs for interacting with the Cloud Dataproc API. From 091b08f309ec4fb33d11f2a48fd646e59f4b17cf Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 2 Jan 2018 14:02:47 -0800 Subject: [PATCH 046/109] Auto-update dependencies. [(#1282)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1282) * Auto-update dependencies. * Fix storage acl sample Change-Id: I413bea899fdde4c4859e4070a9da25845b81f7cf --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 10701cfc..1e41426e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.4 google-auth==1.2.1 google-auth-httplib2==0.0.3 -google-cloud==0.31.0 +google-cloud==0.32.0 From 585406c6a4ec4813b43e03bdd47cbeb326c02f00 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Wed, 10 Jan 2018 09:07:00 -0800 Subject: [PATCH 047/109] Auto-update dependencies. 
[(#1309)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1309) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1e41426e..80d830a4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.4 -google-auth==1.2.1 +google-auth==1.3.0 google-auth-httplib2==0.0.3 google-cloud==0.32.0 From 43d3afd6b27ede727fa8054fb18db3342108209b Mon Sep 17 00:00:00 2001 From: DPE bot Date: Thu, 1 Feb 2018 22:20:35 -0800 Subject: [PATCH 048/109] Auto-update dependencies. [(#1320)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1320) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 80d830a4..b1c216fe 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-api-python-client==1.6.4 +google-api-python-client==1.6.5 google-auth==1.3.0 google-auth-httplib2==0.0.3 google-cloud==0.32.0 From 773b0fb1f3776eeb3657ce935803502ccfcbffee Mon Sep 17 00:00:00 2001 From: DPE bot Date: Fri, 9 Feb 2018 10:46:48 -0800 Subject: [PATCH 049/109] Auto-update dependencies. [(#1355)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1355) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b1c216fe..c8567263 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.5 -google-auth==1.3.0 +google-auth==1.4.0 google-auth-httplib2==0.0.3 google-cloud==0.32.0 From ea81fe461c1d14943448e9e4b17828838b0de963 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 26 Feb 2018 09:03:37 -0800 Subject: [PATCH 050/109] Auto-update dependencies. [(#1359)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1359) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c8567263..9fef73c8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ google-api-python-client==1.6.5 -google-auth==1.4.0 +google-auth==1.4.1 google-auth-httplib2==0.0.3 google-cloud==0.32.0 From c925b53b1b00a43c79f702b249895cbe11d55fc3 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Mon, 2 Apr 2018 02:51:10 -0700 Subject: [PATCH 051/109] Auto-update dependencies. 
--- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9fef73c8..0b454689 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-api-python-client==1.6.5 +google-api-python-client==1.6.6 google-auth==1.4.1 google-auth-httplib2==0.0.3 google-cloud==0.32.0 From 501b1d08d6994b2e0703a346e9926786d4bde653 Mon Sep 17 00:00:00 2001 From: Alix Hamilton Date: Thu, 8 Nov 2018 13:13:45 -0800 Subject: [PATCH 052/109] update Dataproc region tags to standard format [(#1826)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1826) --- samples/snippets/list_clusters.py | 8 +++---- samples/snippets/pyspark_sort.py | 4 ++-- samples/snippets/pyspark_sort_gcs.py | 4 ++-- samples/snippets/submit_job_to_cluster.py | 28 +++++++++++------------ 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py index 14a2ba81..4a016d58 100644 --- a/samples/snippets/list_clusters.py +++ b/samples/snippets/list_clusters.py @@ -19,21 +19,21 @@ import googleapiclient.discovery -# [START list_clusters] +# [START dataproc_list_clusters] def list_clusters(dataproc, project, region): result = dataproc.projects().regions().clusters().list( projectId=project, region=region).execute() return result -# [END list_clusters] +# [END dataproc_list_clusters] -# [START get_client] +# [START dataproc_get_client] def get_client(): """Builds a client to the dataproc API.""" dataproc = googleapiclient.discovery.build('dataproc', 'v1') return dataproc -# [END get_client] +# [END dataproc_get_client] def main(project_id, region): diff --git a/samples/snippets/pyspark_sort.py b/samples/snippets/pyspark_sort.py index 518e906e..0ce2350a 100644 --- a/samples/snippets/pyspark_sort.py +++ b/samples/snippets/pyspark_sort.py @@ -18,11 +18,11 @@ environment. """ -# [START pyspark] +# [START dataproc_pyspark_sort] import pyspark sc = pyspark.SparkContext() rdd = sc.parallelize(['Hello,', 'world!', 'dog', 'elephant', 'panther']) words = sorted(rdd.collect()) print(words) -# [END pyspark] +# [END dataproc_pyspark_sort] diff --git a/samples/snippets/pyspark_sort_gcs.py b/samples/snippets/pyspark_sort_gcs.py index 70f77d8d..f1961c37 100644 --- a/samples/snippets/pyspark_sort_gcs.py +++ b/samples/snippets/pyspark_sort_gcs.py @@ -21,10 +21,10 @@ information. 
""" -# [START pyspark] +# [START dataproc_pyspark_sort_gcs] import pyspark sc = pyspark.SparkContext() rdd = sc.textFile('gs://path-to-your-GCS-file') print(sorted(rdd.collect())) -# [END pyspark] +# [END dataproc_pyspark_sort_gcs] diff --git a/samples/snippets/submit_job_to_cluster.py b/samples/snippets/submit_job_to_cluster.py index 18150782..f06d5981 100644 --- a/samples/snippets/submit_job_to_cluster.py +++ b/samples/snippets/submit_job_to_cluster.py @@ -64,7 +64,7 @@ def download_output(project_id, cluster_id, output_bucket, job_id): return bucket.blob(output_blob).download_as_string() -# [START create_cluster] +# [START dataproc_create_cluster] def create_cluster(dataproc, project, zone, region, cluster_name): print('Creating cluster...') zone_uri = \ @@ -92,7 +92,7 @@ def create_cluster(dataproc, project, zone, region, cluster_name): region=region, body=cluster_data).execute() return result -# [END create_cluster] +# [END dataproc_create_cluster] def wait_for_cluster_creation(dataproc, project_id, region, cluster_name): @@ -113,7 +113,7 @@ def wait_for_cluster_creation(dataproc, project_id, region, cluster_name): break -# [START list_clusters_with_detail] +# [START dataproc_list_clusters_with_detail] def list_clusters_with_details(dataproc, project, region): result = dataproc.projects().regions().clusters().list( projectId=project, @@ -123,7 +123,7 @@ def list_clusters_with_details(dataproc, project, region): print("{} - {}" .format(cluster['clusterName'], cluster['status']['state'])) return result -# [END list_clusters_with_detail] +# [END dataproc_list_clusters_with_detail] def get_cluster_id_by_name(cluster_list, cluster_name): @@ -133,7 +133,7 @@ def get_cluster_id_by_name(cluster_list, cluster_name): return cluster['clusterUuid'], cluster['config']['configBucket'] -# [START submit_pyspark_job] +# [START dataproc_submit_pyspark_job] def submit_pyspark_job(dataproc, project, region, cluster_name, bucket_name, filename): """Submits the Pyspark job to the cluster, assuming `filename` has @@ -156,10 +156,10 @@ def submit_pyspark_job(dataproc, project, region, job_id = result['reference']['jobId'] print('Submitted job ID {}'.format(job_id)) return job_id -# [END submit_pyspark_job] +# [END dataproc_submit_pyspark_job] -# [START delete] +# [START dataproc_delete] def delete_cluster(dataproc, project, region, cluster): print('Tearing down cluster') result = dataproc.projects().regions().clusters().delete( @@ -167,10 +167,10 @@ def delete_cluster(dataproc, project, region, cluster): region=region, clusterName=cluster).execute() return result -# [END delete] +# [END dataproc_delete] -# [START wait] +# [START dataproc_wait] def wait_for_job(dataproc, project, region, job_id): print('Waiting for job to finish...') while True: @@ -184,16 +184,16 @@ def wait_for_job(dataproc, project, region, job_id): elif result['status']['state'] == 'DONE': print('Job finished.') return result -# [END wait] +# [END dataproc_wait] -# [START get_client] +# [START dataproc_get_client] def get_client(): """Builds an http client authenticated with the service account credentials.""" dataproc = googleapiclient.discovery.build('dataproc', 'v1') return dataproc -# [END get_client] +# [END dataproc_get_client] def main(project_id, zone, cluster_name, bucket_name, @@ -221,11 +221,11 @@ def main(project_id, zone, cluster_name, bucket_name, (cluster_id, output_bucket) = ( get_cluster_id_by_name(cluster_list, cluster_name)) - # [START call_submit_pyspark_job] + # [START dataproc_call_submit_pyspark_job] job_id = 
submit_pyspark_job( dataproc, project_id, region, cluster_name, bucket_name, spark_filename) - # [END call_submit_pyspark_job] + # [END dataproc_call_submit_pyspark_job] wait_for_job(dataproc, project_id, region, job_id) output = download_output(project_id, cluster_id, output_bucket, job_id) From df1f2b22547b7ca86bbdb791ad930003a815a677 Mon Sep 17 00:00:00 2001 From: James Winegar Date: Tue, 20 Nov 2018 14:42:11 -0600 Subject: [PATCH 053/109] Update submit_job_to_cluster.py [(#1708)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1708) switch region to new 'global' region and remove unnecessary function. --- samples/snippets/submit_job_to_cluster.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/samples/snippets/submit_job_to_cluster.py b/samples/snippets/submit_job_to_cluster.py index f06d5981..ed49013d 100644 --- a/samples/snippets/submit_job_to_cluster.py +++ b/samples/snippets/submit_job_to_cluster.py @@ -34,14 +34,6 @@ def get_pyspark_file(filename): return f, os.path.basename(filename) -def get_region_from_zone(zone): - try: - region_as_list = zone.split('-')[:-1] - return '-'.join(region_as_list) - except (AttributeError, IndexError, ValueError): - raise ValueError('Invalid zone provided, please check your input.') - - def upload_pyspark_file(project_id, bucket_name, filename, file): """Uploads the PySpark file in this directory to the configured input bucket.""" @@ -199,7 +191,7 @@ def get_client(): def main(project_id, zone, cluster_name, bucket_name, pyspark_file=None, create_new_cluster=True): dataproc = get_client() - region = get_region_from_zone(zone) + region = 'global' try: if pyspark_file: spark_file, spark_filename = get_pyspark_file(pyspark_file) From 3adc94f4d0c14453153968c3851fae100e2c5e44 Mon Sep 17 00:00:00 2001 From: DPE bot Date: Tue, 20 Nov 2018 15:40:29 -0800 Subject: [PATCH 054/109] Auto-update dependencies. [(#1846)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1846) ACK, merging. 
--- samples/snippets/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0b454689..feb3c4dd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-api-python-client==1.6.6 -google-auth==1.4.1 +google-api-python-client==1.7.4 +google-auth==1.6.1 google-auth-httplib2==0.0.3 -google-cloud==0.32.0 +google-cloud==0.34.0 From ebcdc3e48ec49a3db748d873a20cfc382bf1584f Mon Sep 17 00:00:00 2001 From: Charles Engelke Date: Wed, 21 Nov 2018 13:19:49 -0800 Subject: [PATCH 055/109] Need separate install for google-cloud-storage [(#1863)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1863) --- samples/snippets/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index feb3c4dd..31d0ad5f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,3 +2,4 @@ google-api-python-client==1.7.4 google-auth==1.6.1 google-auth-httplib2==0.0.3 google-cloud==0.34.0 +google-cloud-storage==1.13.0 From 2898cdf9d48e7c899cc0a9832354f35df0f74359 Mon Sep 17 00:00:00 2001 From: Charles Engelke Date: Wed, 21 Nov 2018 13:46:43 -0800 Subject: [PATCH 056/109] Revert "Update dataproc/submit_job_to_cluster.py" [(#1864)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1864) * Revert "Remove test configs for non-testing directories [(#1855)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1855)" This reverts commit 73a73321579337312e8ba85c34fe9c37b42b7f6e. * Revert "Auto-update dependencies. [(#1846)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1846)" This reverts commit 3adc94f4d0c14453153968c3851fae100e2c5e44. * Revert "Tweak slack sample [(#1847)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1847)" This reverts commit a48c010481c166968d9f1bd58106054c5d1c58f9. * Revert "Non-client library example of constructing a Signed URL [(#1837)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1837)" This reverts commit fc3284d995a8a35c473a207e80490fad265782af. * Revert "GCF samples: handle {empty JSON, GET} requests + remove commas [(#1832)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1832)" This reverts commit 6928491ed3d52b0bec694e6b30257f08caac5f2b. * Revert "Correct the maintenance event types [(#1830)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1830)" This reverts commit c22840fd23586349b7b665d851dea046a94ba7c7. * Revert "Fix GCF region tags [(#1827)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1827)" This reverts commit 0fbfef27d35cea23ad0e20fd2c9df3e8a4a046cb. * Revert "Updated to Flask 1.0 [(#1819)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1819)" This reverts commit d52ccf99503311bba2cec2881e8cb0f9b5a6f2bf. * Revert "Fix deprecation warning [(#1801)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1801)" This reverts commit 981737e85f60eca5cc337f172249deddca9b291b. * Revert "Update submit_job_to_cluster.py [(#1708)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1708)" This reverts commit df1f2b22547b7ca86bbdb791ad930003a815a677. 
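For reference, the revert below restores the sample's zone-derived region in place of the hard-coded 'global' region. The `get_region_from_zone` helper maps a Compute Engine zone to its region by dropping the final zone suffix; this minimal sketch reproduces the helper from `submit_job_to_cluster.py`, with an illustrative zone as input:

```python
# Minimal sketch of the sample's zone-to-region mapping (function body
# taken from submit_job_to_cluster.py); the example zone is illustrative.
def get_region_from_zone(zone):
    try:
        region_as_list = zone.split('-')[:-1]
        return '-'.join(region_as_list)
    except (AttributeError, IndexError, ValueError):
        raise ValueError('Invalid zone provided, please check your input.')


if __name__ == '__main__':
    print(get_region_from_zone('us-central1-b'))  # prints "us-central1"
```
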
--- samples/snippets/submit_job_to_cluster.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/samples/snippets/submit_job_to_cluster.py b/samples/snippets/submit_job_to_cluster.py index ed49013d..f06d5981 100644 --- a/samples/snippets/submit_job_to_cluster.py +++ b/samples/snippets/submit_job_to_cluster.py @@ -34,6 +34,14 @@ def get_pyspark_file(filename): return f, os.path.basename(filename) +def get_region_from_zone(zone): + try: + region_as_list = zone.split('-')[:-1] + return '-'.join(region_as_list) + except (AttributeError, IndexError, ValueError): + raise ValueError('Invalid zone provided, please check your input.') + + def upload_pyspark_file(project_id, bucket_name, filename, file): """Uploads the PySpark file in this directory to the configured input bucket.""" @@ -191,7 +199,7 @@ def get_client(): def main(project_id, zone, cluster_name, bucket_name, pyspark_file=None, create_new_cluster=True): dataproc = get_client() - region = 'global' + region = get_region_from_zone(zone) try: if pyspark_file: spark_file, spark_filename = get_pyspark_file(pyspark_file) From af76f7bae0ea98bb095666af58a58b927d543822 Mon Sep 17 00:00:00 2001 From: aman-ebay Date: Thu, 10 Jan 2019 13:11:27 -0800 Subject: [PATCH 057/109] Create python-api-walkthrough.md [(#1966)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1966) * Create python-api-walkthrough.md This Google Cloud Shell walkthrough is linked to Cloud Dataproc documentation to be published at: https://cloud.google.com/dataproc/docs/tutorials/python-library-example * Update python-api-walkthrough.md --- samples/snippets/python-api-walkthrough.md | 165 +++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 samples/snippets/python-api-walkthrough.md diff --git a/samples/snippets/python-api-walkthrough.md b/samples/snippets/python-api-walkthrough.md new file mode 100644 index 00000000..0004e241 --- /dev/null +++ b/samples/snippets/python-api-walkthrough.md @@ -0,0 +1,165 @@ +# Use the Python Client Library to call Cloud Dataproc APIs + +Estimated completion time: + +## Overview + +This [Cloud Shell](https://cloud.google.com/shell/docs/) walkthrough leads you +through the steps to use the +[Google APIs Client Library for Python](http://code.google.com/p/google-api-python-client/ ) +to programmatically interact with [Cloud Dataproc](https://cloud.google.com/dataproc/docs/). + +As you follow this walkthrough, you run Python code that calls +[Cloud Dataproc REST API](https://cloud.google.com//dataproc/docs/reference/rest/) +methods to: + +* create a Cloud Dataproc cluster +* submit a small PySpark word sort job to run on the cluster +* get job status +* tear down the cluster after job completion + +## Using the walkthrough + +The `submit_job_to_cluster.py file` used in this walkthrough is opened in the +Cloud Shell editor when you launch the walkthrough. You can view +the code as your follow the walkthrough steps. + +**For more information**: See [Cloud Dataproc→Use the Python Client Library](https://cloud.google.com/dataproc/docs/tutorials/python-library-example) for +an explanation of how the code works. + +**To reload this walkthrough:** Run the following command from the +`~/python-docs-samples/dataproc` directory in Cloud Shell: + + cloudshell launch-tutorial python-api-walkthrough.md + +**To copy and run commands**: Click the "Paste in Cloud Shell" button + () + on the side of a code box, then press `Enter` to run the command. + +## Prerequisites (1) + +1. 
Create or select a Google Cloud Platform project to use for this tutorial. + * + +1. Enable the Cloud Dataproc, Compute Engine, and Cloud Storage APIs in your project. + * + +## Prerequisites (2) + +1. This walkthrough uploads a PySpark file (`pyspark_sort.py`) to a + [Cloud Storage bucket](https://cloud.google.com/storage/docs/key-terms#buckets) in + your project. + * You can use the [Cloud Storage browser page](https://console.cloud.google.com/storage/browser) + in Google Cloud Platform Console to view existing buckets in your project. + +     **OR** + + * To create a new bucket, run the following command. Your bucket name must be unique. + ```bash + gsutil mb -p {{project-id}} gs://your-bucket-name + ``` + +1. Set environment variables. + + * Set the name of your bucket. + ```bash + BUCKET=your-bucket-name + ``` + +## Prerequisites (3) + +1. Set up a Python + [virtual environment](https://virtualenv.readthedocs.org/en/latest/) + in Cloud Shell. + + * Create the virtual environment. + ```bash + virtualenv ENV + ``` + * Activate the virtual environment. + ```bash + source ENV/bin/activate + ``` + +1. Install library dependencies in Cloud Shell. + ```bash + pip install -r requirements.txt + ``` + +## Create a cluster and submit a job + +1. Set a name for your new cluster. + ```bash + CLUSTER=new-cluster-name + ``` + +1. Set a [zone](https://cloud.google.com/compute/docs/regions-zones/#available) + where your new cluster will be located. You can change the + "us-central1-a" zone that is pre-set in the following command. + ```bash + ZONE=us-central1-a + ``` + +1. Run `submit_job.py` with the `--create_new_cluster` flag + to create a new cluster and submit the `pyspark_sort.py` job + to the cluster. + + ```bash + python submit_job_to_cluster.py \ + --project_id={{project-id}} \ + --cluster_name=$CLUSTER \ + --zone=$ZONE \ + --gcs_bucket=$BUCKET \ + --create_new_cluster + ``` + +## Job Output + +Job output in Cloud Shell shows cluster creation, job submission, + job completion, and then tear-down of the cluster. + + ... + Creating cluster... + Cluster created. + Uploading pyspark file to GCS + new-cluster-name - RUNNING + Submitted job ID ... + Waiting for job to finish... + Job finished. + Downloading output file + ..... + ['Hello,', 'dog', 'elephant', 'panther', 'world!'] + ... + Tearing down cluster + ``` +## Congratulations on Completing the Walkthrough! + + +--- + +### Next Steps: + +* **View job details from the Console.** View job details by selecting the + PySpark job from the Cloud Dataproc + [Jobs page](https://console.cloud.google.com/dataproc/jobs) + in the Google Cloud Platform Console. + +* **Delete resources used in the walkthrough.** + The `submit_job.py` job deletes the cluster that it created for this + walkthrough. + + If you created a bucket to use for this walkthrough, + you can run the following command to delete the + Cloud Storage bucket (the bucket must be empty). + ```bash + gsutil rb gs://$BUCKET + ``` + You can run the following command to delete the bucket **and all + objects within it. Note: the deleted objects cannot be recovered.** + ```bash + gsutil rm -r gs://$BUCKET + ``` + +* **For more information.** See the [Cloud Dataproc documentation](https://cloud.google.com/dataproc/docs/) + for API reference and product feature information. 
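Because `pyspark_sort.py` sorts a fixed list of words, the job output shown in the walkthrough can be sanity-checked locally without a cluster. A minimal sketch in plain Python (no PySpark required); the word list is copied from the sample:

```python
# Reproduces the driver output line shown in the walkthrough by sorting
# the same five words that pyspark_sort.py parallelizes on the cluster.
words = ['Hello,', 'world!', 'dog', 'elephant', 'panther']
print(sorted(words))
# ['Hello,', 'dog', 'elephant', 'panther', 'world!']
```
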
+ From 8d27c6dd906a5316b291f28b63b17774aa2a86bb Mon Sep 17 00:00:00 2001 From: aman-ebay Date: Fri, 25 Jan 2019 14:47:26 -0800 Subject: [PATCH 058/109] Update list_clusters.py [(#1887)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1887) --- samples/snippets/list_clusters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py index 4a016d58..9bbaa3b0 100644 --- a/samples/snippets/list_clusters.py +++ b/samples/snippets/list_clusters.py @@ -52,7 +52,7 @@ def main(project_id, region): # Sets the region to "global" if it's not provided # Note: sub-regions (e.g.: us-central1-a/b) are currently not supported parser.add_argument( - '--region', default='global', help='Region to create clusters in') + '--region', default='global', help='Region to list clusters') args = parser.parse_args() main(args.project_id, args.region) From e4549a5a9cac9303d625486a37f2fac2195fb42c Mon Sep 17 00:00:00 2001 From: DPEBot Date: Wed, 6 Feb 2019 12:06:35 -0800 Subject: [PATCH 059/109] Auto-update dependencies. [(#1980)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/1980) * Auto-update dependencies. * Update requirements.txt * Update requirements.txt --- samples/snippets/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 31d0ad5f..bc5d62ef 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ -google-api-python-client==1.7.4 -google-auth==1.6.1 +google-api-python-client==1.7.8 +google-auth==1.6.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 -google-cloud-storage==1.13.0 +google-cloud-storage==1.13.2 From c0d6f97aa46407b7280c0e4774d37fbf2b7ae0c4 Mon Sep 17 00:00:00 2001 From: aman-ebay Date: Mon, 20 May 2019 12:57:43 -0700 Subject: [PATCH 060/109] Update Dataproc samples. [(#2158)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2158) * Update requirements.txt * Update python-api-walkthrough.md * Update submit_job_to_cluster.py * Update list_clusters.py --- samples/snippets/README.md | 38 ++- samples/snippets/list_clusters.py | 53 ++-- samples/snippets/python-api-walkthrough.md | 6 +- samples/snippets/requirements.txt | 3 +- samples/snippets/single_job_workflow.py | 208 ++++++++++++++ samples/snippets/submit_job_to_cluster.py | 312 +++++++++++---------- 6 files changed, 426 insertions(+), 194 deletions(-) create mode 100644 samples/snippets/single_job_workflow.py diff --git a/samples/snippets/README.md b/samples/snippets/README.md index 1d919e46..98622be7 100644 --- a/samples/snippets/README.md +++ b/samples/snippets/README.md @@ -1,4 +1,4 @@ -# Cloud Dataproc API Example +# Cloud Dataproc API Examples [![Open in Cloud Shell][shell_img]][shell_link] @@ -7,21 +7,20 @@ Sample command-line programs for interacting with the Cloud Dataproc API. - -Please see [the tutorial on the using the Dataproc API with the Python client +See [the tutorial on the using the Dataproc API with the Python client library](https://cloud.google.com/dataproc/docs/tutorials/python-library-example) -for more information. +for information on a walkthrough you can run to try out the Cloud Dataproc API sample code. -Note that while this sample demonstrates interacting with Dataproc via the API, the functionality -demonstrated here could also be accomplished using the Cloud Console or the gcloud CLI. 
+Note that while this sample demonstrates interacting with Dataproc via the API, the functionality demonstrated here could also be accomplished using the Cloud Console or the gcloud CLI. -`list_clusters.py` is a simple command-line program to demonstrate connecting to the -Dataproc API and listing the clusters in a region +`list_clusters.py` is a simple command-line program to demonstrate connecting to the Cloud Dataproc API and listing the clusters in a region. -`create_cluster_and_submit_job.py` demonstrates how to create a cluster, submit the +`submit_job_to_cluster.py` demonstrates how to create a cluster, submit the `pyspark_sort.py` job, download the output from Google Cloud Storage, and output the result. -`pyspark_sort.py_gcs` is the asme as `pyspark_sort.py` but demonstrates +`single_job_workflow.py` uses the Cloud Dataproc InstantiateInlineWorkflowTemplate API to create an ephemeral cluster, run a job, then delete the cluster with one API request. + +`pyspark_sort.py_gcs` is the same as `pyspark_sort.py` but demonstrates reading from a GCS bucket. ## Prerequisites to run locally: @@ -59,32 +58,27 @@ To run list_clusters.py: python list_clusters.py $GOOGLE_CLOUD_PROJECT --region=$REGION -`submit_job_to_cluster.py` can create the Dataproc cluster, or use an existing one. -If you'd like to create a cluster ahead of time, either use the -[Cloud Console](console.cloud.google.com) or run: +`submit_job_to_cluster.py` can create the Dataproc cluster or use an existing cluster. To create a cluster before running the code, you can use the [Cloud Console](console.cloud.google.com) or run: gcloud dataproc clusters create your-cluster-name -To run submit_job_to_cluster.py, first create a GCS bucket for Dataproc to stage files, from the Cloud Console or with -gsutil: +To run submit_job_to_cluster.py, first create a GCS bucket (used by Cloud Dataproc to stage files) from the Cloud Console or with gsutil: gsutil mb gs:// -Set the environment variable's name: +Next, set the following environment variables: BUCKET=your-staging-bucket CLUSTER=your-cluster-name -Then, if you want to rely on an existing cluster, run: +Then, if you want to use an existing cluster, run: python submit_job_to_cluster.py --project_id=$GOOGLE_CLOUD_PROJECT --zone=us-central1-b --cluster_name=$CLUSTER --gcs_bucket=$BUCKET -Otherwise, if you want the script to create a new cluster for you: +Alternatively, to create a new cluster, which will be deleted at the end of the job, run: python submit_job_to_cluster.py --project_id=$GOOGLE_CLOUD_PROJECT --zone=us-central1-b --cluster_name=$CLUSTER --gcs_bucket=$BUCKET --create_new_cluster -This will setup a cluster, upload the PySpark file, submit the job, print the result, then -delete the cluster. +The script will setup a cluster, upload the PySpark file, submit the job, print the result, then, if it created the cluster, delete the cluster. -You can optionally specify a `--pyspark_file` argument to change from the default -`pyspark_sort.py` included in this script to a new script. +Optionally, you can add the `--pyspark_file` argument to change from the default `pyspark_sort.py` included in this script to a new script. diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py index 9bbaa3b0..1639c413 100644 --- a/samples/snippets/list_clusters.py +++ b/samples/snippets/list_clusters.py @@ -10,49 +10,54 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +"""Sample command-line program to list Cloud Dataproc clusters in a region. -""" Sample command-line program for listing Google Dataproc Clusters -""" +Example usage: +python list_clusters.py --project_id=my-project-id --region=global +""" import argparse -import googleapiclient.discovery +from google.cloud import dataproc_v1 +from google.cloud.dataproc_v1.gapic.transports import ( + cluster_controller_grpc_transport) # [START dataproc_list_clusters] def list_clusters(dataproc, project, region): - result = dataproc.projects().regions().clusters().list( - projectId=project, - region=region).execute() - return result + """List the details of clusters in the region.""" + for cluster in dataproc.list_clusters(project, region): + print(('{} - {}'.format(cluster.cluster_name, + cluster.status.State.Name( + cluster.status.state)))) # [END dataproc_list_clusters] -# [START dataproc_get_client] -def get_client(): - """Builds a client to the dataproc API.""" - dataproc = googleapiclient.discovery.build('dataproc', 'v1') - return dataproc -# [END dataproc_get_client] +def main(project_id, region): + if region == 'global': + # Use the default gRPC global endpoints. + dataproc_cluster_client = dataproc_v1.ClusterControllerClient() + else: + # Use a regional gRPC endpoint. See: + # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints + client_transport = ( + cluster_controller_grpc_transport.ClusterControllerGrpcTransport( + address='{}-dataproc.googleapis.com:443'.format(region))) + dataproc_cluster_client = dataproc_v1.ClusterControllerClient( + client_transport) -def main(project_id, region): - dataproc = get_client() - result = list_clusters(dataproc, project_id, region) - print(result) + list_clusters(dataproc_cluster_client, project_id, region) if __name__ == '__main__': parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter - ) + description=__doc__, formatter_class=( + argparse.RawDescriptionHelpFormatter)) parser.add_argument( - 'project_id', help='Project ID you want to access.'), - # Sets the region to "global" if it's not provided - # Note: sub-regions (e.g.: us-central1-a/b) are currently not supported + '--project_id', help='Project ID to access.', required=True) parser.add_argument( - '--region', default='global', help='Region to list clusters') + '--region', help='Region of clusters to list.', required=True) args = parser.parse_args() main(args.project_id, args.region) diff --git a/samples/snippets/python-api-walkthrough.md b/samples/snippets/python-api-walkthrough.md index 0004e241..656b54ac 100644 --- a/samples/snippets/python-api-walkthrough.md +++ b/samples/snippets/python-api-walkthrough.md @@ -121,7 +121,7 @@ Job output in Cloud Shell shows cluster creation, job submission, ... Creating cluster... Cluster created. - Uploading pyspark file to GCS + Uploading pyspark file to Cloud Storage new-cluster-name - RUNNING Submitted job ID ... Waiting for job to finish... @@ -140,12 +140,12 @@ Job output in Cloud Shell shows cluster creation, job submission, ### Next Steps: * **View job details from the Console.** View job details by selecting the - PySpark job from the Cloud Dataproc + PySpark job from the Cloud Dataproc [Jobs page](https://console.cloud.google.com/dataproc/jobs) in the Google Cloud Platform Console. 
* **Delete resources used in the walkthrough.** - The `submit_job.py` job deletes the cluster that it created for this + The `submit_job_to_cluster.py` job deletes the cluster that it created for this walkthrough. If you created a bucket to use for this walkthrough, diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index bc5d62ef..509e241a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,6 @@ -google-api-python-client==1.7.8 +grpcio>=1.2.0 google-auth==1.6.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.13.2 +google-cloud-dataproc==0.3.1 diff --git a/samples/snippets/single_job_workflow.py b/samples/snippets/single_job_workflow.py new file mode 100644 index 00000000..b17ea0b9 --- /dev/null +++ b/samples/snippets/single_job_workflow.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +r"""Sample Cloud Dataproc inline workflow to run a pyspark job on an ephermeral +cluster. +Example Usage to run the inline workflow on a managed cluster: +python single_job_workflow.py --project_id=$PROJECT --gcs_bucket=$BUCKET \ + --cluster_name=$CLUSTER --zone=$ZONE +Example Usage to run the inline workflow on a global region managed cluster: +python submit_job_to_cluster.py --project_id=$PROJECT --gcs_bucket=$BUCKET \ + --cluster_name=$CLUSTER --zone=$ZONE --global_region +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os + +from google.cloud import dataproc_v1 +from google.cloud.dataproc_v1.gapic.transports import ( + workflow_template_service_grpc_transport) +from google.cloud import storage + +DEFAULT_FILENAME = "pyspark_sort.py" +waiting_callback = False + + +def get_pyspark_file(pyspark_file=None): + if pyspark_file: + f = open(pyspark_file, "rb") + return f, os.path.basename(pyspark_file) + else: + """Gets the PySpark file from current directory.""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + f = open(os.path.join(current_dir, DEFAULT_FILENAME), "rb") + return f, DEFAULT_FILENAME + + +def get_region_from_zone(zone): + try: + region_as_list = zone.split("-")[:-1] + return "-".join(region_as_list) + except (AttributeError, IndexError, ValueError): + raise ValueError("Invalid zone provided, please check your input.") + + +def upload_pyspark_file(project, bucket_name, filename, spark_file): + """Uploads the PySpark file in this directory to the configured input + bucket.""" + print("Uploading pyspark file to Cloud Storage.") + client = storage.Client(project=project) + bucket = client.get_bucket(bucket_name) + blob = bucket.blob(filename) + blob.upload_from_file(spark_file) + + +def run_workflow(dataproc, project, region, zone, bucket_name, filename, + cluster_name): + + parent = "projects/{}/regions/{}".format(project, region) + zone_uri = ("https://www.googleapis.com/compute/v1/projects/{}/zones/{}" + .format(project, zone)) + + workflow_data = { + 
"placement": { + "managed_cluster": { + "cluster_name": cluster_name, + "config": { + "gce_cluster_config": {"zone_uri": zone_uri}, + "master_config": { + "num_instances": 1, + "machine_type_uri": "n1-standard-1", + }, + "worker_config": { + "num_instances": 2, + "machine_type_uri": "n1-standard-1", + }, + }, + } + }, + "jobs": [ + { + "pyspark_job": { + "main_python_file_uri": "gs://{}/{}".format( + bucket_name, filename) + }, + "step_id": "pyspark-job", + } + ], + } + + workflow = dataproc.instantiate_inline_workflow_template(parent, + workflow_data) + + workflow.add_done_callback(callback) + global waiting_callback + waiting_callback = True + + +def callback(operation_future): + # Reset global when callback returns. + global waiting_callback + waiting_callback = False + + +def wait_for_workflow_end(): + """Wait for cluster creation.""" + print("Waiting for workflow completion ...") + print("Workflow and job progress, and job driver output available from: " + "https://console.cloud.google.com/dataproc/workflows/") + + while True: + if not waiting_callback: + print("Workflow completed.") + break + + +def main( + project_id, + zone, + cluster_name, + bucket_name, + pyspark_file=None, + create_new_cluster=True, + global_region=True, +): + + # [START dataproc_get_workflow_template_client] + if global_region: + region = "global" + # Use the default gRPC global endpoints. + dataproc_workflow_client = dataproc_v1.WorkflowTemplateServiceClient() + else: + region = get_region_from_zone(zone) + # Use a regional gRPC endpoint. See: + # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints + client_transport = (workflow_template_service_grpc_transport + .WorkflowTemplateServiceGrpcTransport( + address="{}-dataproc.googleapis.com:443" + .format(region))) + dataproc_workflow_client = dataproc_v1.WorkflowTemplateServiceClient( + client_transport + ) + # [END dataproc_get_workflow_template_client] + + try: + spark_file, spark_filename = get_pyspark_file(pyspark_file) + upload_pyspark_file(project_id, bucket_name, spark_filename, + spark_file) + + run_workflow( + dataproc_workflow_client, + project_id, + region, + zone, + bucket_name, + spark_filename, + cluster_name + ) + wait_for_workflow_end() + + finally: + spark_file.close() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=(argparse + .RawDescriptionHelpFormatter)) + parser.add_argument( + "--project_id", help="Project ID you want to access.", required=True + ) + parser.add_argument( + "--zone", help="Zone to create clusters in/connect to", required=True + ) + parser.add_argument( + "--cluster_name", help="Name of the cluster to create/connect to", + required=True + ) + parser.add_argument( + "--gcs_bucket", help="Bucket to upload Pyspark file to", required=True + ) + parser.add_argument( + "--pyspark_file", help="Pyspark filename. Defaults to pyspark_sort.py" + ) + parser.add_argument("--global_region", + action="store_true", + help="If cluster is in the global region") + + args = parser.parse_args() + main( + args.project_id, + args.zone, + args.cluster_name, + args.gcs_bucket, + args.pyspark_file, + ) diff --git a/samples/snippets/submit_job_to_cluster.py b/samples/snippets/submit_job_to_cluster.py index f06d5981..1c648abc 100644 --- a/samples/snippets/submit_job_to_cluster.py +++ b/samples/snippets/submit_job_to_cluster.py @@ -10,28 +10,48 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +r"""Sample command-line program to run a pyspark job on a new or existing +cluster. -""" Sample command-line program for listing Google Dataproc Clusters""" +Global region clusters are supported with --global_region flag. + +Example Usage to run the pyspark job on a new cluster: +python submit_job_to_cluster.py --project_id=$PROJECT --gcs_bucket=$BUCKET \ + --create_new_cluster --cluster_name=$CLUSTER --zone=$ZONE + +Example Usage to run the pyspark job on an existing global region cluster: +python submit_job_to_cluster.py --project_id=$PROJECT --gcs_bucket=$BUCKET \ + --global_region --cluster_name=$CLUSTER --zone=$ZONE + +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import argparse import os +from google.cloud import dataproc_v1 +from google.cloud.dataproc_v1.gapic.transports import ( + cluster_controller_grpc_transport) +from google.cloud.dataproc_v1.gapic.transports import ( + job_controller_grpc_transport) from google.cloud import storage -import googleapiclient.discovery DEFAULT_FILENAME = 'pyspark_sort.py' +waiting_callback = False -def get_default_pyspark_file(): - """Gets the PySpark file from this directory""" - current_dir = os.path.dirname(os.path.abspath(__file__)) - f = open(os.path.join(current_dir, DEFAULT_FILENAME), 'rb') - return f, DEFAULT_FILENAME - - -def get_pyspark_file(filename): - f = open(filename, 'rb') - return f, os.path.basename(filename) +def get_pyspark_file(pyspark_file=None): + if pyspark_file: + f = open(pyspark_file, "rb") + return f, os.path.basename(pyspark_file) + else: + """Gets the PySpark file from current directory.""" + current_dir = os.path.dirname(os.path.abspath(__file__)) + f = open(os.path.join(current_dir, DEFAULT_FILENAME), "rb") + return f, DEFAULT_FILENAME def get_region_from_zone(zone): @@ -42,222 +62,226 @@ def get_region_from_zone(zone): raise ValueError('Invalid zone provided, please check your input.') -def upload_pyspark_file(project_id, bucket_name, filename, file): - """Uploads the PySpark file in this directory to the configured - input bucket.""" - print('Uploading pyspark file to GCS') - client = storage.Client(project=project_id) +def upload_pyspark_file(project, bucket_name, filename, spark_file): + """Uploads the PySpark file in this directory to the configured input + bucket.""" + print('Uploading pyspark file to Cloud Storage.') + client = storage.Client(project=project) bucket = client.get_bucket(bucket_name) blob = bucket.blob(filename) - blob.upload_from_file(file) + blob.upload_from_file(spark_file) -def download_output(project_id, cluster_id, output_bucket, job_id): +def download_output(project, cluster_id, output_bucket, job_id): """Downloads the output file from Cloud Storage and returns it as a string.""" - print('Downloading output file') - client = storage.Client(project=project_id) + print('Downloading output file.') + client = storage.Client(project=project) bucket = client.get_bucket(output_bucket) output_blob = ( - 'google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000' - .format(cluster_id, job_id)) + ('google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000'. 
+ format(cluster_id, job_id))) return bucket.blob(output_blob).download_as_string() # [START dataproc_create_cluster] def create_cluster(dataproc, project, zone, region, cluster_name): + """Create the cluster.""" print('Creating cluster...') zone_uri = \ 'https://www.googleapis.com/compute/v1/projects/{}/zones/{}'.format( project, zone) cluster_data = { - 'projectId': project, - 'clusterName': cluster_name, + 'project_id': project, + 'cluster_name': cluster_name, 'config': { - 'gceClusterConfig': { - 'zoneUri': zone_uri + 'gce_cluster_config': { + 'zone_uri': zone_uri }, - 'masterConfig': { - 'numInstances': 1, - 'machineTypeUri': 'n1-standard-1' + 'master_config': { + 'num_instances': 1, + 'machine_type_uri': 'n1-standard-1' }, - 'workerConfig': { - 'numInstances': 2, - 'machineTypeUri': 'n1-standard-1' + 'worker_config': { + 'num_instances': 2, + 'machine_type_uri': 'n1-standard-1' } } } - result = dataproc.projects().regions().clusters().create( - projectId=project, - region=region, - body=cluster_data).execute() - return result + + cluster = dataproc.create_cluster(project, region, cluster_data) + cluster.add_done_callback(callback) + global waiting_callback + waiting_callback = True # [END dataproc_create_cluster] -def wait_for_cluster_creation(dataproc, project_id, region, cluster_name): +def callback(operation_future): + # Reset global when callback returns. + global waiting_callback + waiting_callback = False + + +def wait_for_cluster_creation(): + """Wait for cluster creation.""" print('Waiting for cluster creation...') while True: - result = dataproc.projects().regions().clusters().list( - projectId=project_id, - region=region).execute() - cluster_list = result['clusters'] - cluster = [c - for c in cluster_list - if c['clusterName'] == cluster_name][0] - if cluster['status']['state'] == 'ERROR': - raise Exception(result['status']['details']) - if cluster['status']['state'] == 'RUNNING': + if not waiting_callback: print("Cluster created.") break # [START dataproc_list_clusters_with_detail] def list_clusters_with_details(dataproc, project, region): - result = dataproc.projects().regions().clusters().list( - projectId=project, - region=region).execute() - cluster_list = result['clusters'] - for cluster in cluster_list: - print("{} - {}" - .format(cluster['clusterName'], cluster['status']['state'])) - return result + """List the details of clusters in the region.""" + for cluster in dataproc.list_clusters(project, region): + print(('{} - {}'.format(cluster.cluster_name, + cluster.status.State.Name( + cluster.status.state)))) # [END dataproc_list_clusters_with_detail] -def get_cluster_id_by_name(cluster_list, cluster_name): +def get_cluster_id_by_name(dataproc, project_id, region, cluster_name): """Helper function to retrieve the ID and output bucket of a cluster by name.""" - cluster = [c for c in cluster_list if c['clusterName'] == cluster_name][0] - return cluster['clusterUuid'], cluster['config']['configBucket'] + for cluster in dataproc.list_clusters(project_id, region): + if cluster.cluster_name == cluster_name: + return cluster.cluster_uuid, cluster.config.config_bucket # [START dataproc_submit_pyspark_job] -def submit_pyspark_job(dataproc, project, region, - cluster_name, bucket_name, filename): - """Submits the Pyspark job to the cluster, assuming `filename` has - already been uploaded to `bucket_name`""" +def submit_pyspark_job(dataproc, project, region, cluster_name, bucket_name, + filename): + """Submit the Pyspark job to the cluster (assumes `filename` was uploaded + to 
`bucket_name.""" job_details = { - 'projectId': project, - 'job': { - 'placement': { - 'clusterName': cluster_name - }, - 'pysparkJob': { - 'mainPythonFileUri': 'gs://{}/{}'.format(bucket_name, filename) - } + 'placement': { + 'cluster_name': cluster_name + }, + 'pyspark_job': { + 'main_python_file_uri': 'gs://{}/{}'.format(bucket_name, filename) } } - result = dataproc.projects().regions().jobs().submit( - projectId=project, - region=region, - body=job_details).execute() - job_id = result['reference']['jobId'] - print('Submitted job ID {}'.format(job_id)) + + result = dataproc.submit_job( + project_id=project, region=region, job=job_details) + job_id = result.reference.job_id + print('Submitted job ID {}.'.format(job_id)) return job_id # [END dataproc_submit_pyspark_job] # [START dataproc_delete] def delete_cluster(dataproc, project, region, cluster): - print('Tearing down cluster') - result = dataproc.projects().regions().clusters().delete( - projectId=project, - region=region, - clusterName=cluster).execute() + """Delete the cluster.""" + print('Tearing down cluster.') + result = dataproc.delete_cluster( + project_id=project, region=region, cluster_name=cluster) return result # [END dataproc_delete] # [START dataproc_wait] def wait_for_job(dataproc, project, region, job_id): + """Wait for job to complete or error out.""" print('Waiting for job to finish...') while True: - result = dataproc.projects().regions().jobs().get( - projectId=project, - region=region, - jobId=job_id).execute() + job = dataproc.get_job(project, region, job_id) # Handle exceptions - if result['status']['state'] == 'ERROR': - raise Exception(result['status']['details']) - elif result['status']['state'] == 'DONE': + if job.status.State.Name(job.status.state) == 'ERROR': + raise Exception(job.status.details) + elif job.status.State.Name(job.status.state) == 'DONE': print('Job finished.') - return result + return job # [END dataproc_wait] -# [START dataproc_get_client] -def get_client(): - """Builds an http client authenticated with the service account - credentials.""" - dataproc = googleapiclient.discovery.build('dataproc', 'v1') - return dataproc -# [END dataproc_get_client] +def main(project_id, + zone, + cluster_name, + bucket_name, + pyspark_file=None, + create_new_cluster=True, + global_region=True): + + # [START dataproc_get_client] + if global_region: + region = 'global' + # Use the default gRPC global endpoints. + dataproc_cluster_client = dataproc_v1.ClusterControllerClient() + dataproc_job_client = dataproc_v1.JobControllerClient() + else: + region = get_region_from_zone(zone) + # Use a regional gRPC endpoint. 
See: + # https://cloud.google.com/dataproc/docs/concepts/regional-endpoints + client_transport = ( + cluster_controller_grpc_transport.ClusterControllerGrpcTransport( + address='{}-dataproc.googleapis.com:443'.format(region))) + job_transport = ( + job_controller_grpc_transport.JobControllerGrpcTransport( + address='{}-dataproc.googleapis.com:443'.format(region))) + dataproc_cluster_client = dataproc_v1.ClusterControllerClient( + client_transport) + dataproc_job_client = dataproc_v1.JobControllerClient(job_transport) + # [END dataproc_get_client] - -def main(project_id, zone, cluster_name, bucket_name, - pyspark_file=None, create_new_cluster=True): - dataproc = get_client() - region = get_region_from_zone(zone) try: - if pyspark_file: - spark_file, spark_filename = get_pyspark_file(pyspark_file) - else: - spark_file, spark_filename = get_default_pyspark_file() - + spark_file, spark_filename = get_pyspark_file(pyspark_file) if create_new_cluster: - create_cluster( - dataproc, project_id, zone, region, cluster_name) - wait_for_cluster_creation( - dataproc, project_id, region, cluster_name) - - upload_pyspark_file( - project_id, bucket_name, spark_filename, spark_file) + create_cluster(dataproc_cluster_client, project_id, zone, region, + cluster_name) + wait_for_cluster_creation() + upload_pyspark_file(project_id, bucket_name, spark_filename, + spark_file) - cluster_list = list_clusters_with_details( - dataproc, project_id, region)['clusters'] + list_clusters_with_details(dataproc_cluster_client, project_id, + region) (cluster_id, output_bucket) = ( - get_cluster_id_by_name(cluster_list, cluster_name)) + get_cluster_id_by_name(dataproc_cluster_client, project_id, + region, cluster_name)) # [START dataproc_call_submit_pyspark_job] - job_id = submit_pyspark_job( - dataproc, project_id, region, - cluster_name, bucket_name, spark_filename) + job_id = submit_pyspark_job(dataproc_job_client, project_id, region, + cluster_name, bucket_name, spark_filename) # [END dataproc_call_submit_pyspark_job] - wait_for_job(dataproc, project_id, region, job_id) + wait_for_job(dataproc_job_client, project_id, region, job_id) output = download_output(project_id, cluster_id, output_bucket, job_id) print('Received job output {}'.format(output)) return output finally: if create_new_cluster: - delete_cluster(dataproc, project_id, region, cluster_name) - spark_file.close() + delete_cluster(dataproc_cluster_client, project_id, region, + cluster_name) + spark_file.close() if __name__ == '__main__': - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument( - '--project_id', help='Project ID you want to access.', required=True), - parser.add_argument( - '--zone', help='Zone to create clusters in/connect to', required=True) - parser.add_argument( - '--cluster_name', - help='Name of the cluster to create/connect to', required=True) - parser.add_argument( - '--gcs_bucket', help='Bucket to upload Pyspark file to', required=True) - parser.add_argument( - '--pyspark_file', help='Pyspark filename. Defaults to pyspark_sort.py') + parser = argparse.ArgumentParser(description=__doc__, + formatter_class=argparse. 
+ RawDescriptionHelpFormatter) parser.add_argument( - '--create_new_cluster', - action='store_true', help='States if the cluster should be created') + '--project_id', help='Project ID you want to access.', required=True) + parser.add_argument('--zone', + help='Zone to create clusters in/connect to', + required=True) + parser.add_argument('--cluster_name', + help='Name of the cluster to create/connect to', + required=True) + parser.add_argument('--gcs_bucket', + help='Bucket to upload Pyspark file to', + required=True) + parser.add_argument('--pyspark_file', + help='Pyspark filename. Defaults to pyspark_sort.py') + parser.add_argument('--create_new_cluster', + action='store_true', + help='States if the cluster should be created') + parser.add_argument('--global_region', + action='store_true', + help='If cluster is in the global region') args = parser.parse_args() - main( - args.project_id, args.zone, args.cluster_name, - args.gcs_bucket, args.pyspark_file, args.create_new_cluster) + main(args.project_id, args.zone, args.cluster_name, args.gcs_bucket, + args.pyspark_file, args.create_new_cluster, args.global_region) From f210da8fef1b98f0ea920031e86edd598a3e2215 Mon Sep 17 00:00:00 2001 From: aman-ebay Date: Tue, 21 May 2019 11:24:11 -0700 Subject: [PATCH 061/109] Update python-api-walkthrough.md [(#2172)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2172) --- samples/snippets/python-api-walkthrough.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/samples/snippets/python-api-walkthrough.md b/samples/snippets/python-api-walkthrough.md index 656b54ac..f64d7528 100644 --- a/samples/snippets/python-api-walkthrough.md +++ b/samples/snippets/python-api-walkthrough.md @@ -6,12 +6,12 @@ Estimated completion time: Date: Wed, 23 Oct 2019 16:27:00 -0700 Subject: [PATCH 062/109] Adds updates including compute [(#2436)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2436) * Adds updates including compute * Python 2 compat pytest * Fixing weird \r\n issue from GH merge * Put asset tests back in * Re-add pod operator test * Hack parameter for k8s pod operator --- samples/snippets/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 509e241a..81a0a72b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ grpcio>=1.2.0 -google-auth==1.6.2 +google-auth==1.6.3 google-auth-httplib2==0.0.3 google-cloud==0.34.0 -google-cloud-storage==1.13.2 -google-cloud-dataproc==0.3.1 +google-cloud-storage==1.19.1 +google-cloud-dataproc==0.5.0 From 961ff59ab1a9f379abd1f11ec6c67ccc3fb9be1b Mon Sep 17 00:00:00 2001 From: Brad Miro Date: Fri, 15 Nov 2019 18:34:57 -0500 Subject: [PATCH 063/109] feat: adding samples for dataproc - create cluster [(#2536)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2536) * adding sample for cluster create * small fix * Add create cluster samples * Fixed copyright, added 'dataproc' to region tag and changed imports from 'dataproc' to 'dataproc_v1' * Fix copyright in create_cluster.py --- samples/snippets/create_cluster.py | 54 +++++++++++++++++++ samples/snippets/create_cluster_test.py | 44 +++++++++++++++ ...c_e2e_test.py => dataproc_e2e_donttest.py} | 0 samples/snippets/requirements.txt | 2 +- 4 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 samples/snippets/create_cluster.py create mode 100644 samples/snippets/create_cluster_test.py 
rename samples/snippets/{dataproc_e2e_test.py => dataproc_e2e_donttest.py} (100%) diff --git a/samples/snippets/create_cluster.py b/samples/snippets/create_cluster.py new file mode 100644 index 00000000..d893a142 --- /dev/null +++ b/samples/snippets/create_cluster.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_cluster(project_id, region, cluster_name): + # [START dataproc_create_cluster] + from google.cloud import dataproc_v1 as dataproc + + # TODO(developer): Uncomment and set the following variables + # project_id = 'YOUR_PROJECT_ID' + # region = 'YOUR_CLUSTER_REGION' + # cluster_name = 'YOUR_CLUSTER_NAME' + + # Create a client with the endpoint set to the desired cluster region + client = dataproc.ClusterControllerClient(client_options={ + 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) + }) + + # Create the cluster config + cluster = { + 'project_id': project_id, + 'cluster_name': cluster_name, + 'config': { + 'master_config': { + 'num_instances': 1, + 'machine_type_uri': 'n1-standard-1' + }, + 'worker_config': { + 'num_instances': 2, + 'machine_type_uri': 'n1-standard-1' + } + } + } + + # Create the cluster + operation = client.create_cluster(project_id, region, cluster) + result = operation.result() + + # Output a success message + print('Cluster created successfully: {}'.format(result.cluster_name)) + # [END dataproc_create_cluster] diff --git a/samples/snippets/create_cluster_test.py b/samples/snippets/create_cluster_test.py new file mode 100644 index 00000000..d58a1d0b --- /dev/null +++ b/samples/snippets/create_cluster_test.py @@ -0,0 +1,44 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
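For reference, the new `create_cluster.py` sample above can be driven directly from another script or a test. The snippet below is only a sketch, not part of the patch, built from the 0.x `google-cloud-dataproc` surface used in this series (`ClusterControllerClient` with a regional `api_endpoint`, positional `delete_cluster`); the project ID and cluster name are placeholders.

```python
import uuid

from google.cloud import dataproc_v1 as dataproc

import create_cluster

project_id = 'your-project-id'  # placeholder
region = 'us-central1'
cluster_name = 'example-cluster-{}'.format(uuid.uuid4())

# Create the cluster using the sample module added in this patch.
create_cluster.create_cluster(project_id, region, cluster_name)

# Clean up with the same regional endpoint the sample uses.
cluster_client = dataproc.ClusterControllerClient(client_options={
    'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)
})
cluster_client.delete_cluster(project_id, region, cluster_name)
```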
+ +import os +import uuid +import pytest + +from google.cloud import dataproc_v1 as dataproc + +import create_cluster + +PROJECT_ID = os.environ['GCLOUD_PROJECT'] +REGION = 'us-central1' +CLUSTER_NAME = 'test-cluster-{}'.format(str(uuid.uuid4())) + + +@pytest.fixture(autouse=True) +def teardown(): + yield + + client = dataproc.ClusterControllerClient(client_options={ + 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION) + }) + # Client library function + client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME) + + +def test_cluster_create(capsys): + # Wrapper function for client library function + create_cluster.create_cluster(PROJECT_ID, REGION, CLUSTER_NAME) + + out, _ = capsys.readouterr() + assert CLUSTER_NAME in out diff --git a/samples/snippets/dataproc_e2e_test.py b/samples/snippets/dataproc_e2e_donttest.py similarity index 100% rename from samples/snippets/dataproc_e2e_test.py rename to samples/snippets/dataproc_e2e_donttest.py diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 81a0a72b..0ffe752a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -3,4 +3,4 @@ google-auth==1.6.3 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.19.1 -google-cloud-dataproc==0.5.0 +google-cloud-dataproc==0.6.1 From 9edbb0ebda8e7081afe2296dbbd9a6a19c6b7077 Mon Sep 17 00:00:00 2001 From: DPEBot Date: Fri, 20 Dec 2019 17:41:38 -0800 Subject: [PATCH 064/109] Auto-update dependencies. [(#2005)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2005) * Auto-update dependencies. * Revert update of appengine/flexible/datastore. * revert update of appengine/flexible/scipy * revert update of bigquery/bqml * revert update of bigquery/cloud-client * revert update of bigquery/datalab-migration * revert update of bigtable/quickstart * revert update of compute/api * revert update of container_registry/container_analysis * revert update of dataflow/run_template * revert update of datastore/cloud-ndb * revert update of dialogflow/cloud-client * revert update of dlp * revert update of functions/imagemagick * revert update of functions/ocr/app * revert update of healthcare/api-client/fhir * revert update of iam/api-client * revert update of iot/api-client/gcs_file_to_device * revert update of iot/api-client/mqtt_example * revert update of language/automl * revert update of run/image-processing * revert update of vision/automl * revert update testing/requirements.txt * revert update of vision/cloud-client/detect * revert update of vision/cloud-client/product_search * revert update of jobs/v2/api_client * revert update of jobs/v3/api_client * revert update of opencensus * revert update of translate/cloud-client * revert update to speech/cloud-client Co-authored-by: Kurtis Van Gent <31518063+kurtisvg@users.noreply.github.com> Co-authored-by: Doug Mahugh --- samples/snippets/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0ffe752a..f39bf65a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ grpcio>=1.2.0 -google-auth==1.6.3 +google-auth==1.10.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 -google-cloud-storage==1.19.1 +google-cloud-storage==1.23.0 google-cloud-dataproc==0.6.1 From 07c9dfb711a532cfec2c7af775dddad1fb3fb512 Mon Sep 17 00:00:00 2001 From: Brad Miro Date: Thu, 9 Jan 2020 12:59:42 -0500 Subject: [PATCH 065/109] feat: 
dataproc quickstart sample added and create_cluster updated [(#2629)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2629) * Adding quickstart sample * Added new quickstart sample and updated create_cluster sample * Fix to create_cluster.py * deleted dataproc quickstart files not under dataproc/quickstart/ * Added quickstart test * Linting and formatting fixes * Revert "Linting and formatting fixes" This reverts commit c5afcbcdf9deccbb7a21ddd82ae0fc305e79c008. * Added bucket cleanup to quickstart test * Changes to samples and tests * Linting fixes * Removed todos in favor of clearer docstring * Fixed lint error Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/snippets/create_cluster.py | 31 +++-- samples/snippets/create_cluster_test.py | 9 +- samples/snippets/quickstart/quickstart.py | 128 ++++++++++++++++++ .../snippets/quickstart/quickstart_test.py | 70 ++++++++++ 4 files changed, 223 insertions(+), 15 deletions(-) create mode 100644 samples/snippets/quickstart/quickstart.py create mode 100644 samples/snippets/quickstart/quickstart_test.py diff --git a/samples/snippets/create_cluster.py b/samples/snippets/create_cluster.py index d893a142..a396ddc6 100644 --- a/samples/snippets/create_cluster.py +++ b/samples/snippets/create_cluster.py @@ -14,22 +14,29 @@ # See the License for the specific language governing permissions and # limitations under the License. +# This sample walks a user through creating a Cloud Dataproc cluster using +# the Python client library. + +# [START dataproc_create_cluster] +from google.cloud import dataproc_v1 as dataproc + def create_cluster(project_id, region, cluster_name): - # [START dataproc_create_cluster] - from google.cloud import dataproc_v1 as dataproc + """This sample walks a user through creating a Cloud Dataproc cluster + using the Python client library. - # TODO(developer): Uncomment and set the following variables - # project_id = 'YOUR_PROJECT_ID' - # region = 'YOUR_CLUSTER_REGION' - # cluster_name = 'YOUR_CLUSTER_NAME' + Args: + project_id (string): Project to use for creating resources. + region (string): Region where the resources should live. + cluster_name (string): Name to use for creating a cluster. + """ - # Create a client with the endpoint set to the desired cluster region - client = dataproc.ClusterControllerClient(client_options={ + # Create a client with the endpoint set to the desired cluster region. + cluster_client = dataproc.ClusterControllerClient(client_options={ 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) }) - # Create the cluster config + # Create the cluster config. cluster = { 'project_id': project_id, 'cluster_name': cluster_name, @@ -45,10 +52,10 @@ def create_cluster(project_id, region, cluster_name): } } - # Create the cluster - operation = client.create_cluster(project_id, region, cluster) + # Create the cluster. + operation = cluster_client.create_cluster(project_id, region, cluster) result = operation.result() - # Output a success message + # Output a success message. 
print('Cluster created successfully: {}'.format(result.cluster_name)) # [END dataproc_create_cluster] diff --git a/samples/snippets/create_cluster_test.py b/samples/snippets/create_cluster_test.py index d58a1d0b..04274579 100644 --- a/samples/snippets/create_cluster_test.py +++ b/samples/snippets/create_cluster_test.py @@ -20,20 +20,23 @@ import create_cluster + PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' -CLUSTER_NAME = 'test-cluster-{}'.format(str(uuid.uuid4())) +CLUSTER_NAME = 'py-cc-test-{}'.format(str(uuid.uuid4())) @pytest.fixture(autouse=True) def teardown(): yield - client = dataproc.ClusterControllerClient(client_options={ + cluster_client = dataproc.ClusterControllerClient(client_options={ 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION) }) # Client library function - client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME) + operation = cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME) + # Wait for cluster to delete + operation.result() def test_cluster_create(capsys): diff --git a/samples/snippets/quickstart/quickstart.py b/samples/snippets/quickstart/quickstart.py new file mode 100644 index 00000000..fcbda882 --- /dev/null +++ b/samples/snippets/quickstart/quickstart.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START dataproc_quickstart] +import time + +from google.cloud import dataproc_v1 as dataproc +from google.cloud import storage + + +def quickstart(project_id, region, cluster_name, job_file_path): + """This quickstart sample walks a user through creating a Cloud Dataproc + cluster, submitting a PySpark job from Google Cloud Storage to the + cluster, reading the output of the job and deleting the cluster, all + using the Python client library. + + Args: + project_id (string): Project to use for creating resources. + region (string): Region where the resources should live. + cluster_name (string): Name to use for creating a cluster. + job_file_path (string): Job in GCS to execute against the cluster. + """ + + # Create the cluster client. + cluster_client = dataproc.ClusterControllerClient(client_options={ + 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) + }) + + # Create the cluster config. + cluster = { + 'project_id': project_id, + 'cluster_name': cluster_name, + 'config': { + 'master_config': { + 'num_instances': 1, + 'machine_type_uri': 'n1-standard-1' + }, + 'worker_config': { + 'num_instances': 2, + 'machine_type_uri': 'n1-standard-1' + } + } + } + + # Create the cluster. + operation = cluster_client.create_cluster(project_id, region, cluster) + result = operation.result() + + print('Cluster created successfully: {}'.format(result.cluster_name)) + + # Create the job client. + job_client = dataproc.JobControllerClient(client_options={ + 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) + }) + + # Create the job config. 
+ job = { + 'placement': { + 'cluster_name': cluster_name + }, + 'pyspark_job': { + 'main_python_file_uri': job_file_path + } + } + + job_response = job_client.submit_job(project_id, region, job) + job_id = job_response.reference.job_id + + print('Submitted job \"{}\".'.format(job_id)) + + # Termimal states for a job. + terminal_states = { + dataproc.types.JobStatus.ERROR, + dataproc.types.JobStatus.CANCELLED, + dataproc.types.JobStatus.DONE + } + + # Create a timeout such that the job gets cancelled if not in a + # terminal state after a fixed period of time. + timeout_seconds = 600 + time_start = time.time() + + # Wait for the job to complete. + while job_response.status.state not in terminal_states: + if time.time() > time_start + timeout_seconds: + job_client.cancel_job(project_id, region, job_id) + print('Job {} timed out after threshold of {} seconds.'.format( + job_id, timeout_seconds)) + + # Poll for job termination once a second. + time.sleep(1) + job_response = job_client.get_job(project_id, region, job_id) + + # Cloud Dataproc job output gets saved to a GCS bucket allocated to it. + cluster_info = cluster_client.get_cluster( + project_id, region, cluster_name) + + storage_client = storage.Client() + bucket = storage_client.get_bucket(cluster_info.config.config_bucket) + output_blob = ( + 'google-cloud-dataproc-metainfo/{}/jobs/{}/driveroutput.000000000' + .format(cluster_info.cluster_uuid, job_id)) + output = bucket.blob(output_blob).download_as_string() + + print('Job {} finished with state {}:\n{}'.format( + job_id, + job_response.status.State.Name(job_response.status.state), + output)) + + # Delete the cluster once the job has terminated. + operation = cluster_client.delete_cluster(project_id, region, cluster_name) + operation.result() + + print('Cluster {} successfully deleted.'.format(cluster_name)) + # [END dataproc_quickstart] diff --git a/samples/snippets/quickstart/quickstart_test.py b/samples/snippets/quickstart/quickstart_test.py new file mode 100644 index 00000000..df488d0a --- /dev/null +++ b/samples/snippets/quickstart/quickstart_test.py @@ -0,0 +1,70 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
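The quickstart above waits for the job with an inline polling loop; the same wait-until-terminal logic can also be factored into a small helper. This is only a sketch assembled from the calls already used in the sample (`get_job`, `cancel_job`, and the `dataproc.types.JobStatus` states); the helper name and the use of `TimeoutError` are illustrative choices, not part of the sample itself.

```python
import time

from google.cloud import dataproc_v1 as dataproc


def wait_for_job_termination(job_client, project_id, region, job_id,
                             timeout_seconds=600):
    """Polls once a second until the job reaches a terminal state."""
    terminal_states = {
        dataproc.types.JobStatus.ERROR,
        dataproc.types.JobStatus.CANCELLED,
        dataproc.types.JobStatus.DONE,
    }
    time_start = time.time()
    job = job_client.get_job(project_id, region, job_id)
    while job.status.state not in terminal_states:
        if time.time() > time_start + timeout_seconds:
            # Give up and cancel the job if it runs past the threshold.
            job_client.cancel_job(project_id, region, job_id)
            raise TimeoutError('Job {} timed out after {} seconds.'.format(
                job_id, timeout_seconds))
        time.sleep(1)
        job = job_client.get_job(project_id, region, job_id)
    return job
```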
+ +import os +import uuid +import pytest + +from google.cloud import dataproc_v1 as dataproc +from google.cloud import storage + +import quickstart + + +PROJECT_ID = os.environ['GCLOUD_PROJECT'] +REGION = 'us-central1' +CLUSTER_NAME = 'py-qs-test-{}'.format(str(uuid.uuid4())) +STAGING_BUCKET = 'py-dataproc-qs-bucket-{}'.format(str(uuid.uuid4())) +JOB_FILE_NAME = 'sum.py' +JOB_FILE_PATH = 'gs://{}/{}'.format(STAGING_BUCKET, JOB_FILE_NAME) +SORT_CODE = ( + "import pyspark\n" + "sc = pyspark.SparkContext()\n" + "rdd = sc.parallelize((1,2,3,4,5))\n" + "sum = rdd.reduce(lambda x, y: x + y)\n" +) + + +@pytest.fixture(autouse=True) +def setup_teardown(): + storage_client = storage.Client() + bucket = storage_client.create_bucket(STAGING_BUCKET) + blob = bucket.blob(JOB_FILE_NAME) + blob.upload_from_string(SORT_CODE) + + yield + + cluster_client = dataproc.ClusterControllerClient(client_options={ + 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION) + }) + + # The quickstart sample deletes the cluster, but if the test fails + # before cluster deletion occurs, it can be manually deleted here. + clusters = cluster_client.list_clusters(PROJECT_ID, REGION) + + for cluster in clusters: + if cluster.cluster_name == CLUSTER_NAME: + cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME) + + blob.delete() + + +def test_quickstart(capsys): + quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH) + + out, _ = capsys.readouterr() + assert 'Cluster created successfully' in out + assert 'Submitted job' in out + assert 'finished with state DONE:' in out + assert 'successfully deleted' in out From ef562945aaa4722fbd9459dd95ac022246f77a92 Mon Sep 17 00:00:00 2001 From: aman-ebay Date: Thu, 23 Jan 2020 15:50:51 -0800 Subject: [PATCH 066/109] Update Python Cloud Shell walkthrough script [(#2733)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2733) Cloud Shell walkthrough scripts no longer support enabling APIs. APIs must be enabled by linking to the console. Updated product name: "Cloud Dataproc" -> "Dataproc". --- samples/snippets/python-api-walkthrough.md | 24 ++++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/samples/snippets/python-api-walkthrough.md b/samples/snippets/python-api-walkthrough.md index f64d7528..1a8d436f 100644 --- a/samples/snippets/python-api-walkthrough.md +++ b/samples/snippets/python-api-walkthrough.md @@ -1,4 +1,4 @@ -# Use the Python Client Library to call Cloud Dataproc APIs +# Use the Python Client Library to call Dataproc APIs Estimated completion time: @@ -7,13 +7,13 @@ Estimated completion time: -1. Enable the Cloud Dataproc, Compute Engine, and Cloud Storage APIs in your project. - * +1. Click the link below to enable the Dataproc, Compute Engine, and Cloud Storage APIs + in a separate GCP console tab in your browser. + + **Note:** After you select your project and enable the APIs, return to this tutorial by clicking + on the **Cloud Shell** tab in your browser. 
+ + * [Enable APIs](https://console.cloud.google.com/flows/enableapi?apiid=dataproc,compute_component,storage-component.googleapis.com&redirect=https://console.cloud.google.com) ## Prerequisites (2) @@ -140,7 +145,8 @@ Job output in Cloud Shell shows cluster creation, job submission, ### Next Steps: * **View job details from the Console.** View job details by selecting the - PySpark job from the Cloud Dataproc + PySpark job from the Dataproc += [Jobs page](https://console.cloud.google.com/dataproc/jobs) in the Google Cloud Platform Console. @@ -160,5 +166,5 @@ Job output in Cloud Shell shows cluster creation, job submission, gsutil rm -r gs://$BUCKET ``` -* **For more information.** See the [Cloud Dataproc documentation](https://cloud.google.com/dataproc/docs/) +* **For more information.** See the [Dataproc documentation](https://cloud.google.com/dataproc/docs/) for API reference and product feature information. From b635c475362acb30404a8a10ca36869377701a36 Mon Sep 17 00:00:00 2001 From: Brad Miro Date: Tue, 28 Jan 2020 18:24:20 -0500 Subject: [PATCH 067/109] fix: added cli functionality to dataproc quickstart example [(#2734)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2734) * Added CLI functionality to quickstart --- samples/snippets/quickstart/quickstart.py | 57 ++++++++++++++----- .../snippets/quickstart/quickstart_test.py | 23 +++++--- 2 files changed, 58 insertions(+), 22 deletions(-) diff --git a/samples/snippets/quickstart/quickstart.py b/samples/snippets/quickstart/quickstart.py index fcbda882..4159e281 100644 --- a/samples/snippets/quickstart/quickstart.py +++ b/samples/snippets/quickstart/quickstart.py @@ -15,6 +15,18 @@ # limitations under the License. # [START dataproc_quickstart] +""" +This quickstart sample walks a user through creating a Cloud Dataproc +cluster, submitting a PySpark job from Google Cloud Storage to the +cluster, reading the output of the job and deleting the cluster, all +using the Python client library. + +Usage: + python quickstart.py --project_id --region \ + --cluster_name --job_file_path +""" + +import argparse import time from google.cloud import dataproc_v1 as dataproc @@ -22,18 +34,6 @@ def quickstart(project_id, region, cluster_name, job_file_path): - """This quickstart sample walks a user through creating a Cloud Dataproc - cluster, submitting a PySpark job from Google Cloud Storage to the - cluster, reading the output of the job and deleting the cluster, all - using the Python client library. - - Args: - project_id (string): Project to use for creating resources. - region (string): Region where the resources should live. - cluster_name (string): Name to use for creating a cluster. - job_file_path (string): Job in GCS to execute against the cluster. - """ - # Create the cluster client. 
cluster_client = dataproc.ClusterControllerClient(client_options={ 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) @@ -125,4 +125,35 @@ def quickstart(project_id, region, cluster_name, job_file_path): operation.result() print('Cluster {} successfully deleted.'.format(cluster_name)) - # [END dataproc_quickstart] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + '--project_id', + type=str, + required=True, + help='Project to use for creating resources.') + parser.add_argument( + '--region', + type=str, + required=True, + help='Region where the resources should live.') + parser.add_argument( + '--cluster_name', + type=str, + required=True, + help='Name to use for creating a cluster.') + parser.add_argument( + '--job_file_path', + type=str, + required=True, + help='Job in GCS to execute against the cluster.') + + args = parser.parse_args() + quickstart(args.project_id, args.region, + args.cluster_name, args.job_file_path) +# [END dataproc_quickstart] diff --git a/samples/snippets/quickstart/quickstart_test.py b/samples/snippets/quickstart/quickstart_test.py index df488d0a..b7fe0576 100644 --- a/samples/snippets/quickstart/quickstart_test.py +++ b/samples/snippets/quickstart/quickstart_test.py @@ -15,12 +15,11 @@ import os import uuid import pytest +import subprocess from google.cloud import dataproc_v1 as dataproc from google.cloud import storage -import quickstart - PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' @@ -29,10 +28,10 @@ JOB_FILE_NAME = 'sum.py' JOB_FILE_PATH = 'gs://{}/{}'.format(STAGING_BUCKET, JOB_FILE_NAME) SORT_CODE = ( - "import pyspark\n" - "sc = pyspark.SparkContext()\n" - "rdd = sc.parallelize((1,2,3,4,5))\n" - "sum = rdd.reduce(lambda x, y: x + y)\n" + "import pyspark\n" + "sc = pyspark.SparkContext()\n" + "rdd = sc.parallelize((1,2,3,4,5))\n" + "sum = rdd.reduce(lambda x, y: x + y)\n" ) @@ -60,10 +59,16 @@ def setup_teardown(): blob.delete() -def test_quickstart(capsys): - quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH) +def test_quickstart(): + command = [ + 'python', 'quickstart/quickstart.py', + '--project_id', PROJECT_ID, + '--region', REGION, + '--cluster_name', CLUSTER_NAME, + '--job_file_path', JOB_FILE_PATH + ] + out = subprocess.check_output(command).decode("utf-8") - out, _ = capsys.readouterr() assert 'Cluster created successfully' in out assert 'Submitted job' in out assert 'finished with state DONE:' in out From d0a263828093b1b2bec561a2f22e48caef7cb99c Mon Sep 17 00:00:00 2001 From: Brad Miro Date: Thu, 27 Feb 2020 12:44:19 -0500 Subject: [PATCH 068/109] Fixed Dataproc quickstart test to properly clean up GCS bucket [(#3001)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3001) --- samples/snippets/quickstart/quickstart_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/snippets/quickstart/quickstart_test.py b/samples/snippets/quickstart/quickstart_test.py index b7fe0576..5361449f 100644 --- a/samples/snippets/quickstart/quickstart_test.py +++ b/samples/snippets/quickstart/quickstart_test.py @@ -57,6 +57,7 @@ def setup_teardown(): cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME) blob.delete() + bucket.delete() def test_quickstart(): From 16b28a0ccc3ef64025c387055d60eb53a19bdde2 Mon Sep 17 00:00:00 2001 From: "Leah E. 
Cole" <6719667+leahecole@users.noreply.github.com> Date: Thu, 5 Mar 2020 09:16:19 -0800 Subject: [PATCH 069/109] splitting up #2651 part 1/3 - dataproc + endpoints [(#3025)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3025) * splitting up #2651 * fix typos --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f39bf65a..a54880f1 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -grpcio>=1.2.0 +grpcio==1.27.1 google-auth==1.10.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 From a5ed706b4a5010ae96a54f6ebc291a8a5d5801ae Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Thu, 5 Mar 2020 14:22:12 -0800 Subject: [PATCH 070/109] chore(deps): update dependency google-auth to v1.11.2 [(#2724)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2724) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a54880f1..f0d1ac13 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.27.1 -google-auth==1.10.0 +google-auth==1.11.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.23.0 From ebfed4094f795c55fec281e39415c4e7a7e485dd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 9 Mar 2020 18:29:59 +0100 Subject: [PATCH 071/109] chore(deps): update dependency google-cloud-storage to v1.26.0 [(#3046)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3046) * chore(deps): update dependency google-cloud-storage to v1.26.0 * chore(deps): specify dependencies by python version * chore: up other deps to try to remove errors Co-authored-by: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> Co-authored-by: Leah Cole --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f0d1ac13..95e55c33 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,5 +2,5 @@ grpcio==1.27.1 google-auth==1.11.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 -google-cloud-storage==1.23.0 +google-cloud-storage==1.26.0 google-cloud-dataproc==0.6.1 From a0ca0b286d6fbbe944278e386ddd4f79e847c4b8 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 11 Mar 2020 17:25:59 +0100 Subject: [PATCH 072/109] chore(deps): update dependency google-cloud-dataproc to v0.7.0 [(#3083)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3083) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 95e55c33..1eb93927 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -3,4 +3,4 @@ google-auth==1.11.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.26.0 -google-cloud-dataproc==0.6.1 +google-cloud-dataproc==0.7.0 From f2a80178cb870c10f09a3415d1161c427262be2e Mon Sep 17 00:00:00 2001 From: Brad Miro Date: Tue, 24 Mar 2020 12:17:21 -0400 Subject: [PATCH 073/109] feat: added dataproc workflows samples [(#3056)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3056) * Added workflows sample --- .../instantiate_inline_workflow_template.py | 98 +++++++++++++++++++ ...stantiate_inline_workflow_template_test.py | 31 ++++++ 2 files changed, 129 insertions(+) create mode 100644 samples/snippets/instantiate_inline_workflow_template.py create mode 100644 samples/snippets/instantiate_inline_workflow_template_test.py diff --git a/samples/snippets/instantiate_inline_workflow_template.py b/samples/snippets/instantiate_inline_workflow_template.py new file mode 100644 index 00000000..d492506b --- /dev/null +++ b/samples/snippets/instantiate_inline_workflow_template.py @@ -0,0 +1,98 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This sample walks a user through instantiating an inline +# workflow for Cloud Dataproc using the Python client library. +# +# This script can be run on its own: +# python workflows.py ${PROJECT_ID} ${REGION} + +import sys +# [START dataproc_instantiate_inline_workflow_template] +from google.cloud import dataproc_v1 as dataproc + + +def instantiate_inline_workflow_template(project_id, region): + """This sample walks a user through submitting a workflow + for a Cloud Dataproc using the Python client library. + + Args: + project_id (string): Project to use for running the workflow. + region (string): Region where the workflow resources should live. + """ + + # Create a client with the endpoint set to the desired region. 
+ workflow_template_client = dataproc.WorkflowTemplateServiceClient( + client_options={ + 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)} + ) + + parent = workflow_template_client.region_path(project_id, region) + + template = { + 'jobs': [ + { + 'hadoop_job': { + 'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/' + 'hadoop-mapreduce-examples.jar', + 'args': [ + 'teragen', + '1000', + 'hdfs:///gen/' + ] + }, + 'step_id': 'teragen' + }, + { + 'hadoop_job': { + 'main_jar_file_uri': 'file:///usr/lib/hadoop-mapreduce/' + 'hadoop-mapreduce-examples.jar', + 'args': [ + 'terasort', + 'hdfs:///gen/', + 'hdfs:///sort/' + ] + }, + 'step_id': 'terasort', + 'prerequisite_step_ids': [ + 'teragen' + ] + }], + 'placement': { + 'managed_cluster': { + 'cluster_name': 'my-managed-cluster', + 'config': { + 'gce_cluster_config': { + # Leave 'zone_uri' empty for 'Auto Zone Placement' + # 'zone_uri': '' + 'zone_uri': 'us-central1-a' + } + } + } + } + } + + # Submit the request to instantiate the workflow from an inline template. + operation = workflow_template_client.instantiate_inline_workflow_template( + parent, template + ) + operation.result() + + # Output a success message. + print('Workflow ran successfully.') +# [END dataproc_instantiate_inline_workflow_template] + + +if __name__ == "__main__": + instantiate_inline_workflow_template(sys.argv[1], sys.argv[2]) diff --git a/samples/snippets/instantiate_inline_workflow_template_test.py b/samples/snippets/instantiate_inline_workflow_template_test.py new file mode 100644 index 00000000..6fe37119 --- /dev/null +++ b/samples/snippets/instantiate_inline_workflow_template_test.py @@ -0,0 +1,31 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import instantiate_inline_workflow_template + + +PROJECT_ID = os.environ['GCLOUD_PROJECT'] +REGION = 'us-central1' + + +def test_workflows(capsys): + # Wrapper function for client library function + instantiate_inline_workflow_template.instantiate_inline_workflow_template( + PROJECT_ID, REGION + ) + + out, _ = capsys.readouterr() + assert "successfully" in out From 09cebd14ee365e664714bc0536d070af22bea0fe Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 27 Mar 2020 22:48:04 +0100 Subject: [PATCH 074/109] chore(deps): update dependency grpcio to v1.27.2 [(#3173)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3173) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [grpcio](https://grpc.io) | minor | `==1.25.0` -> `==1.27.2` | | [grpcio](https://grpc.io) | minor | `==1.23.0` -> `==1.27.2` | | [grpcio](https://grpc.io) | minor | `==1.26.0` -> `==1.27.2` | | [grpcio](https://grpc.io) | patch | `==1.27.1` -> `==1.27.2` | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. 
:no_bell: **Ignore**: Close this PR and you won't be reminded about these updates again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1eb93927..cc78ad4c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.27.1 +grpcio==1.27.2 google-auth==1.11.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 From 2b75ceb71f40f073ef6f076c2875525c6238aae5 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent <31518063+kurtisvg@users.noreply.github.com> Date: Wed, 1 Apr 2020 19:11:50 -0700 Subject: [PATCH 075/109] Simplify noxfile setup. [(#2806)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2806) * chore(deps): update dependency requests to v2.23.0 * Simplify noxfile and add version control. * Configure appengine/standard to only test Python 2.7. * Update Kokokro configs to match noxfile. * Add requirements-test to each folder. * Remove Py2 versions from everything execept appengine/standard. * Remove conftest.py. * Remove appengine/standard/conftest.py * Remove 'no-sucess-flaky-report' from pytest.ini. * Add GAE SDK back to appengine/standard tests. * Fix typo. * Roll pytest to python 2 version. * Add a bunch of testing requirements. * Remove typo. * Add appengine lib directory back in. * Add some additional requirements. * Fix issue with flake8 args. * Even more requirements. * Readd appengine conftest.py. * Add a few more requirements. * Even more Appengine requirements. * Add webtest for appengine/standard/mailgun. * Add some additional requirements. * Add workaround for issue with mailjet-rest. * Add responses for appengine/standard/mailjet. Co-authored-by: Renovate Bot --- samples/snippets/requirements-test.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 samples/snippets/requirements-test.txt diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt new file mode 100644 index 00000000..781d4326 --- /dev/null +++ b/samples/snippets/requirements-test.txt @@ -0,0 +1 @@ +pytest==5.3.2 From 6c2132cd30b5bcdc927d73d5391e1604c03fbf48 Mon Sep 17 00:00:00 2001 From: Brad Miro Date: Tue, 14 Apr 2020 18:16:09 -0400 Subject: [PATCH 076/109] fix: add mains to samples [(#3284)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3284) Added mains to two samples: create_cluster and instantiate_inline_workflow_templates. Fixed their associated tests to accommodate this. 
Removed subprocess from quickstart/quickstart_test.py to fix [2873](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/2873) fixes #2873 --- samples/snippets/create_cluster.py | 20 +++++++++++++++++-- samples/snippets/create_cluster_test.py | 2 +- .../instantiate_inline_workflow_template.py | 17 ++++++++++++---- .../snippets/quickstart/quickstart_test.py | 15 +++++--------- 4 files changed, 37 insertions(+), 17 deletions(-) diff --git a/samples/snippets/create_cluster.py b/samples/snippets/create_cluster.py index a396ddc6..b4d63d2e 100644 --- a/samples/snippets/create_cluster.py +++ b/samples/snippets/create_cluster.py @@ -16,6 +16,12 @@ # This sample walks a user through creating a Cloud Dataproc cluster using # the Python client library. +# +# This script can be run on its own: +# python create_cluster.py ${PROJECT_ID} ${REGION} ${CLUSTER_NAME} + + +import sys # [START dataproc_create_cluster] from google.cloud import dataproc_v1 as dataproc @@ -33,7 +39,7 @@ def create_cluster(project_id, region, cluster_name): # Create a client with the endpoint set to the desired cluster region. cluster_client = dataproc.ClusterControllerClient(client_options={ - 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) + 'api_endpoint': f'{region}-dataproc.googleapis.com:443', }) # Create the cluster config. @@ -57,5 +63,15 @@ def create_cluster(project_id, region, cluster_name): result = operation.result() # Output a success message. - print('Cluster created successfully: {}'.format(result.cluster_name)) + print(f'Cluster created successfully: {result.cluster_name}') # [END dataproc_create_cluster] + + +if __name__ == "__main__": + if len(sys.argv) < 4: + sys.exit('python create_cluster.py project_id region cluster_name') + + project_id = sys.argv[1] + region = sys.argv[2] + cluster_name = sys.argv[3] + create_cluster(project_id, region, cluster_name) diff --git a/samples/snippets/create_cluster_test.py b/samples/snippets/create_cluster_test.py index 04274579..72ffce2b 100644 --- a/samples/snippets/create_cluster_test.py +++ b/samples/snippets/create_cluster_test.py @@ -31,7 +31,7 @@ def teardown(): yield cluster_client = dataproc.ClusterControllerClient(client_options={ - 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION) + 'api_endpoint': f'{REGION}-dataproc.googleapis.com:443' }) # Client library function operation = cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME) diff --git a/samples/snippets/instantiate_inline_workflow_template.py b/samples/snippets/instantiate_inline_workflow_template.py index d492506b..f9358376 100644 --- a/samples/snippets/instantiate_inline_workflow_template.py +++ b/samples/snippets/instantiate_inline_workflow_template.py @@ -16,9 +16,11 @@ # workflow for Cloud Dataproc using the Python client library. # # This script can be run on its own: -# python workflows.py ${PROJECT_ID} ${REGION} +# python instantiate_inline_workflow_template.py ${PROJECT_ID} ${REGION} + import sys + # [START dataproc_instantiate_inline_workflow_template] from google.cloud import dataproc_v1 as dataproc @@ -35,7 +37,8 @@ def instantiate_inline_workflow_template(project_id, region): # Create a client with the endpoint set to the desired region. 
workflow_template_client = dataproc.WorkflowTemplateServiceClient( client_options={ - 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)} + 'api_endpoint': f'{region}-dataproc.googleapis.com:443' + } ) parent = workflow_template_client.region_path(project_id, region) @@ -91,8 +94,14 @@ def instantiate_inline_workflow_template(project_id, region): # Output a success message. print('Workflow ran successfully.') -# [END dataproc_instantiate_inline_workflow_template] + # [END dataproc_instantiate_inline_workflow_template] if __name__ == "__main__": - instantiate_inline_workflow_template(sys.argv[1], sys.argv[2]) + if len(sys.argv) < 3: + sys.exit('python instantiate_inline_workflow_template.py ' + + 'project_id region') + + project_id = sys.argv[1] + region = sys.argv[2] + instantiate_inline_workflow_template(project_id, region) diff --git a/samples/snippets/quickstart/quickstart_test.py b/samples/snippets/quickstart/quickstart_test.py index 5361449f..a38019d9 100644 --- a/samples/snippets/quickstart/quickstart_test.py +++ b/samples/snippets/quickstart/quickstart_test.py @@ -15,11 +15,12 @@ import os import uuid import pytest -import subprocess from google.cloud import dataproc_v1 as dataproc from google.cloud import storage +import quickstart + PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' @@ -60,15 +61,9 @@ def setup_teardown(): bucket.delete() -def test_quickstart(): - command = [ - 'python', 'quickstart/quickstart.py', - '--project_id', PROJECT_ID, - '--region', REGION, - '--cluster_name', CLUSTER_NAME, - '--job_file_path', JOB_FILE_PATH - ] - out = subprocess.check_output(command).decode("utf-8") +def test_quickstart(capsys): + quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH) + out, _ = capsys.readouterr() assert 'Cluster created successfully' in out assert 'Submitted job' in out From 6021dda411f483bbd7d5d244004bd96c672378a6 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 15 Apr 2020 02:55:38 +0200 Subject: [PATCH 077/109] Update dependency grpcio to v1.28.1 [(#3276)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3276) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index cc78ad4c..3a3427b5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.27.2 +grpcio==1.28.1 google-auth==1.11.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 From ab3cc79a61aae422be899d4fe2fff468ae441181 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 17 Apr 2020 03:09:45 +0200 Subject: [PATCH 078/109] Update dependency google-auth to v1.14.0 [(#3148)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3148) Co-authored-by: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3a3427b5..30c8bc57 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.28.1 -google-auth==1.11.2 +google-auth==1.14.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.26.0 From f1c3f0a2d7f92dc329bb6c71103994a21db6e0df Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 28 Apr 2020 06:20:12 +0200 Subject: [PATCH 079/109] chore(deps): update dependency google-auth to v1.14.1 [(#3464)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3464) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-auth](https://togithub.com/googleapis/google-auth-library-python) | patch | `==1.14.0` -> `==1.14.1` | | [google-auth](https://togithub.com/googleapis/google-auth-library-python) | minor | `==1.11.2` -> `==1.14.1` | --- ### Release Notes
googleapis/google-auth-library-python ### [`v1.14.1`](https://togithub.com/googleapis/google-auth-library-python/blob/master/CHANGELOG.md#1141-httpswwwgithubcomgoogleapisgoogle-auth-library-pythoncomparev1140v1141-2020-04-21) [Compare Source](https://togithub.com/googleapis/google-auth-library-python/compare/v1.14.0...v1.14.1)
</details>
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about these updates again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 30c8bc57..0fc3853a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.28.1 -google-auth==1.14.0 +google-auth==1.14.1 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.26.0 From ecafb22a7a8bde8de49d134e762bdb68baa2b984 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 29 Apr 2020 07:26:36 +0200 Subject: [PATCH 080/109] chore(deps): update dependency google-cloud-storage to v1.28.0 [(#3260)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3260) Co-authored-by: Takashi Matsuo --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0fc3853a..a7595af8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,5 +2,5 @@ grpcio==1.28.1 google-auth==1.14.1 google-auth-httplib2==0.0.3 google-cloud==0.34.0 -google-cloud-storage==1.26.0 +google-cloud-storage==1.28.0 google-cloud-dataproc==0.7.0 From e99beec29ba5c1b3303baadfc10f943725d60280 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 11 May 2020 22:24:11 +0200 Subject: [PATCH 081/109] chore(deps): update dependency google-auth to v1.14.2 [(#3724)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3724) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-auth](https://togithub.com/googleapis/google-auth-library-python) | patch | `==1.14.1` -> `==1.14.2` | --- ### Release Notes
googleapis/google-auth-library-python ### [`v1.14.2`](https://togithub.com/googleapis/google-auth-library-python/blob/master/CHANGELOG.md#1142-httpswwwgithubcomgoogleapisgoogle-auth-library-pythoncomparev1141v1142-2020-05-07) [Compare Source](https://togithub.com/googleapis/google-auth-library-python/compare/v1.14.1...v1.14.2)
</details>
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a7595af8..860c8ddd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.28.1 -google-auth==1.14.1 +google-auth==1.14.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.0 From f04c6cc68da09e78e8cbffdda2aa05a4c19b729c Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Tue, 12 May 2020 15:54:12 -0700 Subject: [PATCH 082/109] chore: some lint fixes [(#3743)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3743) --- samples/snippets/create_cluster_test.py | 2 +- samples/snippets/quickstart/quickstart_test.py | 2 +- samples/snippets/single_job_workflow.py | 3 ++- samples/snippets/submit_job_to_cluster.py | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/samples/snippets/create_cluster_test.py b/samples/snippets/create_cluster_test.py index 72ffce2b..6d815b5b 100644 --- a/samples/snippets/create_cluster_test.py +++ b/samples/snippets/create_cluster_test.py @@ -14,9 +14,9 @@ import os import uuid -import pytest from google.cloud import dataproc_v1 as dataproc +import pytest import create_cluster diff --git a/samples/snippets/quickstart/quickstart_test.py b/samples/snippets/quickstart/quickstart_test.py index a38019d9..7788e094 100644 --- a/samples/snippets/quickstart/quickstart_test.py +++ b/samples/snippets/quickstart/quickstart_test.py @@ -14,10 +14,10 @@ import os import uuid -import pytest from google.cloud import dataproc_v1 as dataproc from google.cloud import storage +import pytest import quickstart diff --git a/samples/snippets/single_job_workflow.py b/samples/snippets/single_job_workflow.py index b17ea0b9..b2754b06 100644 --- a/samples/snippets/single_job_workflow.py +++ b/samples/snippets/single_job_workflow.py @@ -28,9 +28,10 @@ import os from google.cloud import dataproc_v1 +from google.cloud import storage from google.cloud.dataproc_v1.gapic.transports import ( workflow_template_service_grpc_transport) -from google.cloud import storage + DEFAULT_FILENAME = "pyspark_sort.py" waiting_callback = False diff --git a/samples/snippets/submit_job_to_cluster.py b/samples/snippets/submit_job_to_cluster.py index 1c648abc..389cbec8 100644 --- a/samples/snippets/submit_job_to_cluster.py +++ b/samples/snippets/submit_job_to_cluster.py @@ -33,11 +33,12 @@ import os from google.cloud import dataproc_v1 +from google.cloud import storage from google.cloud.dataproc_v1.gapic.transports import ( cluster_controller_grpc_transport) from google.cloud.dataproc_v1.gapic.transports import ( job_controller_grpc_transport) -from google.cloud import storage + DEFAULT_FILENAME = 'pyspark_sort.py' waiting_callback = False From 2a0f907fabc2ac7a4bbc3024261754bd60cd956f Mon Sep 17 00:00:00 2001 From: 
WhiteSource Renovate Date: Wed, 13 May 2020 08:16:04 +0200 Subject: [PATCH 083/109] chore(deps): update dependency google-auth to v1.14.3 [(#3728)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3728) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-auth](https://togithub.com/googleapis/google-auth-library-python) | patch | `==1.14.2` -> `==1.14.3` | --- ### Release Notes
googleapis/google-auth-library-python ### [`v1.14.3`](https://togithub.com/googleapis/google-auth-library-python/blob/master/CHANGELOG.md#​1143-httpswwwgithubcomgoogleapisgoogle-auth-library-pythoncomparev1142v1143-2020-05-11) [Compare Source](https://togithub.com/googleapis/google-auth-library-python/compare/v1.14.2...v1.14.3)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [x] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 860c8ddd..6de928ce 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.28.1 -google-auth==1.14.2 +google-auth==1.14.3 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.0 From ec128b8390365a38c3c2eaf2fb64c235d66b78dc Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 19 May 2020 03:07:46 +0200 Subject: [PATCH 084/109] chore(deps): update dependency grpcio to v1.29.0 [(#3786)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3786) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6de928ce..fa67de3d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.28.1 +grpcio==1.29.0 google-auth==1.14.3 google-auth-httplib2==0.0.3 google-cloud==0.34.0 From 64b113133bb145840c0261a7ea79b20e35939d9b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 19 May 2020 04:18:01 +0200 Subject: [PATCH 085/109] chore(deps): update dependency google-cloud-storage to v1.28.1 [(#3785)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3785) * chore(deps): update dependency google-cloud-storage to v1.28.1 * [asset] testing: use uuid instead of time Co-authored-by: Takashi Matsuo --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fa67de3d..7c7c7e4c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,5 +2,5 @@ grpcio==1.29.0 google-auth==1.14.3 google-auth-httplib2==0.0.3 google-cloud==0.34.0 -google-cloud-storage==1.28.0 +google-cloud-storage==1.28.1 google-cloud-dataproc==0.7.0 From bbb8ef1bc5964a60da13d66095e3444ae8e2cfd1 Mon Sep 17 00:00:00 2001 From: "Leah E. 
Cole" <6719667+leahecole@users.noreply.github.com> Date: Mon, 18 May 2020 20:56:53 -0700 Subject: [PATCH 086/109] update google-auth to 1.15.0 part 3 [(#3816)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3816) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7c7c7e4c..0eab0356 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.29.0 -google-auth==1.14.3 +google-auth==1.15.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 From 32dd11554c957bf5a59a2228600842a94ebb3841 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 20 May 2020 04:38:30 +0200 Subject: [PATCH 087/109] Update dependency google-cloud-dataproc to v0.8.0 [(#3837)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3837) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0eab0356..8bf7313a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -3,4 +3,4 @@ google-auth==1.15.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 -google-cloud-dataproc==0.7.0 +google-cloud-dataproc==0.8.0 From 2ed03fe4a14895aea7c350a69f42522eb1b35077 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 29 May 2020 00:27:36 +0200 Subject: [PATCH 088/109] chore(deps): update dependency google-auth to v1.16.0 [(#3903)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3903) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8bf7313a..7d1527e6 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.29.0 -google-auth==1.15.0 +google-auth==1.16.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 From 2f493887d423b76e27078f551149b37f5adfc3cf Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Thu, 4 Jun 2020 16:13:06 -0700 Subject: [PATCH 089/109] update google-auth part 3 [(#3963)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/3963) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7d1527e6..9046478f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.29.0 -google-auth==1.16.0 +google-auth==1.16.1 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 From a640af8cce7d76b573ff2405e844ab24f7bc0271 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 9 Jun 2020 00:08:05 +0200 Subject: [PATCH 090/109] chore(deps): update dependency google-cloud-dataproc to v0.8.1 [(#4015)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4015) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-dataproc](https://togithub.com/googleapis/python-dataproc) | patch | `==0.8.0` -> `==0.8.1` | --- ### Release Notes
googleapis/python-dataproc ### [`v0.8.1`](https://togithub.com/googleapis/python-dataproc/blob/master/CHANGELOG.md#​081-httpswwwgithubcomgoogleapispython-dataproccomparev080v081-2020-06-05) [Compare Source](https://togithub.com/googleapis/python-dataproc/compare/v0.8.0...v0.8.1)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9046478f..2e882818 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -3,4 +3,4 @@ google-auth==1.16.1 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 -google-cloud-dataproc==0.8.0 +google-cloud-dataproc==0.8.1 From 2e10446735c334cf1c72927b58db8508b077fad5 Mon Sep 17 00:00:00 2001 From: Kurtis Van Gent <31518063+kurtisvg@users.noreply.github.com> Date: Tue, 9 Jun 2020 14:34:27 -0700 Subject: [PATCH 091/109] Replace GCLOUD_PROJECT with GOOGLE_CLOUD_PROJECT. [(#4022)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4022) --- samples/snippets/create_cluster_test.py | 2 +- samples/snippets/dataproc_e2e_donttest.py | 2 +- samples/snippets/instantiate_inline_workflow_template_test.py | 2 +- samples/snippets/quickstart/quickstart_test.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/snippets/create_cluster_test.py b/samples/snippets/create_cluster_test.py index 6d815b5b..6b1d6806 100644 --- a/samples/snippets/create_cluster_test.py +++ b/samples/snippets/create_cluster_test.py @@ -21,7 +21,7 @@ import create_cluster -PROJECT_ID = os.environ['GCLOUD_PROJECT'] +PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT'] REGION = 'us-central1' CLUSTER_NAME = 'py-cc-test-{}'.format(str(uuid.uuid4())) diff --git a/samples/snippets/dataproc_e2e_donttest.py b/samples/snippets/dataproc_e2e_donttest.py index 0a45d080..44cc03bf 100644 --- a/samples/snippets/dataproc_e2e_donttest.py +++ b/samples/snippets/dataproc_e2e_donttest.py @@ -20,7 +20,7 @@ import submit_job_to_cluster -PROJECT = os.environ['GCLOUD_PROJECT'] +PROJECT = os.environ['GOOGLE_CLOUD_PROJECT'] BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] CLUSTER_NAME = 'testcluster3' ZONE = 'us-central1-b' diff --git a/samples/snippets/instantiate_inline_workflow_template_test.py b/samples/snippets/instantiate_inline_workflow_template_test.py index 6fe37119..22673e4e 100644 --- a/samples/snippets/instantiate_inline_workflow_template_test.py +++ b/samples/snippets/instantiate_inline_workflow_template_test.py @@ -17,7 +17,7 @@ import instantiate_inline_workflow_template -PROJECT_ID = os.environ['GCLOUD_PROJECT'] +PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT'] REGION = 'us-central1' diff --git a/samples/snippets/quickstart/quickstart_test.py b/samples/snippets/quickstart/quickstart_test.py index 7788e094..3e17f6fa 100644 --- a/samples/snippets/quickstart/quickstart_test.py +++ b/samples/snippets/quickstart/quickstart_test.py @@ -22,7 +22,7 @@ import quickstart -PROJECT_ID = os.environ['GCLOUD_PROJECT'] +PROJECT_ID = os.environ['GOOGLE_CLOUD_PROJECT'] REGION = 'us-central1' CLUSTER_NAME = 'py-qs-test-{}'.format(str(uuid.uuid4())) STAGING_BUCKET = 
'py-dataproc-qs-bucket-{}'.format(str(uuid.uuid4())) From 517516fd89e1d6444ec2f0a46e5430d974c87ba4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 11 Jun 2020 21:51:16 +0200 Subject: [PATCH 092/109] Update dependency google-auth to v1.17.0 [(#4058)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4058) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2e882818..a69166ed 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.29.0 -google-auth==1.16.1 +google-auth==1.17.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 From ed72b3442b82deebdcb903721383e5a40027ac7d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 12 Jun 2020 02:32:11 +0200 Subject: [PATCH 093/109] chore(deps): update dependency google-auth to v1.17.1 [(#4073)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4073) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a69166ed..9df8cc5d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.29.0 -google-auth==1.17.0 +google-auth==1.17.1 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 From 85dfafb85cabbdec3e0ca70d6cc78fc6b5c1fe3e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 12 Jun 2020 22:53:46 +0200 Subject: [PATCH 094/109] Update dependency google-auth to v1.17.2 [(#4083)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4083) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9df8cc5d..2daccfaa 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.29.0 -google-auth==1.17.1 +google-auth==1.17.2 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 From 0f5ec7d59daff4f401ccf9f4267831ac5a868d87 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Jun 2020 05:34:55 +0200 Subject: [PATCH 095/109] Update dependency google-auth to v1.18.0 [(#4125)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4125) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2daccfaa..f498daa7 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.29.0 -google-auth==1.17.2 +google-auth==1.18.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 From 109dc455f1281df5a6206ccbb67b43b0058ab035 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Jun 2020 18:41:14 +0200 Subject: [PATCH 096/109] Update dependency google-cloud-dataproc to v1 [(#4109)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4109) Co-authored-by: Takashi Matsuo --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f498daa7..f739aaf1 100644 --- a/samples/snippets/requirements.txt +++ 
b/samples/snippets/requirements.txt @@ -3,4 +3,4 @@ google-auth==1.18.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 google-cloud-storage==1.28.1 -google-cloud-dataproc==0.8.1 +google-cloud-dataproc==1.0.0 From 039a2812e6dc0839f3ee69714ac57174e99f4824 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 20 Jun 2020 01:03:47 +0200 Subject: [PATCH 097/109] chore(deps): update dependency google-cloud-storage to v1.29.0 [(#4040)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4040) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f739aaf1..51046aaa 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -2,5 +2,5 @@ grpcio==1.29.0 google-auth==1.18.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 -google-cloud-storage==1.28.1 +google-cloud-storage==1.29.0 google-cloud-dataproc==1.0.0 From 7ee04a40127cc0bface6d48472fa97d2f5be267d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 24 Jun 2020 00:01:34 +0200 Subject: [PATCH 098/109] chore(deps): update dependency grpcio to v1.30.0 [(#4143)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4143) Co-authored-by: Takashi Matsuo --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 51046aaa..e6ecb26a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.29.0 +grpcio==1.30.0 google-auth==1.18.0 google-auth-httplib2==0.0.3 google-cloud==0.34.0 From 678dd1427224e516c913c291e3022d1608ce8a62 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 9 Jul 2020 02:00:20 +0200 Subject: [PATCH 099/109] Update dependency google-auth-httplib2 to v0.0.4 [(#4255)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4255) Co-authored-by: Takashi Matsuo --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e6ecb26a..7cfcf22c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ grpcio==1.30.0 google-auth==1.18.0 -google-auth-httplib2==0.0.3 +google-auth-httplib2==0.0.4 google-cloud==0.34.0 google-cloud-storage==1.29.0 google-cloud-dataproc==1.0.0 From bdbdd3ab5b1202ea00faeba7eac5cb4e0a689750 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 13 Jul 2020 00:46:30 +0200 Subject: [PATCH 100/109] chore(deps): update dependency pytest to v5.4.3 [(#4279)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4279) * chore(deps): update dependency pytest to v5.4.3 * specify pytest for python 2 in appengine Co-authored-by: Leah Cole --- samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 781d4326..79738af5 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1 +1 @@ -pytest==5.3.2 +pytest==5.4.3 From 88c8137716d079b08e12600ccd3dbf9cab84c76d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 13 Jul 2020 22:20:34 +0200 Subject: [PATCH 101/109] chore(deps): update dependency google-auth to v1.19.0 
[(#4293)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4293) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7cfcf22c..3c4209e5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.30.0 -google-auth==1.18.0 +google-auth==1.19.0 google-auth-httplib2==0.0.4 google-cloud==0.34.0 google-cloud-storage==1.29.0 From 6bfa38830f14aed0ae21b0af193076e2300948aa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 17 Jul 2020 02:44:07 +0200 Subject: [PATCH 102/109] chore(deps): update dependency google-cloud-dataproc to v1.0.1 [(#4309)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4309) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-dataproc](https://togithub.com/googleapis/python-dataproc) | patch | `==1.0.0` -> `==1.0.1` | --- ### Release Notes
googleapis/python-dataproc ### [`v1.0.1`](https://togithub.com/googleapis/python-dataproc/blob/master/CHANGELOG.md#​101-httpswwwgithubcomgoogleapispython-dataproccomparev100v101-2020-07-16) [Compare Source](https://togithub.com/googleapis/python-dataproc/compare/v1.0.0...v1.0.1)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3c4209e5..16e1a1a0 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -3,4 +3,4 @@ google-auth==1.19.0 google-auth-httplib2==0.0.4 google-cloud==0.34.0 google-cloud-storage==1.29.0 -google-cloud-dataproc==1.0.0 +google-cloud-dataproc==1.0.1 From 155342b0d2972bd1af1a320adac24e0b10ed0605 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 17 Jul 2020 19:02:17 +0200 Subject: [PATCH 103/109] chore(deps): update dependency google-auth to v1.19.1 [(#4304)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4304) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 16e1a1a0..656effb9 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.30.0 -google-auth==1.19.0 +google-auth==1.19.1 google-auth-httplib2==0.0.4 google-cloud==0.34.0 google-cloud-storage==1.29.0 From 22a5d468d5bd5b2295e34520986f361f177cc66b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 18 Jul 2020 02:48:10 +0200 Subject: [PATCH 104/109] chore(deps): update dependency google-auth to v1.19.2 [(#4321)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4321) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-auth](https://togithub.com/googleapis/google-auth-library-python) | patch | `==1.19.1` -> `==1.19.2` | --- ### Release Notes
googleapis/google-auth-library-python ### [`v1.19.2`](https://togithub.com/googleapis/google-auth-library-python/blob/master/CHANGELOG.md#​1192-httpswwwgithubcomgoogleapisgoogle-auth-library-pythoncomparev1191v1192-2020-07-17) [Compare Source](https://togithub.com/googleapis/google-auth-library-python/compare/v1.19.1...v1.19.2)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Never, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#GoogleCloudPlatform/python-docs-samples). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 656effb9..c080627a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.30.0 -google-auth==1.19.1 +google-auth==1.19.2 google-auth-httplib2==0.0.4 google-cloud==0.34.0 google-cloud-storage==1.29.0 From 253f93e9b5cde0f05c088c8fe04227fd98994b5d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 28 Jul 2020 22:36:14 +0200 Subject: [PATCH 105/109] Update dependency google-auth to v1.20.0 [(#4387)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4387) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c080627a..8d61648d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.30.0 -google-auth==1.19.2 +google-auth==1.20.0 google-auth-httplib2==0.0.4 google-cloud==0.34.0 google-cloud-storage==1.29.0 From bf8a0515cac231fc677e7b8788f1ba07f0f0d62c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 1 Aug 2020 21:51:00 +0200 Subject: [PATCH 106/109] Update dependency pytest to v6 [(#4390)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4390) --- samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 79738af5..7e460c8c 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1 +1 @@ -pytest==5.4.3 +pytest==6.0.1 From 88bb9ed890ef980bb02ce19acb93579aa681d722 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 5 Aug 2020 21:49:41 +0200 Subject: [PATCH 107/109] Update dependency grpcio to v1.31.0 [(#4438)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4438) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8d61648d..8b9a1065 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -grpcio==1.30.0 +grpcio==1.31.0 google-auth==1.20.0 google-auth-httplib2==0.0.4 google-cloud==0.34.0 From cabc8adaefb7d7ed08f4a61a9c22bf1331632e3c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 7 Aug 2020 03:36:31 +0200 Subject: [PATCH 108/109] chore(deps): update dependency google-auth to v1.20.1 [(#4452)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4452) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt 
b/samples/snippets/requirements.txt index 8b9a1065..ebc1d881 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ grpcio==1.31.0 -google-auth==1.20.0 +google-auth==1.20.1 google-auth-httplib2==0.0.4 google-cloud==0.34.0 google-cloud-storage==1.29.0 From 172d30cb4017720f8f2134e5a698727ef6dd3226 Mon Sep 17 00:00:00 2001 From: arithmetic1728 Date: Fri, 7 Aug 2020 13:14:14 -0700 Subject: [PATCH 109/109] chore: update templates --- .github/CODEOWNERS | 8 + .gitignore | 3 +- .kokoro/build.sh | 8 +- .kokoro/docker/docs/Dockerfile | 98 ++++++ .kokoro/docker/docs/fetch_gpg_keys.sh | 45 +++ .kokoro/docs/common.cfg | 21 +- .kokoro/docs/docs-presubmit.cfg | 17 + .kokoro/publish-docs.sh | 39 ++- .kokoro/trampoline_v2.sh | 487 ++++++++++++++++++++++++++ .trampolinerc | 51 +++ docs/conf.py | 11 +- noxfile.py | 37 ++ samples/AUTHORING_GUIDE.md | 1 + samples/CONTRIBUTING.md | 1 + samples/snippets/noxfile.py | 224 ++++++++++++ synth.metadata | 11 +- synth.py | 8 +- 17 files changed, 1043 insertions(+), 27 deletions(-) create mode 100644 .github/CODEOWNERS create mode 100644 .kokoro/docker/docs/Dockerfile create mode 100755 .kokoro/docker/docs/fetch_gpg_keys.sh create mode 100644 .kokoro/docs/docs-presubmit.cfg create mode 100755 .kokoro/trampoline_v2.sh create mode 100644 .trampolinerc create mode 100644 samples/AUTHORING_GUIDE.md create mode 100644 samples/CONTRIBUTING.md create mode 100644 samples/snippets/noxfile.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..4a62702a --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,8 @@ +# Code owners file. +# This file controls who is tagged for review for any given pull request. +# +# For syntax help see: +# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax + + +/samples/**/*.py @bradmiro @googleapis/python-samples-owners diff --git a/.gitignore b/.gitignore index b87e1ed5..b9daa52f 100644 --- a/.gitignore +++ b/.gitignore @@ -46,6 +46,7 @@ pip-log.txt # Built documentation docs/_build bigquery/docs/generated +docs.metadata # Virtual environment env/ @@ -57,4 +58,4 @@ system_tests/local_test_setup # Make sure a generated file isn't accidentally committed. pylintrc -pylintrc.test \ No newline at end of file +pylintrc.test diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 0458ba16..0cd63701 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -36,4 +36,10 @@ python3.6 -m pip uninstall --yes --quiet nox-automation python3.6 -m pip install --upgrade --quiet nox python3.6 -m nox --version -python3.6 -m nox +# If NOX_SESSION is set, it only runs the specified session, +# otherwise run all the sessions. +if [[ -n "${NOX_SESSION:-}" ]]; then + python3.6 -m nox -s "${NOX_SESSION:-}" +else + python3.6 -m nox +fi diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile new file mode 100644 index 00000000..412b0b56 --- /dev/null +++ b/.kokoro/docker/docs/Dockerfile @@ -0,0 +1,98 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from ubuntu:20.04 + +ENV DEBIAN_FRONTEND noninteractive + +# Ensure local Python is preferred over distribution Python. +ENV PATH /usr/local/bin:$PATH + +# Install dependencies. +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + apt-transport-https \ + build-essential \ + ca-certificates \ + curl \ + dirmngr \ + git \ + gpg-agent \ + graphviz \ + libbz2-dev \ + libdb5.3-dev \ + libexpat1-dev \ + libffi-dev \ + liblzma-dev \ + libreadline-dev \ + libsnappy-dev \ + libssl-dev \ + libsqlite3-dev \ + portaudio19-dev \ + redis-server \ + software-properties-common \ + ssh \ + sudo \ + tcl \ + tcl-dev \ + tk \ + tk-dev \ + uuid-dev \ + wget \ + zlib1g-dev \ + && add-apt-repository universe \ + && apt-get update \ + && apt-get -y install jq \ + && apt-get clean autoclean \ + && apt-get autoremove -y \ + && rm -rf /var/lib/apt/lists/* \ + && rm -f /var/cache/apt/archives/*.deb + + +COPY fetch_gpg_keys.sh /tmp +# Install the desired versions of Python. +RUN set -ex \ + && export GNUPGHOME="$(mktemp -d)" \ + && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ + && /tmp/fetch_gpg_keys.sh \ + && for PYTHON_VERSION in 3.7.8 3.8.5; do \ + wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ + && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ + && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ + && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ + && mkdir -p /usr/src/python-${PYTHON_VERSION} \ + && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ + && rm python-${PYTHON_VERSION}.tar.xz \ + && cd /usr/src/python-${PYTHON_VERSION} \ + && ./configure \ + --enable-shared \ + # This works only on Python 2.7 and throws a warning on every other + # version, but seems otherwise harmless. + --enable-unicode=ucs4 \ + --with-system-ffi \ + --without-ensurepip \ + && make -j$(nproc) \ + && make install \ + && ldconfig \ + ; done \ + && rm -rf "${GNUPGHOME}" \ + && rm -rf /usr/src/python* \ + && rm -rf ~/.cache/ + +RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ + && python3.7 /tmp/get-pip.py \ + && python3.8 /tmp/get-pip.py \ + && rm /tmp/get-pip.py + +CMD ["python3.7"] diff --git a/.kokoro/docker/docs/fetch_gpg_keys.sh b/.kokoro/docker/docs/fetch_gpg_keys.sh new file mode 100755 index 00000000..d653dd86 --- /dev/null +++ b/.kokoro/docker/docs/fetch_gpg_keys.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A script to fetch gpg keys with retry. +# Avoid jinja parsing the file. +# + +function retry { + if [[ "${#}" -le 1 ]]; then + echo "Usage: ${0} retry_count commands.." 
+ exit 1 + fi + local retries=${1} + local command="${@:2}" + until [[ "${retries}" -le 0 ]]; do + $command && return 0 + if [[ $? -ne 0 ]]; then + echo "command failed, retrying" + ((retries--)) + fi + done + return 1 +} + +# 3.6.9, 3.7.5 (Ned Deily) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D + +# 3.8.0 (Łukasz Langa) +retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ + E3FF2839C048B25C084DEBE9B26995E310250568 + +# diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index 8b957cf6..676322b2 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -11,12 +11,12 @@ action { gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" # Use the trampoline script to run in docker. -build_file: "python-dataproc/.kokoro/trampoline.sh" +build_file: "python-dataproc/.kokoro/trampoline_v2.sh" # Configure the docker image for kokoro-trampoline. env_vars: { key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" + value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs" } env_vars: { key: "TRAMPOLINE_BUILD_FILE" @@ -28,6 +28,23 @@ env_vars: { value: "docs-staging" } +env_vars: { + key: "V2_STAGING_BUCKET" + value: "docs-staging-v2-staging" +} + +# It will upload the docker image after successful builds. +env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "true" +} + +# It will always build the docker image. +env_vars: { + key: "TRAMPOLINE_DOCKERFILE" + value: ".kokoro/docker/docs/Dockerfile" +} + # Fetch the token needed for reporting release status to GitHub before_action { fetch_keystore { diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg new file mode 100644 index 00000000..11181078 --- /dev/null +++ b/.kokoro/docs/docs-presubmit.cfg @@ -0,0 +1,17 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "STAGING_BUCKET" + value: "gcloud-python-test" +} + +env_vars: { + key: "V2_STAGING_BUCKET" + value: "gcloud-python-test" +} + +# We only upload the image in the main `docs` build. +env_vars: { + key: "TRAMPOLINE_IMAGE_UPLOAD" + value: "false" +} diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 60ba003f..8acb14e8 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -18,26 +18,16 @@ set -eo pipefail # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 -cd github/python-dataproc - -# Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +export PATH="${HOME}/.local/bin:${PATH}" # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --user --upgrade --quiet nox +python3 -m nox --version # build docs nox -s docs -python3 -m pip install gcp-docuploader - -# install a json parser -sudo apt-get update -sudo apt-get -y install software-properties-common -sudo add-apt-repository universe -sudo apt-get update -sudo apt-get -y install jq +python3 -m pip install --user gcp-docuploader # create metadata python3 -m docuploader create-metadata \ @@ -52,4 +42,23 @@ python3 -m docuploader create-metadata \ cat docs.metadata # upload docs -python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket docs-staging +python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" + + +# docfx yaml files +nox -s docfx + +# create metadata. 
+python3 -m docuploader create-metadata \ + --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ + --version=$(python3 setup.py --version) \ + --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ + --distribution-name=$(python3 setup.py --name) \ + --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ + --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ + --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) + +cat docs.metadata + +# upload docs +python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh new file mode 100755 index 00000000..719bcd5b --- /dev/null +++ b/.kokoro/trampoline_v2.sh @@ -0,0 +1,487 @@ +#!/usr/bin/env bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# trampoline_v2.sh +# +# This script does 3 things. +# +# 1. Prepare the Docker image for the test +# 2. Run the Docker with appropriate flags to run the test +# 3. Upload the newly built Docker image +# +# in a way that is somewhat compatible with trampoline_v1. +# +# To run this script, first download few files from gcs to /dev/shm. +# (/dev/shm is passed into the container as KOKORO_GFILE_DIR). +# +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/secrets_viewer_service_account.json /dev/shm +# gsutil cp gs://cloud-devrel-kokoro-resources/python-docs-samples/automl_secrets.txt /dev/shm +# +# Then run the script. +# .kokoro/trampoline_v2.sh +# +# These environment variables are required: +# TRAMPOLINE_IMAGE: The docker image to use. +# TRAMPOLINE_DOCKERFILE: The location of the Dockerfile. +# +# You can optionally change these environment variables: +# TRAMPOLINE_IMAGE_UPLOAD: +# (true|false): Whether to upload the Docker image after the +# successful builds. +# TRAMPOLINE_BUILD_FILE: The script to run in the docker container. +# TRAMPOLINE_WORKSPACE: The workspace path in the docker container. +# Defaults to /workspace. +# Potentially there are some repo specific envvars in .trampolinerc in +# the project root. + + +set -euo pipefail + +TRAMPOLINE_VERSION="2.0.5" + +if command -v tput >/dev/null && [[ -n "${TERM:-}" ]]; then + readonly IO_COLOR_RED="$(tput setaf 1)" + readonly IO_COLOR_GREEN="$(tput setaf 2)" + readonly IO_COLOR_YELLOW="$(tput setaf 3)" + readonly IO_COLOR_RESET="$(tput sgr0)" +else + readonly IO_COLOR_RED="" + readonly IO_COLOR_GREEN="" + readonly IO_COLOR_YELLOW="" + readonly IO_COLOR_RESET="" +fi + +function function_exists { + [ $(LC_ALL=C type -t $1)"" == "function" ] +} + +# Logs a message using the given color. The first argument must be one +# of the IO_COLOR_* variables defined above, such as +# "${IO_COLOR_YELLOW}". The remaining arguments will be logged in the +# given color. The log message will also have an RFC-3339 timestamp +# prepended (in UTC). 
You can disable the color output by setting +# TERM=vt100. +function log_impl() { + local color="$1" + shift + local timestamp="$(date -u "+%Y-%m-%dT%H:%M:%SZ")" + echo "================================================================" + echo "${color}${timestamp}:" "$@" "${IO_COLOR_RESET}" + echo "================================================================" +} + +# Logs the given message with normal coloring and a timestamp. +function log() { + log_impl "${IO_COLOR_RESET}" "$@" +} + +# Logs the given message in green with a timestamp. +function log_green() { + log_impl "${IO_COLOR_GREEN}" "$@" +} + +# Logs the given message in yellow with a timestamp. +function log_yellow() { + log_impl "${IO_COLOR_YELLOW}" "$@" +} + +# Logs the given message in red with a timestamp. +function log_red() { + log_impl "${IO_COLOR_RED}" "$@" +} + +readonly tmpdir=$(mktemp -d -t ci-XXXXXXXX) +readonly tmphome="${tmpdir}/h" +mkdir -p "${tmphome}" + +function cleanup() { + rm -rf "${tmpdir}" +} +trap cleanup EXIT + +RUNNING_IN_CI="${RUNNING_IN_CI:-false}" + +# The workspace in the container, defaults to /workspace. +TRAMPOLINE_WORKSPACE="${TRAMPOLINE_WORKSPACE:-/workspace}" + +pass_down_envvars=( + # TRAMPOLINE_V2 variables. + # Tells scripts whether they are running as part of CI or not. + "RUNNING_IN_CI" + # Indicates which CI system we're in. + "TRAMPOLINE_CI" + # Indicates the version of the script. + "TRAMPOLINE_VERSION" +) + +log_yellow "Building with Trampoline ${TRAMPOLINE_VERSION}" + +# Detect which CI systems we're in. If we're in any of the CI systems +# we support, `RUNNING_IN_CI` will be true and `TRAMPOLINE_CI` will be +# the name of the CI system. Both envvars will be passing down to the +# container for telling which CI system we're in. +if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then + # descriptive env var for indicating it's on CI. + RUNNING_IN_CI="true" + TRAMPOLINE_CI="kokoro" + if [[ "${TRAMPOLINE_USE_LEGACY_SERVICE_ACCOUNT:-}" == "true" ]]; then + if [[ ! -f "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" ]]; then + log_red "${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json does not exist. Did you forget to mount cloud-devrel-kokoro-resources/trampoline? Aborting." + exit 1 + fi + # This service account will be activated later. + TRAMPOLINE_SERVICE_ACCOUNT="${KOKORO_GFILE_DIR}/kokoro-trampoline.service-account.json" + else + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + gcloud auth list + fi + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet + fi + pass_down_envvars+=( + # KOKORO dynamic variables. 
+ "KOKORO_BUILD_NUMBER" + "KOKORO_BUILD_ID" + "KOKORO_JOB_NAME" + "KOKORO_GIT_COMMIT" + "KOKORO_GITHUB_COMMIT" + "KOKORO_GITHUB_PULL_REQUEST_NUMBER" + "KOKORO_GITHUB_PULL_REQUEST_COMMIT" + # For Build Cop Bot + "KOKORO_GITHUB_COMMIT_URL" + "KOKORO_GITHUB_PULL_REQUEST_URL" + ) +elif [[ "${TRAVIS:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="travis" + pass_down_envvars+=( + "TRAVIS_BRANCH" + "TRAVIS_BUILD_ID" + "TRAVIS_BUILD_NUMBER" + "TRAVIS_BUILD_WEB_URL" + "TRAVIS_COMMIT" + "TRAVIS_COMMIT_MESSAGE" + "TRAVIS_COMMIT_RANGE" + "TRAVIS_JOB_NAME" + "TRAVIS_JOB_NUMBER" + "TRAVIS_JOB_WEB_URL" + "TRAVIS_PULL_REQUEST" + "TRAVIS_PULL_REQUEST_BRANCH" + "TRAVIS_PULL_REQUEST_SHA" + "TRAVIS_PULL_REQUEST_SLUG" + "TRAVIS_REPO_SLUG" + "TRAVIS_SECURE_ENV_VARS" + "TRAVIS_TAG" + ) +elif [[ -n "${GITHUB_RUN_ID:-}" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="github-workflow" + pass_down_envvars+=( + "GITHUB_WORKFLOW" + "GITHUB_RUN_ID" + "GITHUB_RUN_NUMBER" + "GITHUB_ACTION" + "GITHUB_ACTIONS" + "GITHUB_ACTOR" + "GITHUB_REPOSITORY" + "GITHUB_EVENT_NAME" + "GITHUB_EVENT_PATH" + "GITHUB_SHA" + "GITHUB_REF" + "GITHUB_HEAD_REF" + "GITHUB_BASE_REF" + ) +elif [[ "${CIRCLECI:-}" == "true" ]]; then + RUNNING_IN_CI="true" + TRAMPOLINE_CI="circleci" + pass_down_envvars+=( + "CIRCLE_BRANCH" + "CIRCLE_BUILD_NUM" + "CIRCLE_BUILD_URL" + "CIRCLE_COMPARE_URL" + "CIRCLE_JOB" + "CIRCLE_NODE_INDEX" + "CIRCLE_NODE_TOTAL" + "CIRCLE_PREVIOUS_BUILD_NUM" + "CIRCLE_PROJECT_REPONAME" + "CIRCLE_PROJECT_USERNAME" + "CIRCLE_REPOSITORY_URL" + "CIRCLE_SHA1" + "CIRCLE_STAGE" + "CIRCLE_USERNAME" + "CIRCLE_WORKFLOW_ID" + "CIRCLE_WORKFLOW_JOB_ID" + "CIRCLE_WORKFLOW_UPSTREAM_JOB_IDS" + "CIRCLE_WORKFLOW_WORKSPACE_ID" + ) +fi + +# Configure the service account for pulling the docker image. +function repo_root() { + local dir="$1" + while [[ ! -d "${dir}/.git" ]]; do + dir="$(dirname "$dir")" + done + echo "${dir}" +} + +# Detect the project root. In CI builds, we assume the script is in +# the git tree and traverse from there, otherwise, traverse from `pwd` +# to find `.git` directory. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + PROGRAM_PATH="$(realpath "$0")" + PROGRAM_DIR="$(dirname "${PROGRAM_PATH}")" + PROJECT_ROOT="$(repo_root "${PROGRAM_DIR}")" +else + PROJECT_ROOT="$(repo_root $(pwd))" +fi + +log_yellow "Changing to the project root: ${PROJECT_ROOT}." +cd "${PROJECT_ROOT}" + +# To support relative path for `TRAMPOLINE_SERVICE_ACCOUNT`, we need +# to use this environment variable in `PROJECT_ROOT`. +if [[ -n "${TRAMPOLINE_SERVICE_ACCOUNT:-}" ]]; then + + mkdir -p "${tmpdir}/gcloud" + gcloud_config_dir="${tmpdir}/gcloud" + + log_yellow "Using isolated gcloud config: ${gcloud_config_dir}." + export CLOUDSDK_CONFIG="${gcloud_config_dir}" + + log_yellow "Using ${TRAMPOLINE_SERVICE_ACCOUNT} for authentication." + gcloud auth activate-service-account \ + --key-file "${TRAMPOLINE_SERVICE_ACCOUNT}" + log_yellow "Configuring Container Registry access" + gcloud auth configure-docker --quiet +fi + +required_envvars=( + # The basic trampoline configurations. + "TRAMPOLINE_IMAGE" + "TRAMPOLINE_BUILD_FILE" +) + +if [[ -f "${PROJECT_ROOT}/.trampolinerc" ]]; then + source "${PROJECT_ROOT}/.trampolinerc" +fi + +log_yellow "Checking environment variables." +for e in "${required_envvars[@]}" +do + if [[ -z "${!e:-}" ]]; then + log "Missing ${e} env var. Aborting." + exit 1 + fi +done + +# We want to support legacy style TRAMPOLINE_BUILD_FILE used with V1 +# script: e.g. 
"github/repo-name/.kokoro/run_tests.sh" +TRAMPOLINE_BUILD_FILE="${TRAMPOLINE_BUILD_FILE#github/*/}" +log_yellow "Using TRAMPOLINE_BUILD_FILE: ${TRAMPOLINE_BUILD_FILE}" + +# ignore error on docker operations and test execution +set +e + +log_yellow "Preparing Docker image." +# We only download the docker image in CI builds. +if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + # Download the docker image specified by `TRAMPOLINE_IMAGE` + + # We may want to add --max-concurrent-downloads flag. + + log_yellow "Start pulling the Docker image: ${TRAMPOLINE_IMAGE}." + if docker pull "${TRAMPOLINE_IMAGE}"; then + log_green "Finished pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="true" + else + log_red "Failed pulling the Docker image: ${TRAMPOLINE_IMAGE}." + has_image="false" + fi +else + # For local run, check if we have the image. + if docker images "${TRAMPOLINE_IMAGE}:latest" | grep "${TRAMPOLINE_IMAGE}"; then + has_image="true" + else + has_image="false" + fi +fi + + +# The default user for a Docker container has uid 0 (root). To avoid +# creating root-owned files in the build directory we tell docker to +# use the current user ID. +user_uid="$(id -u)" +user_gid="$(id -g)" +user_name="$(id -un)" + +# To allow docker in docker, we add the user to the docker group in +# the host os. +docker_gid=$(cut -d: -f3 < <(getent group docker)) + +update_cache="false" +if [[ "${TRAMPOLINE_DOCKERFILE:-none}" != "none" ]]; then + # Build the Docker image from the source. + context_dir=$(dirname "${TRAMPOLINE_DOCKERFILE}") + docker_build_flags=( + "-f" "${TRAMPOLINE_DOCKERFILE}" + "-t" "${TRAMPOLINE_IMAGE}" + "--build-arg" "UID=${user_uid}" + "--build-arg" "USERNAME=${user_name}" + ) + if [[ "${has_image}" == "true" ]]; then + docker_build_flags+=("--cache-from" "${TRAMPOLINE_IMAGE}") + fi + + log_yellow "Start building the docker image." + if [[ "${TRAMPOLINE_VERBOSE:-false}" == "true" ]]; then + echo "docker build" "${docker_build_flags[@]}" "${context_dir}" + fi + + # ON CI systems, we want to suppress docker build logs, only + # output the logs when it fails. + if [[ "${RUNNING_IN_CI:-}" == "true" ]]; then + if docker build "${docker_build_flags[@]}" "${context_dir}" \ + > "${tmpdir}/docker_build.log" 2>&1; then + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + cat "${tmpdir}/docker_build.log" + fi + + log_green "Finished building the docker image." + update_cache="true" + else + log_red "Failed to build the Docker image, aborting." + log_yellow "Dumping the build logs:" + cat "${tmpdir}/docker_build.log" + exit 1 + fi + else + if docker build "${docker_build_flags[@]}" "${context_dir}"; then + log_green "Finished building the docker image." + update_cache="true" + else + log_red "Failed to build the Docker image, aborting." + exit 1 + fi + fi +else + if [[ "${has_image}" != "true" ]]; then + log_red "We do not have ${TRAMPOLINE_IMAGE} locally, aborting." + exit 1 + fi +fi + +# We use an array for the flags so they are easier to document. +docker_flags=( + # Remove the container after it exists. + "--rm" + + # Use the host network. + "--network=host" + + # Run in priviledged mode. We are not using docker for sandboxing or + # isolation, just for packaging our dev tools. + "--privileged" + + # Run the docker script with the user id. Because the docker image gets to + # write in ${PWD} you typically want this to be your user id. + # To allow docker in docker, we need to use docker gid on the host. + "--user" "${user_uid}:${docker_gid}" + + # Pass down the USER. 
+ "--env" "USER=${user_name}" + + # Mount the project directory inside the Docker container. + "--volume" "${PROJECT_ROOT}:${TRAMPOLINE_WORKSPACE}" + "--workdir" "${TRAMPOLINE_WORKSPACE}" + "--env" "PROJECT_ROOT=${TRAMPOLINE_WORKSPACE}" + + # Mount the temporary home directory. + "--volume" "${tmphome}:/h" + "--env" "HOME=/h" + + # Allow docker in docker. + "--volume" "/var/run/docker.sock:/var/run/docker.sock" + + # Mount the /tmp so that docker in docker can mount the files + # there correctly. + "--volume" "/tmp:/tmp" + # Pass down the KOKORO_GFILE_DIR and KOKORO_KEYSTORE_DIR + # TODO(tmatsuo): This part is not portable. + "--env" "TRAMPOLINE_SECRET_DIR=/secrets" + "--volume" "${KOKORO_GFILE_DIR:-/dev/shm}:/secrets/gfile" + "--env" "KOKORO_GFILE_DIR=/secrets/gfile" + "--volume" "${KOKORO_KEYSTORE_DIR:-/dev/shm}:/secrets/keystore" + "--env" "KOKORO_KEYSTORE_DIR=/secrets/keystore" +) + +# Add an option for nicer output if the build gets a tty. +if [[ -t 0 ]]; then + docker_flags+=("-it") +fi + +# Passing down env vars +for e in "${pass_down_envvars[@]}" +do + if [[ -n "${!e:-}" ]]; then + docker_flags+=("--env" "${e}=${!e}") + fi +done + +# If arguments are given, all arguments will become the commands run +# in the container, otherwise run TRAMPOLINE_BUILD_FILE. +if [[ $# -ge 1 ]]; then + log_yellow "Running the given commands '" "${@:1}" "' in the container." + readonly commands=("${@:1}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" "${commands[@]}" +else + log_yellow "Running the tests in a Docker container." + docker_flags+=("--entrypoint=${TRAMPOLINE_BUILD_FILE}") + if [[ "${TRAMPOLINE_VERBOSE:-}" == "true" ]]; then + echo docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" + fi + docker run "${docker_flags[@]}" "${TRAMPOLINE_IMAGE}" +fi + + +test_retval=$? + +if [[ ${test_retval} -eq 0 ]]; then + log_green "Build finished with ${test_retval}" +else + log_red "Build finished with ${test_retval}" +fi + +# Only upload it when the test passes. +if [[ "${update_cache}" == "true" ]] && \ + [[ $test_retval == 0 ]] && \ + [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]]; then + log_yellow "Uploading the Docker image." + if docker push "${TRAMPOLINE_IMAGE}"; then + log_green "Finished uploading the Docker image." + else + log_red "Failed uploading the Docker image." + fi + # Call trampoline_after_upload_hook if it's defined. + if function_exists trampoline_after_upload_hook; then + trampoline_after_upload_hook + fi + +fi + +exit "${test_retval}" diff --git a/.trampolinerc b/.trampolinerc new file mode 100644 index 00000000..995ee291 --- /dev/null +++ b/.trampolinerc @@ -0,0 +1,51 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Template for .trampolinerc + +# Add required env vars here. 
+required_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Add env vars which are passed down into the container here. +pass_down_envvars+=( + "STAGING_BUCKET" + "V2_STAGING_BUCKET" +) + +# Prevent unintentional override on the default image. +if [[ "${TRAMPOLINE_IMAGE_UPLOAD:-false}" == "true" ]] && \ + [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + echo "Please set TRAMPOLINE_IMAGE if you want to upload the Docker image." + exit 1 +fi + +# Define the default value if it makes sense. +if [[ -z "${TRAMPOLINE_IMAGE_UPLOAD:-}" ]]; then + TRAMPOLINE_IMAGE_UPLOAD="" +fi + +if [[ -z "${TRAMPOLINE_IMAGE:-}" ]]; then + TRAMPOLINE_IMAGE="" +fi + +if [[ -z "${TRAMPOLINE_DOCKERFILE:-}" ]]; then + TRAMPOLINE_DOCKERFILE="" +fi + +if [[ -z "${TRAMPOLINE_BUILD_FILE:-}" ]]; then + TRAMPOLINE_BUILD_FILE="" +fi diff --git a/docs/conf.py b/docs/conf.py index 01872827..476a3a7e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,6 +20,10 @@ # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath("..")) +# For plugins that can not read conf.py. +# See also: https://github.com/docascode/sphinx-docfx-yaml/issues/85 +sys.path.insert(0, os.path.abspath(".")) + __version__ = "" # -- General configuration ------------------------------------------------ @@ -90,7 +94,12 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build"] +exclude_patterns = [ + "_build", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/snippets/README.rst", +] # The reST default role (used for this markup: `text`) to use for all # documents. diff --git a/noxfile.py b/noxfile.py index a5b7aec1..016cd4a1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -100,6 +100,10 @@ def system(session): """Run the system test suite.""" system_test_path = os.path.join("tests", "system.py") system_test_folder_path = os.path.join("tests", "system") + + # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true. + if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false": + session.skip("RUN_SYSTEM_TESTS is set to false, skipping") # Sanity check: Only run tests if the environment variable is set. 
if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable") @@ -160,3 +164,36 @@ def docs(session): os.path.join("docs", ""), os.path.join("docs", "_build", "html", ""), ) + + +@nox.session(python=DEFAULT_PYTHON_VERSION) +def docfx(session): + """Build the docfx yaml files for this library.""" + + session.install("-e", ".") + session.install("sphinx<3.0.0", "alabaster", "recommonmark", "sphinx-docfx-yaml") + + shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run( + "sphinx-build", + "-T", # show full traceback on exception + "-N", # no colors + "-D", + ( + "extensions=sphinx.ext.autodoc," + "sphinx.ext.autosummary," + "docfx_yaml.extension," + "sphinx.ext.intersphinx," + "sphinx.ext.coverage," + "sphinx.ext.napoleon," + "sphinx.ext.todo," + "sphinx.ext.viewcode," + "recommonmark" + ), + "-b", + "html", + "-d", + os.path.join("docs", "_build", "doctrees", ""), + os.path.join("docs", ""), + os.path.join("docs", "_build", "html", ""), + ) diff --git a/samples/AUTHORING_GUIDE.md b/samples/AUTHORING_GUIDE.md new file mode 100644 index 00000000..55c97b32 --- /dev/null +++ b/samples/AUTHORING_GUIDE.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/AUTHORING_GUIDE.md \ No newline at end of file diff --git a/samples/CONTRIBUTING.md b/samples/CONTRIBUTING.md new file mode 100644 index 00000000..34c882b6 --- /dev/null +++ b/samples/CONTRIBUTING.md @@ -0,0 +1 @@ +See https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py new file mode 100644 index 00000000..ba55d7ce --- /dev/null +++ b/samples/snippets/noxfile.py @@ -0,0 +1,224 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +from pathlib import Path +import sys + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +# Copy `noxfile_config.py` to your directory and modify it instead. + + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + 'ignored_versions': ["2.7"], + + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. 
+    'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT',
+    # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    'envs': {},
+}
+
+
+try:
+    # Ensure we can import noxfile_config in the project's directory.
+    sys.path.append('.')
+    from noxfile_config import TEST_CONFIG_OVERRIDE
+except ImportError as e:
+    print("No user noxfile_config found: detail: {}".format(e))
+    TEST_CONFIG_OVERRIDE = {}
+
+# Update the TEST_CONFIG with the user supplied values.
+TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)
+
+
+def get_pytest_env_vars():
+    """Returns a dict for pytest invocation."""
+    ret = {}
+
+    # Override the GCLOUD_PROJECT and the alias.
+    env_key = TEST_CONFIG['gcloud_project_env']
+    # This should error out if not set.
+    ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key]
+
+    # Apply user supplied envs.
+    ret.update(TEST_CONFIG['envs'])
+    return ret
+
+
+# DO NOT EDIT - automatically generated.
+# All versions used to test samples.
+ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"]
+
+# Any default versions that should be ignored.
+IGNORED_VERSIONS = TEST_CONFIG['ignored_versions']
+
+TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
+
+INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False))
+#
+# Style Checks
+#
+
+
+def _determine_local_import_names(start_dir):
+    """Determines all import names that should be considered "local".
+
+    This is used when running the linter to ensure that import order is
+    properly checked.
+    """
+    file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)]
+    return [
+        basename
+        for basename, extension in file_ext_pairs
+        if extension == ".py"
+        or os.path.isdir(os.path.join(start_dir, basename))
+        and basename not in ("__pycache__",)
+    ]
+
+
+# Linting with flake8.
+#
+# We ignore the following rules:
+#   E203: whitespace before ‘:’
+#   E266: too many leading ‘#’ for block comment
+#   E501: line too long
+#   I202: Additional newline in a section of imports
+#
+# We also need to specify the rules which are ignored by default:
+# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121']
+FLAKE8_COMMON_ARGS = [
+    "--show-source",
+    "--builtin=gettext",
+    "--max-complexity=20",
+    "--import-order-style=google",
+    "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py",
+    "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202",
+    "--max-line-length=88",
+]
+
+
+@nox.session
+def lint(session):
+    session.install("flake8", "flake8-import-order")
+
+    local_names = _determine_local_import_names(".")
+    args = FLAKE8_COMMON_ARGS + [
+        "--application-import-names",
+        ",".join(local_names),
+        "."
+    ]
+    session.run("flake8", *args)
+
+
+#
+# Sample Tests
+#
+
+
+PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
+
+
+def _session_tests(session, post_install=None):
+    """Runs py.test for a particular project."""
+    if os.path.exists("requirements.txt"):
+        session.install("-r", "requirements.txt")
+
+    if os.path.exists("requirements-test.txt"):
+        session.install("-r", "requirements-test.txt")
+
+    if INSTALL_LIBRARY_FROM_SOURCE:
+        session.install("-e", _get_repo_root())
+
+    if post_install:
+        post_install(session)
+
+    session.run(
+        "pytest",
+        *(PYTEST_COMMON_ARGS + session.posargs),
+        # Pytest will return 5 when no tests are collected. This can happen
+        # on Travis where slow and flaky tests are excluded.
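+        # (In other words, a run that collects no tests is treated as a
+        # pass here rather than a failure, so sample directories without
+        # tests do not break the build.)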
+ # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars() + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session): + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip("SKIPPED: {} tests are disabled for this sample.".format( + session.python + )) + + +# +# Readmegen +# + + +def _get_repo_root(): + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session, path): + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/synth.metadata b/synth.metadata index ee8ad34f..29458e54 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,22 +4,21 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-dataproc.git", - "sha": "380e122da3e7ef329d39f658f6dbfa775c612efc" + "sha": "29e42dc71aa02e38bf7a5d83cc6a13e8487a48c2" } }, { "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "c8fc4b79cc54749520e6fc7726deee7a22d1f988", - "internalRef": "323858574" + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "5f2f711c91199ba2f609d3f06a2fe22aee4e5be3" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "21f1470ecd01424dc91c70f1a7c798e4e87d1eec" + "sha": "5f2f711c91199ba2f609d3f06a2fe22aee4e5be3" } } ], diff --git a/synth.py b/synth.py index 087d4bcb..1680ecb6 100644 --- a/synth.py +++ b/synth.py @@ -18,6 +18,7 @@ import synthtool as s from synthtool import gcp +from synthtool.languages import python gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() @@ -91,9 +92,14 @@ # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(unit_cov_level=97, cov_level=89) +templated_files = common.py_library(unit_cov_level=97, cov_level=89, samples=True) s.move(templated_files) +# ---------------------------------------------------------------------------- +# Samples templates +# ---------------------------------------------------------------------------- +python.py_samples(skip_readmes=True) + # TODO(busunkim): Use latest sphinx after microgenerator transition s.replace("noxfile.py", """['"]sphinx['"]""", '"sphinx<3.0.0"')
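
The `s.replace` call above is synthtool's generic post-processing hook: after the templated files are copied in, it rewrites a generated file in place using a regular expression. As a minimal sketch of the same pattern (the `coverage` pin below is hypothetical and not part of this patch; only the `s.replace(path, pattern, replacement)` usage already present in synth.py is assumed), a similar pin for another dependency could be added like so:

    import synthtool as s

    # Sketch only: constrain a hypothetical "coverage" requirement in the
    # generated noxfile.py, mirroring the sphinx pin applied above.
    s.replace(
        "noxfile.py",
        """['"]coverage['"]""",
        '"coverage<5.0.0"',
    )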