From 5557be91c29ccda00073ab31a5dc72cb985877e2 Mon Sep 17 00:00:00 2001 From: bradmiro Date: Mon, 6 Apr 2020 14:24:23 -0400 Subject: [PATCH 01/10] Added mains to samples --- dataproc/create_cluster.py | 18 ++++++++++++++++-- dataproc/create_cluster_test.py | 14 ++++++++------ .../instantiate_inline_workflow_template.py | 13 ++++++++----- ...nstantiate_inline_workflow_template_test.py | 16 +++++++++------- 4 files changed, 41 insertions(+), 20 deletions(-) diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py index a396ddc6c854..fb9cfe5f510c 100644 --- a/dataproc/create_cluster.py +++ b/dataproc/create_cluster.py @@ -16,8 +16,14 @@ # This sample walks a user through creating a Cloud Dataproc cluster using # the Python client library. +# +# This script can be run on its own: +# python create_cluster.py ${PROJECT_ID} ${REGION} ${CLUSTER_NAME} + # [START dataproc_create_cluster] +import sys + from google.cloud import dataproc_v1 as dataproc @@ -33,7 +39,8 @@ def create_cluster(project_id, region, cluster_name): # Create a client with the endpoint set to the desired cluster region. cluster_client = dataproc.ClusterControllerClient(client_options={ - 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region) + 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region), + 'project_id': project_id }) # Create the cluster config. @@ -58,4 +65,11 @@ def create_cluster(project_id, region, cluster_name): # Output a success message. print('Cluster created successfully: {}'.format(result.cluster_name)) - # [END dataproc_create_cluster] + + +if __name__ == "__main__": + project_id = sys.argv[1] + region = sys.argv[2] + cluster_name = sys.argv[3] + create_cluster(project_id, region, cluster_name) +# [END dataproc_create_cluster] diff --git a/dataproc/create_cluster_test.py b/dataproc/create_cluster_test.py index 042745792739..2809a0df761a 100644 --- a/dataproc/create_cluster_test.py +++ b/dataproc/create_cluster_test.py @@ -15,11 +15,10 @@ import os import uuid import pytest +import subprocess from google.cloud import dataproc_v1 as dataproc -import create_cluster - PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' @@ -40,8 +39,11 @@ def teardown(): def test_cluster_create(capsys): - # Wrapper function for client library function - create_cluster.create_cluster(PROJECT_ID, REGION, CLUSTER_NAME) - - out, _ = capsys.readouterr() + command = [ + 'python', 'create_cluster.py', + PROJECT_ID, + REGION, + CLUSTER_NAME, + ] + out = subprocess.check_output(command).decode("utf-8") assert CLUSTER_NAME in out diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py index d492506bc72b..42a3d90b59b1 100644 --- a/dataproc/instantiate_inline_workflow_template.py +++ b/dataproc/instantiate_inline_workflow_template.py @@ -16,14 +16,15 @@ # workflow for Cloud Dataproc using the Python client library. # # This script can be run on its own: -# python workflows.py ${PROJECT_ID} ${REGION} +# python instantiate_inline_workflow_template.py ${PROJECT_ID} ${REGION} -import sys # [START dataproc_instantiate_inline_workflow_template] +import sys + from google.cloud import dataproc_v1 as dataproc -def instantiate_inline_workflow_template(project_id, region): +def instantiate_inline_workflow_template(project_id="PROJECT_ID", region="REGION"): """This sample walks a user through submitting a workflow for a Cloud Dataproc using the Python client library. @@ -91,8 +92,10 @@ def instantiate_inline_workflow_template(project_id, region): # Output a success message. print('Workflow ran successfully.') -# [END dataproc_instantiate_inline_workflow_template] if __name__ == "__main__": - instantiate_inline_workflow_template(sys.argv[1], sys.argv[2]) + project_id = sys.argv[1] + region = sys.argv[2] + instantiate_inline_workflow_template(project_id, region) +# [END dataproc_instantiate_inline_workflow_template] diff --git a/dataproc/instantiate_inline_workflow_template_test.py b/dataproc/instantiate_inline_workflow_template_test.py index 6fe3711973b6..317542644073 100644 --- a/dataproc/instantiate_inline_workflow_template_test.py +++ b/dataproc/instantiate_inline_workflow_template_test.py @@ -13,19 +13,21 @@ # limitations under the License. import os - -import instantiate_inline_workflow_template +import subprocess PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' -def test_workflows(capsys): +def test_workflows(): # Wrapper function for client library function - instantiate_inline_workflow_template.instantiate_inline_workflow_template( - PROJECT_ID, REGION - ) + command = [ + 'python', 'instantiate_inline_workflow_template.py', + PROJECT_ID, + REGION, + ] + + out = subprocess.check_output(command).decode("utf-8") - out, _ = capsys.readouterr() assert "successfully" in out From 9740aab234ea58b41849d9d26555e871367248b5 Mon Sep 17 00:00:00 2001 From: bradmiro Date: Mon, 6 Apr 2020 16:27:54 -0400 Subject: [PATCH 02/10] bug fix --- dataproc/create_cluster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py index fb9cfe5f510c..f887438db7de 100644 --- a/dataproc/create_cluster.py +++ b/dataproc/create_cluster.py @@ -40,7 +40,6 @@ def create_cluster(project_id, region, cluster_name): # Create a client with the endpoint set to the desired cluster region. cluster_client = dataproc.ClusterControllerClient(client_options={ 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region), - 'project_id': project_id }) # Create the cluster config. From 08e06e733fb0e51324cfb5423466c143f0a4fc95 Mon Sep 17 00:00:00 2001 From: bradmiro Date: Mon, 6 Apr 2020 18:31:33 -0400 Subject: [PATCH 03/10] Checked for arg count and revert tests --- dataproc/create_cluster.py | 12 ++++++++---- dataproc/create_cluster_test.py | 14 ++++++-------- dataproc/instantiate_inline_workflow_template.py | 11 +++++++---- .../instantiate_inline_workflow_template_test.py | 16 +++++++--------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py index f887438db7de..8471e85c09eb 100644 --- a/dataproc/create_cluster.py +++ b/dataproc/create_cluster.py @@ -67,8 +67,12 @@ def create_cluster(project_id, region, cluster_name): if __name__ == "__main__": - project_id = sys.argv[1] - region = sys.argv[2] - cluster_name = sys.argv[3] - create_cluster(project_id, region, cluster_name) + if len(sys.arv) != 4: + print("INSUFFICIENT ARGS: Please provide a " + + "PROJECT_ID, REGION AND CLUSTER_NAME.") + else: + project_id = sys.argv[1] + region = sys.argv[2] + cluster_name = sys.argv[3] + create_cluster(project_id, region, cluster_name) # [END dataproc_create_cluster] diff --git a/dataproc/create_cluster_test.py b/dataproc/create_cluster_test.py index 2809a0df761a..042745792739 100644 --- a/dataproc/create_cluster_test.py +++ b/dataproc/create_cluster_test.py @@ -15,10 +15,11 @@ import os import uuid import pytest -import subprocess from google.cloud import dataproc_v1 as dataproc +import create_cluster + PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' @@ -39,11 +40,8 @@ def teardown(): def test_cluster_create(capsys): - command = [ - 'python', 'create_cluster.py', - PROJECT_ID, - REGION, - CLUSTER_NAME, - ] - out = subprocess.check_output(command).decode("utf-8") + # Wrapper function for client library function + create_cluster.create_cluster(PROJECT_ID, REGION, CLUSTER_NAME) + + out, _ = capsys.readouterr() assert CLUSTER_NAME in out diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py index 42a3d90b59b1..5d39f637975e 100644 --- a/dataproc/instantiate_inline_workflow_template.py +++ b/dataproc/instantiate_inline_workflow_template.py @@ -24,7 +24,7 @@ from google.cloud import dataproc_v1 as dataproc -def instantiate_inline_workflow_template(project_id="PROJECT_ID", region="REGION"): +def instantiate_inline_workflow_template(project_id, region): """This sample walks a user through submitting a workflow for a Cloud Dataproc using the Python client library. @@ -95,7 +95,10 @@ def instantiate_inline_workflow_template(project_id="PROJECT_ID", region="REGION if __name__ == "__main__": - project_id = sys.argv[1] - region = sys.argv[2] - instantiate_inline_workflow_template(project_id, region) + if len(sys.arv) != 3: + print("INSUFFICIENT ARGS: Please provide a PROJECT_ID and REGION.") + else: + project_id = sys.argv[1] + region = sys.argv[2] + instantiate_inline_workflow_template(project_id, region) # [END dataproc_instantiate_inline_workflow_template] diff --git a/dataproc/instantiate_inline_workflow_template_test.py b/dataproc/instantiate_inline_workflow_template_test.py index 317542644073..6fe3711973b6 100644 --- a/dataproc/instantiate_inline_workflow_template_test.py +++ b/dataproc/instantiate_inline_workflow_template_test.py @@ -13,21 +13,19 @@ # limitations under the License. import os -import subprocess + +import instantiate_inline_workflow_template PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' -def test_workflows(): +def test_workflows(capsys): # Wrapper function for client library function - command = [ - 'python', 'instantiate_inline_workflow_template.py', - PROJECT_ID, - REGION, - ] - - out = subprocess.check_output(command).decode("utf-8") + instantiate_inline_workflow_template.instantiate_inline_workflow_template( + PROJECT_ID, REGION + ) + out, _ = capsys.readouterr() assert "successfully" in out From b760c4665197a3e9a1bd8a19ea0a98d6676d8647 Mon Sep 17 00:00:00 2001 From: bradmiro Date: Mon, 6 Apr 2020 18:41:59 -0400 Subject: [PATCH 04/10] lint fixes --- dataproc/create_cluster.py | 4 ++-- dataproc/instantiate_inline_workflow_template.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py index 8471e85c09eb..6a70f11923b9 100644 --- a/dataproc/create_cluster.py +++ b/dataproc/create_cluster.py @@ -68,8 +68,8 @@ def create_cluster(project_id, region, cluster_name): if __name__ == "__main__": if len(sys.arv) != 4: - print("INSUFFICIENT ARGS: Please provide a " - + "PROJECT_ID, REGION AND CLUSTER_NAME.") + print("INSUFFICIENT ARGS: Please provide a " + + "PROJECT_ID, REGION AND CLUSTER_NAME.") else: project_id = sys.argv[1] region = sys.argv[2] diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py index 5d39f637975e..001be0d95424 100644 --- a/dataproc/instantiate_inline_workflow_template.py +++ b/dataproc/instantiate_inline_workflow_template.py @@ -97,7 +97,7 @@ def instantiate_inline_workflow_template(project_id, region): if __name__ == "__main__": if len(sys.arv) != 3: print("INSUFFICIENT ARGS: Please provide a PROJECT_ID and REGION.") - else: + else: project_id = sys.argv[1] region = sys.argv[2] instantiate_inline_workflow_template(project_id, region) From 821e4070865523663d6fde97db14ec7ef6a0e51c Mon Sep 17 00:00:00 2001 From: bradmiro Date: Thu, 9 Apr 2020 17:19:03 -0400 Subject: [PATCH 05/10] moved region tags --- dataproc/create_cluster.py | 5 ++--- dataproc/instantiate_inline_workflow_template.py | 5 +++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py index 6a70f11923b9..4f3b3be2d79d 100644 --- a/dataproc/create_cluster.py +++ b/dataproc/create_cluster.py @@ -21,9 +21,9 @@ # python create_cluster.py ${PROJECT_ID} ${REGION} ${CLUSTER_NAME} -# [START dataproc_create_cluster] import sys +# [START dataproc_create_cluster] from google.cloud import dataproc_v1 as dataproc @@ -64,7 +64,7 @@ def create_cluster(project_id, region, cluster_name): # Output a success message. print('Cluster created successfully: {}'.format(result.cluster_name)) - + # [END dataproc_create_cluster] if __name__ == "__main__": if len(sys.arv) != 4: @@ -75,4 +75,3 @@ def create_cluster(project_id, region, cluster_name): region = sys.argv[2] cluster_name = sys.argv[3] create_cluster(project_id, region, cluster_name) -# [END dataproc_create_cluster] diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py index 001be0d95424..b6b4d9ecfa91 100644 --- a/dataproc/instantiate_inline_workflow_template.py +++ b/dataproc/instantiate_inline_workflow_template.py @@ -18,9 +18,10 @@ # This script can be run on its own: # python instantiate_inline_workflow_template.py ${PROJECT_ID} ${REGION} -# [START dataproc_instantiate_inline_workflow_template] + import sys +# [START dataproc_instantiate_inline_workflow_template] from google.cloud import dataproc_v1 as dataproc @@ -92,6 +93,7 @@ def instantiate_inline_workflow_template(project_id, region): # Output a success message. print('Workflow ran successfully.') + # [END dataproc_instantiate_inline_workflow_template] if __name__ == "__main__": @@ -101,4 +103,3 @@ def instantiate_inline_workflow_template(project_id, region): project_id = sys.argv[1] region = sys.argv[2] instantiate_inline_workflow_template(project_id, region) -# [END dataproc_instantiate_inline_workflow_template] From 1f615a2a23fdfabe052dc68dcd0b4fd37b99b14c Mon Sep 17 00:00:00 2001 From: bradmiro Date: Fri, 10 Apr 2020 19:22:10 -0400 Subject: [PATCH 06/10] Fix mains and remove subprocess from quickstart test --- dataproc/create_cluster.py | 16 ++++++++-------- dataproc/instantiate_inline_workflow_template.py | 12 ++++++------ dataproc/quickstart/quickstart_test.py | 15 +++++---------- 3 files changed, 19 insertions(+), 24 deletions(-) diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py index 4f3b3be2d79d..dd868434c236 100644 --- a/dataproc/create_cluster.py +++ b/dataproc/create_cluster.py @@ -66,12 +66,12 @@ def create_cluster(project_id, region, cluster_name): print('Cluster created successfully: {}'.format(result.cluster_name)) # [END dataproc_create_cluster] + if __name__ == "__main__": - if len(sys.arv) != 4: - print("INSUFFICIENT ARGS: Please provide a " - + "PROJECT_ID, REGION AND CLUSTER_NAME.") - else: - project_id = sys.argv[1] - region = sys.argv[2] - cluster_name = sys.argv[3] - create_cluster(project_id, region, cluster_name) + if len(sys.argv) < 4: + sys.exit('python create_cluster.py project_id region cluster_name') + + project_id = sys.argv[1] + region = sys.argv[2] + cluster_name = sys.argv[3] + create_cluster(project_id, region, cluster_name) diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py index b6b4d9ecfa91..1b1eb0b65549 100644 --- a/dataproc/instantiate_inline_workflow_template.py +++ b/dataproc/instantiate_inline_workflow_template.py @@ -97,9 +97,9 @@ def instantiate_inline_workflow_template(project_id, region): if __name__ == "__main__": - if len(sys.arv) != 3: - print("INSUFFICIENT ARGS: Please provide a PROJECT_ID and REGION.") - else: - project_id = sys.argv[1] - region = sys.argv[2] - instantiate_inline_workflow_template(project_id, region) + if len(sys.argv) < 3: + sys.exit('python create_cluster.py project_id region') + + project_id = sys.argv[1] + region = sys.argv[2] + instantiate_inline_workflow_template(project_id, region) diff --git a/dataproc/quickstart/quickstart_test.py b/dataproc/quickstart/quickstart_test.py index 5361449ff358..a38019d9b188 100644 --- a/dataproc/quickstart/quickstart_test.py +++ b/dataproc/quickstart/quickstart_test.py @@ -15,11 +15,12 @@ import os import uuid import pytest -import subprocess from google.cloud import dataproc_v1 as dataproc from google.cloud import storage +import quickstart + PROJECT_ID = os.environ['GCLOUD_PROJECT'] REGION = 'us-central1' @@ -60,15 +61,9 @@ def setup_teardown(): bucket.delete() -def test_quickstart(): - command = [ - 'python', 'quickstart/quickstart.py', - '--project_id', PROJECT_ID, - '--region', REGION, - '--cluster_name', CLUSTER_NAME, - '--job_file_path', JOB_FILE_PATH - ] - out = subprocess.check_output(command).decode("utf-8") +def test_quickstart(capsys): + quickstart.quickstart(PROJECT_ID, REGION, CLUSTER_NAME, JOB_FILE_PATH) + out, _ = capsys.readouterr() assert 'Cluster created successfully' in out assert 'Submitted job' in out From a961de3e02535d88a9a8ffb38fd24b37c9723711 Mon Sep 17 00:00:00 2001 From: bradmiro Date: Fri, 10 Apr 2020 19:24:04 -0400 Subject: [PATCH 07/10] fix error message in create cluster --- dataproc/instantiate_inline_workflow_template.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py index 1b1eb0b65549..cab3fc6e9924 100644 --- a/dataproc/instantiate_inline_workflow_template.py +++ b/dataproc/instantiate_inline_workflow_template.py @@ -98,7 +98,8 @@ def instantiate_inline_workflow_template(project_id, region): if __name__ == "__main__": if len(sys.argv) < 3: - sys.exit('python create_cluster.py project_id region') + sys.exit('python instantiate_inline_workflow_template.py ' + + 'project_id region') project_id = sys.argv[1] region = sys.argv[2] From 46991f42f0198742804315d54e01eaa5909e547f Mon Sep 17 00:00:00 2001 From: bradmiro Date: Mon, 13 Apr 2020 19:11:45 -0400 Subject: [PATCH 08/10] replaced .format with fstrings --- dataproc/create_cluster.py | 4 ++-- dataproc/instantiate_inline_workflow_template.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dataproc/create_cluster.py b/dataproc/create_cluster.py index dd868434c236..b4d63d2e13f5 100644 --- a/dataproc/create_cluster.py +++ b/dataproc/create_cluster.py @@ -39,7 +39,7 @@ def create_cluster(project_id, region, cluster_name): # Create a client with the endpoint set to the desired cluster region. cluster_client = dataproc.ClusterControllerClient(client_options={ - 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region), + 'api_endpoint': f'{region}-dataproc.googleapis.com:443', }) # Create the cluster config. @@ -63,7 +63,7 @@ def create_cluster(project_id, region, cluster_name): result = operation.result() # Output a success message. - print('Cluster created successfully: {}'.format(result.cluster_name)) + print(f'Cluster created successfully: {result.cluster_name}') # [END dataproc_create_cluster] diff --git a/dataproc/instantiate_inline_workflow_template.py b/dataproc/instantiate_inline_workflow_template.py index cab3fc6e9924..f9358376f9f9 100644 --- a/dataproc/instantiate_inline_workflow_template.py +++ b/dataproc/instantiate_inline_workflow_template.py @@ -37,7 +37,8 @@ def instantiate_inline_workflow_template(project_id, region): # Create a client with the endpoint set to the desired region. workflow_template_client = dataproc.WorkflowTemplateServiceClient( client_options={ - 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)} + 'api_endpoint': f'{region}-dataproc.googleapis.com:443' + } ) parent = workflow_template_client.region_path(project_id, region) From f526bc874c32ab29eb3d721616fc32b514bf706f Mon Sep 17 00:00:00 2001 From: bradmiro Date: Mon, 13 Apr 2020 19:14:40 -0400 Subject: [PATCH 09/10] add fstring to test --- dataproc/create_cluster_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataproc/create_cluster_test.py b/dataproc/create_cluster_test.py index 042745792739..00afff093107 100644 --- a/dataproc/create_cluster_test.py +++ b/dataproc/create_cluster_test.py @@ -31,7 +31,7 @@ def teardown(): yield cluster_client = dataproc.ClusterControllerClient(client_options={ - 'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION) + 'api_endpoint': f'{region}-dataproc.googleapis.com:443' }) # Client library function operation = cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME) From 0dbf815af2ee602c5a5859d6a97d08c429a356c3 Mon Sep 17 00:00:00 2001 From: bradmiro Date: Tue, 14 Apr 2020 12:57:50 -0400 Subject: [PATCH 10/10] fixed create_cluster_test --- dataproc/create_cluster_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataproc/create_cluster_test.py b/dataproc/create_cluster_test.py index 00afff093107..72ffce2bf06a 100644 --- a/dataproc/create_cluster_test.py +++ b/dataproc/create_cluster_test.py @@ -31,7 +31,7 @@ def teardown(): yield cluster_client = dataproc.ClusterControllerClient(client_options={ - 'api_endpoint': f'{region}-dataproc.googleapis.com:443' + 'api_endpoint': f'{REGION}-dataproc.googleapis.com:443' }) # Client library function operation = cluster_client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME)