diff --git a/samples/snippets/list_clusters.py b/samples/snippets/list_clusters.py
index 14a2ba81..4a016d58 100644
--- a/samples/snippets/list_clusters.py
+++ b/samples/snippets/list_clusters.py
@@ -19,21 +19,21 @@
 import googleapiclient.discovery
 
 
-# [START list_clusters]
+# [START dataproc_list_clusters]
 def list_clusters(dataproc, project, region):
     result = dataproc.projects().regions().clusters().list(
         projectId=project,
         region=region).execute()
     return result
-# [END list_clusters]
+# [END dataproc_list_clusters]
 
 
-# [START get_client]
+# [START dataproc_get_client]
 def get_client():
     """Builds a client to the dataproc API."""
     dataproc = googleapiclient.discovery.build('dataproc', 'v1')
     return dataproc
-# [END get_client]
+# [END dataproc_get_client]
 
 
 def main(project_id, region):
diff --git a/samples/snippets/pyspark_sort.py b/samples/snippets/pyspark_sort.py
index 518e906e..0ce2350a 100644
--- a/samples/snippets/pyspark_sort.py
+++ b/samples/snippets/pyspark_sort.py
@@ -18,11 +18,11 @@
 environment.
 """
 
-# [START pyspark]
+# [START dataproc_pyspark_sort]
 import pyspark
 
 sc = pyspark.SparkContext()
 rdd = sc.parallelize(['Hello,', 'world!', 'dog', 'elephant', 'panther'])
 words = sorted(rdd.collect())
 print(words)
-# [END pyspark]
+# [END dataproc_pyspark_sort]
diff --git a/samples/snippets/pyspark_sort_gcs.py b/samples/snippets/pyspark_sort_gcs.py
index 70f77d8d..f1961c37 100644
--- a/samples/snippets/pyspark_sort_gcs.py
+++ b/samples/snippets/pyspark_sort_gcs.py
@@ -21,10 +21,10 @@
 information.
 """
 
-# [START pyspark]
+# [START dataproc_pyspark_sort_gcs]
 import pyspark
 
 sc = pyspark.SparkContext()
 rdd = sc.textFile('gs://path-to-your-GCS-file')
 print(sorted(rdd.collect()))
-# [END pyspark]
+# [END dataproc_pyspark_sort_gcs]
diff --git a/samples/snippets/submit_job_to_cluster.py b/samples/snippets/submit_job_to_cluster.py
index 18150782..f06d5981 100644
--- a/samples/snippets/submit_job_to_cluster.py
+++ b/samples/snippets/submit_job_to_cluster.py
@@ -64,7 +64,7 @@ def download_output(project_id, cluster_id, output_bucket, job_id):
     return bucket.blob(output_blob).download_as_string()
 
 
-# [START create_cluster]
+# [START dataproc_create_cluster]
 def create_cluster(dataproc, project, zone, region, cluster_name):
     print('Creating cluster...')
     zone_uri = \
@@ -92,7 +92,7 @@ def create_cluster(dataproc, project, zone, region, cluster_name):
         region=region,
         body=cluster_data).execute()
     return result
-# [END create_cluster]
+# [END dataproc_create_cluster]
 
 
 def wait_for_cluster_creation(dataproc, project_id, region, cluster_name):
@@ -113,7 +113,7 @@ def wait_for_cluster_creation(dataproc, project_id, region, cluster_name):
             break
 
 
-# [START list_clusters_with_detail]
+# [START dataproc_list_clusters_with_detail]
 def list_clusters_with_details(dataproc, project, region):
     result = dataproc.projects().regions().clusters().list(
         projectId=project,
@@ -123,7 +123,7 @@ def list_clusters_with_details(dataproc, project, region):
         print("{} - {}"
               .format(cluster['clusterName'], cluster['status']['state']))
     return result
-# [END list_clusters_with_detail]
+# [END dataproc_list_clusters_with_detail]
 
 
 def get_cluster_id_by_name(cluster_list, cluster_name):
@@ -133,7 +133,7 @@ def get_cluster_id_by_name(cluster_list, cluster_name):
     return cluster['clusterUuid'], cluster['config']['configBucket']
 
 
-# [START submit_pyspark_job]
+# [START dataproc_submit_pyspark_job]
 def submit_pyspark_job(dataproc, project, region,
                        cluster_name, bucket_name, filename):
     """Submits the Pyspark job to the cluster, assuming `filename` has
@@ -156,10 +156,10 @@ def submit_pyspark_job(dataproc, project, region,
     job_id = result['reference']['jobId']
     print('Submitted job ID {}'.format(job_id))
     return job_id
-# [END submit_pyspark_job]
+# [END dataproc_submit_pyspark_job]
 
 
-# [START delete]
+# [START dataproc_delete]
 def delete_cluster(dataproc, project, region, cluster):
     print('Tearing down cluster')
     result = dataproc.projects().regions().clusters().delete(
@@ -167,10 +167,10 @@ def delete_cluster(dataproc, project, region, cluster):
         region=region,
         clusterName=cluster).execute()
     return result
-# [END delete]
+# [END dataproc_delete]
 
 
-# [START wait]
+# [START dataproc_wait]
 def wait_for_job(dataproc, project, region, job_id):
     print('Waiting for job to finish...')
     while True:
@@ -184,16 +184,16 @@ def wait_for_job(dataproc, project, region, job_id):
         elif result['status']['state'] == 'DONE':
             print('Job finished.')
             return result
-# [END wait]
+# [END dataproc_wait]
 
 
-# [START get_client]
+# [START dataproc_get_client]
 def get_client():
     """Builds an http client authenticated with the service account
     credentials."""
     dataproc = googleapiclient.discovery.build('dataproc', 'v1')
     return dataproc
-# [END get_client]
+# [END dataproc_get_client]
 
 
 def main(project_id, zone, cluster_name, bucket_name,
@@ -221,11 +221,11 @@ def main(project_id, zone, cluster_name, bucket_name,
         (cluster_id, output_bucket) = (
             get_cluster_id_by_name(cluster_list, cluster_name))
 
-        # [START call_submit_pyspark_job]
+        # [START dataproc_call_submit_pyspark_job]
         job_id = submit_pyspark_job(
            dataproc, project_id, region,
            cluster_name, bucket_name, spark_filename)
-        # [END call_submit_pyspark_job]
+        # [END dataproc_call_submit_pyspark_job]
         wait_for_job(dataproc, project_id, region, job_id)
 
         output = download_output(project_id, cluster_id, output_bucket, job_id)
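For reference, the regions being retagged above wrap ordinary `googleapiclient` calls. A minimal sketch of how the `dataproc_get_client` and `dataproc_list_clusters` snippets fit together is shown below; the project ID and region values are placeholders, not taken from the diff:

```python
import googleapiclient.discovery

# Build the Dataproc client, as in the dataproc_get_client region.
dataproc = googleapiclient.discovery.build('dataproc', 'v1')

# List clusters, as in the dataproc_list_clusters region.
# 'my-project' and 'global' are placeholder values.
result = dataproc.projects().regions().clusters().list(
    projectId='my-project',
    region='global').execute()

# The response may omit 'clusters' when the project has none.
for cluster in result.get('clusters', []):
    print(cluster['clusterName'])
```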