From 6ae83853f994720935640ea6c0cf20f140a155e2 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 27 May 2016 19:24:40 -0400 Subject: [PATCH] Convert table usage examples to tested snippets. --- docs/bigquery-usage.rst | 109 +++++--------- docs/bigquery_snippets.py | 297 +++++++++++++++++++++++++++++++++++++- 2 files changed, 324 insertions(+), 82 deletions(-) diff --git a/docs/bigquery-usage.rst b/docs/bigquery-usage.rst index d1689de22f6f..f2d0b4f6d21b 100644 --- a/docs/bigquery-usage.rst +++ b/docs/bigquery-usage.rst @@ -120,107 +120,64 @@ Tables Tables exist within datasets. List tables for the dataset: -.. doctest:: - - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> tables, next_page_token = dataset.list_tables() # API request - >>> [table.name for table in tables] - ['table_name'] +.. literalinclude:: bigquery_snippets.py + :start-after: [START dataset_list_tables] + :end-before: [END dataset_list_tables] Create a table: -.. doctest:: - - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> table.create() # API request +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_create] + :end-before: [END table_create] Check for the existence of a table: -.. doctest:: - - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> table.exists() # API request - True +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_exists] + :end-before: [END table_exists] Refresh metadata for a table (to pick up changes made by another client): -.. doctest:: - - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> dataset.reload() # API request +.. 
literalinclude:: bigquery_snippets.py + :start-after: [START table_reload] + :end-before: [END table_reload] Patch specific properties for a table: -.. doctest:: - - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> table.patch(friendly_name='Person Ages', - ... description='Ages of persons') # API request +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_patch] + :end-before: [END table_patch] Update all writable metadata for a table -.. doctest:: - - >>> from gcloud import bigquery - >>> from gcloud.bigquery import SchemaField - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> table.schema = [ - ... SchemaField('full_name', 'STRING', mode='required'), - ... SchemaField('age', 'INTEGER', mode='required)] - >>> table.update() # API request +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_update] + :end-before: [END table_update] -Upload table data from a file: +Get rows from a table's data: -.. doctest:: +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_fetch_data] + :end-before: [END table_fetch_data] - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> table.schema = [ - ... SchemaField('full_name', 'STRING', mode='required'), - ... SchemaField('age', 'INTEGER', mode='required)] - >>> with open('person_ages.csv', 'rb') as csv_file: - ... table.upload_from_file(csv_file, 'CSV', - ... create_disposition='CREATE_IF_NEEDED') +Insert rows into a table's data: -Get rows from a table's data: +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_insert_data] + :end-before: [END table_insert_data] -.. 
doctest:: +Upload table data from a file: - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> rows, next_page_token = table.fetch_data(max_results=100) # API request - >>> for row in rows: - ... for field, value in zip(table.schema, row): - ... do_something(field, value) +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_upload_from_file] + :end-before: [END table_upload_from_file] Delete a table: -.. doctest:: +.. literalinclude:: bigquery_snippets.py + :start-after: [START table_delete] + :end-before: [END table_delete] - >>> from gcloud import bigquery - >>> client = bigquery.Client() - >>> dataset = client.dataset('dataset_name') - >>> table = dataset.table(name='person_ages') - >>> table.delete() # API request Jobs ---- diff --git a/docs/bigquery_snippets.py b/docs/bigquery_snippets.py index 923d3fa12b50..1da17826043a 100644 --- a/docs/bigquery_snippets.py +++ b/docs/bigquery_snippets.py @@ -23,10 +23,30 @@ need to be deleted during teardown. 
""" +import operator import time +from gcloud.bigquery import SchemaField from gcloud.bigquery.client import Client +ORIGINAL_FRIENDLY_NAME = 'Original friendly name' +ORIGINAL_DESCRIPTION = 'Original description' +LOCALLY_CHANGED_FRIENDLY_NAME = 'Locally-changed friendly name' +LOCALLY_CHANGED_DESCRIPTION = 'Locally-changed description' +PATCHED_FRIENDLY_NAME = 'Patched friendly name' +PATCHED_DESCRIPTION = 'Patched description' +UPDATED_FRIENDLY_NAME = 'Updated friendly name' +UPDATED_DESCRIPTION = 'Updated description' + +SCHEMA = [ + SchemaField('full_name', 'STRING', mode='required'), + SchemaField('age', 'INTEGER', mode='required'), +] + +QUERY = ( + 'SELECT name FROM [bigquery-public-data:usa_names.usa_1910_2013] ' + 'WHERE state = "TX"') + def snippet(func): """Mark ``func`` as a snippet example function.""" @@ -38,6 +58,15 @@ def _millis(): return time.time() * 1000 +class _CloseOnDelete(object): + + def __init__(self, wrapped): + self._wrapped = wrapped + + def delete(self): + self._wrapped.close() + + @snippet def client_list_datasets(client, to_delete): # pylint: disable=unused-argument """List datasets for a project.""" @@ -87,8 +116,6 @@ def dataset_exists(client, to_delete): def dataset_reload(client, to_delete): """Reload a dataset's metadata.""" DATASET_NAME = 'dataset_reload_%d' % (_millis(),) - ORIGINAL_DESCRIPTION = 'Original description' - LOCALLY_CHANGED_DESCRIPTION = 'Locally-changed description' dataset = client.dataset(DATASET_NAME) dataset.description = ORIGINAL_DESCRIPTION dataset.create() @@ -107,8 +134,6 @@ def dataset_reload(client, to_delete): def dataset_patch(client, to_delete): """Patch a dataset's metadata.""" DATASET_NAME = 'dataset_patch_%d' % (_millis(),) - ORIGINAL_DESCRIPTION = 'Original description' - PATCHED_DESCRIPTION = 'Patched description' dataset = client.dataset(DATASET_NAME) dataset.description = ORIGINAL_DESCRIPTION dataset.create() @@ -130,8 +155,6 @@ def dataset_patch(client, to_delete): def dataset_update(client, 
to_delete): """Update a dataset's metadata.""" DATASET_NAME = 'dataset_update_%d' % (_millis(),) - ORIGINAL_DESCRIPTION = 'Original description' - UPDATED_DESCRIPTION = 'Updated description' dataset = client.dataset(DATASET_NAME) dataset.description = ORIGINAL_DESCRIPTION dataset.create() @@ -172,6 +195,268 @@ def dataset_delete(client, to_delete): # pylint: disable=unused-argument # [END dataset_delete] +@snippet +def dataset_list_tables(client, to_delete): + """List tables within a dataset.""" + DATASET_NAME = 'dataset_list_tables_dataset_%d' % (_millis(),) + TABLE_NAME = 'dataset_list_tables_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.create() + to_delete.append(dataset) + + # [START dataset_list_tables] + tables, token = dataset.list_tables() # API request + assert len(tables) == 0 + assert token is None + table = dataset.table(TABLE_NAME) + table.view_query = QUERY + table.create() # API request + tables, token = dataset.list_tables() # API request + assert len(tables) == 1 + assert tables[0].name == TABLE_NAME + # [END dataset_list_tables] + to_delete.insert(0, table) + + +@snippet +def table_create(client, to_delete): + """Create a table.""" + DATASET_NAME = 'table_create_dataset_%d' % (_millis(),) + TABLE_NAME = 'table_create_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.create() + to_delete.append(dataset) + + # [START table_create] + table = dataset.table(TABLE_NAME, SCHEMA) + table.create() # API request + # [END table_create] + + to_delete.insert(0, table) + + +@snippet +def table_exists(client, to_delete): + """Test existence of a table.""" + DATASET_NAME = 'table_exists_dataset_%d' % (_millis(),) + TABLE_NAME = 'table_exists_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.create() + to_delete.append(dataset) + + # [START table_exists] + table = dataset.table(TABLE_NAME, SCHEMA) + assert not table.exists() # API request + table.create() # API request + assert 
table.exists() # API request + # [END table_exists] + + to_delete.insert(0, table) + + +@snippet +def table_reload(client, to_delete): + """Reload a table's metadata.""" + DATASET_NAME = 'table_reload_dataset_%d' % (_millis(),) + TABLE_NAME = 'table_reload_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.create() + to_delete.append(dataset) + + table = dataset.table(TABLE_NAME, SCHEMA) + table.friendly_name = ORIGINAL_FRIENDLY_NAME + table.description = ORIGINAL_DESCRIPTION + table.create() + to_delete.insert(0, table) + + # [START table_reload] + assert table.friendly_name == ORIGINAL_FRIENDLY_NAME + assert table.description == ORIGINAL_DESCRIPTION + table.friendly_name = LOCALLY_CHANGED_FRIENDLY_NAME + table.description = LOCALLY_CHANGED_DESCRIPTION + table.reload() # API request + assert table.friendly_name == ORIGINAL_FRIENDLY_NAME + assert table.description == ORIGINAL_DESCRIPTION + # [END table_reload] + + +@snippet +def table_patch(client, to_delete): + """Patch a table's metadata.""" + DATASET_NAME = 'table_patch_dataset_%d' % (_millis(),) + TABLE_NAME = 'table_patch_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.description = ORIGINAL_DESCRIPTION + dataset.create() + to_delete.append(dataset) + + table = dataset.table(TABLE_NAME, SCHEMA) + table.friendly_name = ORIGINAL_FRIENDLY_NAME + table.description = ORIGINAL_DESCRIPTION + table.create() + to_delete.insert(0, table) + + # [START table_patch] + assert table.friendly_name == ORIGINAL_FRIENDLY_NAME + assert table.description == ORIGINAL_DESCRIPTION + table.patch( + friendly_name=PATCHED_FRIENDLY_NAME, + description=PATCHED_DESCRIPTION, + ) # API request + assert table.friendly_name == PATCHED_FRIENDLY_NAME + assert table.description == PATCHED_DESCRIPTION + # [END table_patch] + + +@snippet +def table_update(client, to_delete): + """Update a table's metadata.""" + DATASET_NAME = 'table_update_dataset_%d' % (_millis(),) + TABLE_NAME = 
'table_update_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.description = ORIGINAL_DESCRIPTION + dataset.create() + to_delete.append(dataset) + + table = dataset.table(TABLE_NAME, SCHEMA) + table.friendly_name = ORIGINAL_FRIENDLY_NAME + table.description = ORIGINAL_DESCRIPTION + table.create() + to_delete.insert(0, table) + + # [START table_update] + assert table.friendly_name == ORIGINAL_FRIENDLY_NAME + assert table.description == ORIGINAL_DESCRIPTION + NEW_SCHEMA = table.schema[:] + NEW_SCHEMA.append(SchemaField('phone', 'string')) + table.friendly_name = UPDATED_FRIENDLY_NAME + table.description = UPDATED_DESCRIPTION + table.schema = NEW_SCHEMA + table.update() # API request + assert table.friendly_name == UPDATED_FRIENDLY_NAME + assert table.description == UPDATED_DESCRIPTION + assert table.schema == NEW_SCHEMA + # [END table_update] + + +def _warm_up_inserted_table_data(table): + # Allow for 90 seconds of "warm up" before rows visible. See: + # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + rows = () + counter = 18 + + while len(rows) == 0 and counter > 0: + counter -= 1 + rows, _, _ = table.fetch_data() + if len(rows) == 0: + time.sleep(5) + + +@snippet +def table_insert_fetch_data(client, to_delete): + """Insert / fetch table data.""" + DATASET_NAME = 'table_insert_fetch_data_dataset_%d' % (_millis(),) + TABLE_NAME = 'table_insert_fetch_data_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.create() + to_delete.append(dataset) + + table = dataset.table(TABLE_NAME, SCHEMA) + table.create() + to_delete.insert(0, table) + + # [START table_insert_data] + ROWS_TO_INSERT = [ + (u'Phred Phlyntstone', 32), + (u'Wylma Phlyntstone', 29), + ] + + table.insert_data(ROWS_TO_INSERT) + # [END table_insert_data] + + _warm_up_inserted_table_data(table) + + found_rows = [] + + def do_something(row): + found_rows.append(row) + + # [START table_fetch_data] + rows, _, token = 
table.fetch_data() + while True: + for row in rows: + do_something(row) + if token is None: + break + rows, _, token = table.fetch_data(page_token=token) + # [END table_fetch_data] + + assert len(found_rows) == len(ROWS_TO_INSERT) + by_age = operator.itemgetter(1) + found_rows = reversed(sorted(found_rows, key=by_age)) + for found, to_insert in zip(found_rows, ROWS_TO_INSERT): + assert found == to_insert + + +@snippet +def table_upload_from_file(client, to_delete): + """Upload table data from a CSV file.""" + import csv + import tempfile + DATASET_NAME = 'table_upload_from_file_dataset_%d' % (_millis(),) + TABLE_NAME = 'table_upload_from_file_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.create() + to_delete.append(dataset) + + table = dataset.table(TABLE_NAME, SCHEMA) + table.create() + to_delete.insert(0, table) + + csv_file = tempfile.NamedTemporaryFile(suffix='.csv') + to_delete.append(_CloseOnDelete(csv_file)) + + # [START table_upload_from_file] + writer = csv.writer(csv_file) + writer.writerow((b'full_name', b'age')) + writer.writerow((b'Phred Phlyntstone', b'32')) + writer.writerow((b'Wylma Phlyntstone', b'29')) + csv_file.flush() + + with open(csv_file.name, 'rb') as readable: + table.upload_from_file( + readable, source_format='CSV', skip_leading_rows=1) + # [END table_upload_from_file] + + _warm_up_inserted_table_data(table) + + rows, total, token = table.fetch_data() + + assert len(rows) == total == 2 + assert token is None + assert rows[0] == (u'Phred Phlyntstone', 32) + assert rows[1] == (u'Wylma Phlyntstone', 29) + + +@snippet +def table_delete(client, to_delete): # pylint: disable=unused-argument + """Delete a table.""" + DATASET_NAME = 'table_delete_dataset_%d' % (_millis(),) + TABLE_NAME = 'table_delete_table_%d' % (_millis(),) + dataset = client.dataset(DATASET_NAME) + dataset.create() + to_delete.append(dataset) + + table = dataset.table(TABLE_NAME, SCHEMA) + table.create() + + # [START table_delete] + assert 
table.exists() # API request + table.delete() # API request + assert not table.exists() # API request + # [END table_delete] + + def _find_examples(): funcs = [obj for obj in globals().values() if getattr(obj, '_snippet', False)]