Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BigQuery: Replace table.insert_data() with client.create_rows() #4151

Merged
Merged 6 commits on Oct 11, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
client.create_rows() accepts list of dicts as rows parameter
  • Loading branch information
alixhami committed Oct 10, 2017
commit d5cd2c7394a958945932bf7f097e718881359b36
15 changes: 11 additions & 4 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -843,10 +843,15 @@ def create_rows(self, table, rows, row_ids=None, skip_invalid_rows=None,
:type table: :class:`~google.cloud.bigquery.client.Table`

This comment was marked as spam.

This comment was marked as spam.

This comment was marked as spam.

:param client: the destination table for the row data.

:type rows: list of tuples
:param rows: Row data to be inserted. Each tuple should contain data
for each schema field on the current table and in the
same order as the schema fields.
:type rows: One of:
list of tuples
list of dictionaries
:param rows: Row data to be inserted. If a list of tuples is given,
each tuple should contain data for each schema field on
the current table and in the same order as the schema
fields. If a list of dictionaries is given, the keys must
include all required fields in the schema. Keys which do
not correspond to a field in the schema are ignored.

:type row_ids: list of string
:param row_ids: (Optional) Unique ids, one per row being inserted.
Expand Down Expand Up @@ -887,6 +892,8 @@ def create_rows(self, table, rows, row_ids=None, skip_invalid_rows=None,
data = {'rows': rows_info}

for index, row in enumerate(rows):
if isinstance(row, dict):
row = table.row_from_mapping(row)

This comment was marked as spam.

This comment was marked as spam.

row_info = {}

for field, value in zip(table._schema, row):
Expand Down
65 changes: 65 additions & 0 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1996,6 +1996,71 @@ def _row_data(row):
self.assertEqual(req['path'], '/%s' % PATH)
self.assertEqual(req['data'], SENT)

def test_create_rows_w_list_of_dictionaries(self):
    """create_rows() accepts rows given as a list of dicts keyed by
    schema field name (not only tuples) and POSTs the expected
    ``insertAll`` payload to the API.
    """
    import datetime
    from google.cloud._helpers import UTC
    from google.cloud._helpers import _datetime_to_rfc3339
    from google.cloud._helpers import _microseconds_from_datetime
    from google.cloud.bigquery.table import Table, SchemaField
    from google.cloud.bigquery.dataset import DatasetReference

    PROJECT = 'PROJECT'
    DS_ID = 'DS_ID'
    TABLE_ID = 'TABLE_ID'
    WHEN_TS = 1437767599.006
    # Timezone-aware fixture timestamp for the TIMESTAMP column.
    WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(
        tzinfo=UTC)
    PATH = 'projects/%s/datasets/%s/tables/%s/insertAll' % (
        PROJECT, DS_ID, TABLE_ID)
    creds = _make_credentials()
    http = object()
    client = self._make_one(project=PROJECT, credentials=creds, _http=http)
    # Stub connection records every request; the empty response dict
    # means the API reports no insert errors.
    conn = client._connection = _Connection({})
    table_ref = DatasetReference(PROJECT, DS_ID).table(TABLE_ID)
    schema = [
        SchemaField('full_name', 'STRING', mode='REQUIRED'),
        SchemaField('age', 'INTEGER', mode='REQUIRED'),
        SchemaField('joined', 'TIMESTAMP', mode='NULLABLE'),
    ]
    table = Table(table_ref, schema=schema)
    # Rows as dicts, covering several value forms for the NULLABLE
    # 'joined' field: an RFC3339 string, datetime objects, and None.
    ROWS = [
        {
            'full_name': 'Phred Phlyntstone', 'age': 32,
            'joined': _datetime_to_rfc3339(WHEN)
        },
        {
            'full_name': 'Bharney Rhubble', 'age': 33,
            'joined': WHEN + datetime.timedelta(seconds=1)
        },
        {
            'full_name': 'Wylma Phlyntstone', 'age': 29,
            'joined': WHEN + datetime.timedelta(seconds=2)
        },
        {
            'full_name': 'Bhettye Rhubble', 'age': 27, 'joined': None
        },
    ]

    def _row_data(row):
        # Convert a row dict to its expected wire form: datetimes become
        # float seconds-since-epoch, INTEGER values become strings.
        # NOTE: mutates ``row`` in place, so building SENT below also
        # converts the dicts in ROWS before create_rows() receives them.
        joined = row['joined']
        if isinstance(joined, datetime.datetime):
            row['joined'] = _microseconds_from_datetime(joined) * 1e-6
        row['age'] = str(row['age'])
        return row

    SENT = {
        'rows': [{'json': _row_data(row)} for row in ROWS],
    }

    errors = client.create_rows(table, ROWS)

    # No insert errors, and exactly one POST to the insertAll endpoint
    # carrying the converted row payload.
    self.assertEqual(len(errors), 0)
    self.assertEqual(len(conn._requested), 1)
    req = conn._requested[0]
    self.assertEqual(req['method'], 'POST')
    self.assertEqual(req['path'], '/%s' % PATH)
    self.assertEqual(req['data'], SENT)

def test_create_rows_w_skip_invalid_and_ignore_unknown(self):
from google.cloud.bigquery.table import Table, SchemaField
from google.cloud.bigquery.dataset import DatasetReference
Expand Down