Skip to content

Commit

Permalink
adds to_dataframe() helper to QueryJob
Browse files Browse the repository at this point in the history
  • Loading branch information
alixhami committed Nov 22, 2017
1 parent 10fcd7c commit 6762f95
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
15 changes: 14 additions & 1 deletion bigquery/google/cloud/bigquery/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1929,7 +1929,7 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
:type retry: :class:`google.api_core.retry.Retry`
:param retry: (Optional) How to retry the call that retrieves rows.
:rtype: :class:`~google.api_core.page_iterator.Iterator`
:rtype: :class:`~google.cloud.bigquery.table.RowIterator`
:returns:
Iterator of row data :class:`~google.cloud.bigquery.table.Row`-s.
During each page, the iterator will have the ``total_rows``
Expand All @@ -1949,6 +1949,19 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
return self._client.list_rows(dest_table, selected_fields=schema,
retry=retry)

def to_dataframe(self):
    """Build a pandas DataFrame from this query job's results.

    The job's row iterator is obtained through :meth:`result` and its
    own ``to_dataframe()`` conversion is returned unchanged.

    Returns:
        A :class:`~pandas.DataFrame` holding the row data of the query
        results, with column headers taken from the destination
        table's schema.

    Raises:
        ValueError: If the `pandas` library cannot be imported.
    """
    row_iterator = self.result()
    return row_iterator.to_dataframe()

def __iter__(self):
    """Return an iterator over whatever :meth:`result` returns."""
    rows = self.result()
    return iter(rows)

Expand Down
35 changes: 35 additions & 0 deletions bigquery/tests/unit/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -2724,6 +2724,41 @@ def test_reload_w_alternate_client(self):
self.assertEqual(req['path'], PATH)
self._verifyResourceProperties(job, RESOURCE)

@unittest.skipIf(pandas is None, 'Requires `pandas`')
def test_to_dataframe(self):
    """``to_dataframe()`` on a job returns a DataFrame of the query rows."""
    # (name, age) pairs served back by the fake connection as result rows.
    people = (
        ('Phred Phlyntstone', '32'),
        ('Bharney Rhubble', '33'),
        ('Wylma Phlyntstone', '29'),
        ('Bhettye Rhubble', '27'),
    )
    expected_columns = ['name', 'age']

    begun_resource = self._make_resource()
    job_reference = {'projectId': self.PROJECT, 'jobId': self.JOB_ID}
    schema_fields = [
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
    ]
    query_resource = {
        'jobComplete': True,
        'jobReference': job_reference,
        'schema': {'fields': schema_fields},
        'rows': [
            {'f': [{'v': name}, {'v': age}]} for name, age in people
        ],
    }

    # Same resource as the begun job, but flagged as finished.
    done_resource = copy.deepcopy(begun_resource)
    done_resource['status'] = {'state': 'DONE'}

    # Canned responses, supplied to the fake connection in this order.
    responses = (
        begun_resource, query_resource, done_resource, query_resource)
    connection = _Connection(*responses)
    client = _make_client(project=self.PROJECT, connection=connection)
    job = self._make_one(self.JOB_ID, self.QUERY, client)

    frame = job.to_dataframe()

    self.assertIsInstance(frame, pandas.DataFrame)
    # One DataFrame row per result row.
    self.assertEqual(len(frame), len(people))
    # Iterating a DataFrame yields its column labels.
    self.assertEqual(list(frame), expected_columns)

def test_iter(self):
import types

Expand Down

0 comments on commit 6762f95

Please sign in to comment.