diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
index 0b77ef0e1cc8e..23926e42f4f55 100644
--- a/bigquery/google/cloud/bigquery/job.py
+++ b/bigquery/google/cloud/bigquery/job.py
@@ -1949,6 +1949,27 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
         return self._client.list_rows(dest_table, selected_fields=schema,
                                       retry=retry)
 
+    def to_dataframe(self):
+        """Return the query results as a pandas DataFrame.
+
+        Fetches the results through :meth:`result` and loads every row
+        into memory before building the DataFrame.
+
+        :rtype: :class:`pandas.DataFrame`
+        :returns: one row per result row, with columns named after the
+                  fields of the result schema (in schema order).
+
+        :raises ImportError: if the ``pandas`` library is not installed.
+        """
+        # Imported lazily so that ``pandas`` remains an optional dependency.
+        import pandas as pd
+
+        iterator = self.result()
+        column_headers = [field.name for field in iterator.schema]
+        rows = [row.values() for row in iterator]
+
+        return pd.DataFrame(rows, columns=column_headers)
+
 
 class QueryPlanEntryStep(object):
     """Map a single step in a query plan entry.
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index 7376005a8de19..e0f36a207dc0c 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -1235,6 +1235,26 @@ def test_query_future(self):
         row_tuples = [r.values() for r in iterator]
         self.assertEqual(row_tuples, [(1,)])
 
+    def test_query_to_dataframe(self):
+        try:
+            import pandas as pd
+        except ImportError:
+            self.skipTest('pandas is required for this test')
+
+        query = """
+            SELECT corpus AS title, COUNT(*) AS unique_words
+            FROM `bigquery-public-data.samples.shakespeare`
+            GROUP BY title
+            ORDER BY unique_words DESC
+            LIMIT 10"""
+
+        query_job = Config.CLIENT.query(query)
+        df = query_job.to_dataframe()
+
+        self.assertIsInstance(df, pd.DataFrame)
+        self.assertEqual(list(df), ['title', 'unique_words'])
+        self.assertEqual(len(df), 10)
+
     def test_query_table_def(self):
         gs_url = self._write_csv_to_storage(
             'bq_external_test' + unique_resource_id(), 'person_ages.csv',
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
index 4f94a1881a305..b5616a17fbaa1 100644
--- a/bigquery/tests/unit/test_job.py
+++ b/bigquery/tests/unit/test_job.py
@@ -2720,6 +2720,76 @@ def test_reload_w_alternate_client(self):
         self.assertEqual(req['path'], PATH)
         self._verifyResourceProperties(job, RESOURCE)
 
+    def test_to_dataframe(self):
+        try:
+            import pandas as pd
+        except ImportError:
+            self.skipTest('pandas is required for this test')
+
+        begun_resource = self._makeResource()
+        query_resource = {
+            'jobComplete': True,
+            'jobReference': {
+                'projectId': self.PROJECT,
+                'jobId': self.JOB_ID,
+            },
+            'schema': {
+                'fields': [
+                    {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
+                    {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+                ],
+            },
+            'rows': [
+                {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
+                {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
+                {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]},
+                {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]},
+            ],
+        }
+        done_resource = copy.deepcopy(begun_resource)
+        done_resource['status'] = {'state': 'DONE'}
+        connection = _Connection(
+            begun_resource, query_resource, done_resource, query_resource)
+        client = _make_client(project=self.PROJECT, connection=connection)
+        job = self._make_one(self.JOB_ID, self.QUERY, client)
+        df = job.to_dataframe()
+
+        self.assertIsInstance(df, pd.DataFrame)
+        self.assertEqual(len(df), 4)
+        self.assertEqual(list(df), ['name', 'age'])
+
+    def test_to_dataframe_w_empty_results(self):
+        try:
+            import pandas as pd
+        except ImportError:
+            self.skipTest('pandas is required for this test')
+
+        begun_resource = self._makeResource()
+        query_resource = {
+            'jobComplete': True,
+            'jobReference': {
+                'projectId': self.PROJECT,
+                'jobId': self.JOB_ID,
+            },
+            'schema': {
+                'fields': [
+                    {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
+                    {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+                ],
+            },
+        }
+        done_resource = copy.deepcopy(begun_resource)
+        done_resource['status'] = {'state': 'DONE'}
+        connection = _Connection(
+            begun_resource, query_resource, done_resource, query_resource)
+        client = _make_client(project=self.PROJECT, connection=connection)
+        job = self._make_one(self.JOB_ID, self.QUERY, client)
+        df = job.to_dataframe()
+
+        self.assertIsInstance(df, pd.DataFrame)
+        self.assertEqual(len(df), 0)
+        self.assertEqual(list(df), ['name', 'age'])
+
 
 class TestQueryPlanEntryStep(unittest.TestCase, _Base):
     KIND = 'KIND'