diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py
index 91301b1ed8d2..faeb86ceec25 100644
--- a/bigquery/google/cloud/bigquery/job.py
+++ b/bigquery/google/cloud/bigquery/job.py
@@ -1949,6 +1949,28 @@ def result(self, timeout=None, retry=DEFAULT_RETRY):
         return self._client.list_rows(dest_table, selected_fields=schema,
                                       retry=retry)
 
+    def to_dataframe(self):
+        """Return the query results as a pandas DataFrame.
+
+        Calls :meth:`result` to obtain the row iterator, uses the field
+        names of the iterator's schema as column labels, and stores the
+        ``values()`` of each row as one DataFrame row.
+
+        :rtype: :class:`pandas.DataFrame`
+        :returns: the result rows, with columns in schema order; the frame
+                  is empty (but keeps its columns) when there are no rows.
+        :raises ImportError: if ``pandas`` is not installed.
+        """
+        # Function-scope import: pandas is only needed when this method is
+        # actually called.
+        import pandas as pd
+
+        iterator = self.result()
+        column_headers = [field.name for field in iterator.schema]
+        rows = [row.values() for row in iterator]
+
+        return pd.DataFrame(rows, columns=column_headers)
+
     def __iter__(self):
         return iter(self.result())
 
diff --git a/bigquery/tests/system.py b/bigquery/tests/system.py
index 05faf6fb71ee..9482643f2e4b 100644
--- a/bigquery/tests/system.py
+++ b/bigquery/tests/system.py
@@ -1242,6 +1242,23 @@ def test_query_iter(self):
         row_tuples = [r.values() for r in query_job]
         self.assertEqual(row_tuples, [(1,)])
 
+    def test_query_to_dataframe(self):
+        import pandas as pd
+
+        query = """
+            SELECT corpus AS title, COUNT(*) AS unique_words
+            FROM `bigquery-public-data.samples.shakespeare`
+            GROUP BY title
+            ORDER BY unique_words DESC
+            LIMIT 10"""
+
+        query_job = Config.CLIENT.query(query)
+        df = query_job.to_dataframe()
+
+        self.assertIsInstance(df, pd.DataFrame)
+        self.assertEqual(list(df), ['title', 'unique_words'])
+        self.assertEqual(len(df), 10)
+
     def test_query_table_def(self):
         gs_url = self._write_csv_to_storage(
             'bq_external_test' + unique_resource_id(), 'person_ages.csv',
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py
index 8fd2ee07c13e..98568168cd2f 100644
--- a/bigquery/tests/unit/test_job.py
+++ b/bigquery/tests/unit/test_job.py
@@ -2720,6 +2720,70 @@ def test_reload_w_alternate_client(self):
         self.assertEqual(req['path'], PATH)
         self._verifyResourceProperties(job, RESOURCE)
 
+    def test_to_dataframe(self):
+        import pandas as pd
+
+        begun_resource = self._makeResource()
+        query_resource = {
+            'jobComplete': True,
+            'jobReference': {
+                'projectId': self.PROJECT,
+                'jobId': self.JOB_ID,
+            },
+            'schema': {
+                'fields': [
+                    {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
+                    {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+                ],
+            },
+            'rows': [
+                {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
+                {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
+                {'f': [{'v': 'Wylma Phlyntstone'}, {'v': '29'}]},
+                {'f': [{'v': 'Bhettye Rhubble'}, {'v': '27'}]},
+            ],
+        }
+        done_resource = copy.deepcopy(begun_resource)
+        done_resource['status'] = {'state': 'DONE'}
+        connection = _Connection(
+            begun_resource, query_resource, done_resource, query_resource)
+        client = _make_client(project=self.PROJECT, connection=connection)
+        job = self._make_one(self.JOB_ID, self.QUERY, client)
+        df = job.to_dataframe()
+
+        self.assertIsInstance(df, pd.DataFrame)
+        self.assertEqual(len(df), 4)
+        self.assertEqual(list(df), ['name', 'age'])
+
+    def test_to_dataframe_w_empty_results(self):
+        import pandas as pd
+
+        begun_resource = self._makeResource()
+        query_resource = {
+            'jobComplete': True,
+            'jobReference': {
+                'projectId': self.PROJECT,
+                'jobId': self.JOB_ID,
+            },
+            'schema': {
+                'fields': [
+                    {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
+                    {'name': 'age', 'type': 'INTEGER', 'mode': 'NULLABLE'},
+                ],
+            },
+        }
+        done_resource = copy.deepcopy(begun_resource)
+        done_resource['status'] = {'state': 'DONE'}
+        connection = _Connection(
+            begun_resource, query_resource, done_resource, query_resource)
+        client = _make_client(project=self.PROJECT, connection=connection)
+        job = self._make_one(self.JOB_ID, self.QUERY, client)
+        df = job.to_dataframe()
+
+        self.assertIsInstance(df, pd.DataFrame)
+        self.assertEqual(len(df), 0)
+        self.assertEqual(list(df), ['name', 'age'])
+
     def test_iter(self):
         import types
 