From 5ab62710792f73bc7aa299777dad31392fac3b87 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Sat, 12 Aug 2017 16:41:19 -0400 Subject: [PATCH] Add 'QueryJob.referenced_tables' property. --- bigquery/google/cloud/bigquery/job.py | 30 ++++++++++++++ bigquery/tests/unit/test_job.py | 58 ++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index bbe9b78ddade..f582868f4730 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1386,6 +1386,36 @@ def statement_type(self): """ return self._job_statistics().get('statementType') + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables + + :rtype: list of :class:`google.cloud.bigquery.table.Table` + :returns: tables referenced by the query, or an empty list + if the job statistics do not list any. + """ + tables = [] + client = self._require_client(None) + datasets_by_project_name = {} + + for table in self._job_statistics().get('referencedTables', ()): + + t_project = table['projectId'] + + ds_name = table['datasetId'] + t_dataset = datasets_by_project_name.get((t_project, ds_name)) + if t_dataset is None: + t_dataset = client.dataset(ds_name, project=t_project) + datasets_by_project_name[(t_project, ds_name)] = t_dataset + + t_name = table['tableId'] + tables.append(t_dataset.table(t_name)) + + return tables + def query_results(self): """Construct a QueryResults instance, bound to this job. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 677fa259f1c4..379373e6ab7e 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1791,6 +1791,60 @@ def test_statement_type(self): query_stats['statementType'] = statement_type self.assertEqual(job.statement_type, statement_type) + def test_referenced_tables(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table + + ref_tables_resource = [{ + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local1', + }, { + + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local2', + }, { + + 'projectId': 'other-project-123', + 'datasetId': 'other-dataset', + 'tableId': 'other-table', + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats['referencedTables'] = ref_tables_resource + + local1, local2, remote = job.referenced_tables + + self.assertIsInstance(local1, Table) + self.assertEqual(local1.name, 'local1') + self.assertIsInstance(local1._dataset, Dataset) + self.assertEqual(local1.dataset_name, 'dataset') + self.assertEqual(local1.project, self.PROJECT) + self.assertIs(local1._dataset._client, client) + + self.assertIsInstance(local2, Table) + self.assertEqual(local2.name, 'local2') + self.assertIsInstance(local2._dataset, Dataset) + self.assertEqual(local2.dataset_name, 'dataset') + self.assertEqual(local2.project, self.PROJECT) + self.assertIs(local2._dataset._client, client) + + self.assertIsInstance(remote, Table) + self.assertEqual(remote.name, 'other-table') + self.assertIsInstance(remote._dataset, Dataset) + self.assertEqual(remote.dataset_name, 'other-dataset') + 
self.assertEqual(remote.project, 'other-project-123') + self.assertIs(remote._dataset._client, client) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults @@ -2413,10 +2467,10 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection - def dataset(self, name): + def dataset(self, name, project=None): from google.cloud.bigquery.dataset import Dataset - return Dataset(name, client=self) + return Dataset(name, client=self, project=project) class _Table(object):