Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'QueryJob.referenced_tables' property. #3801

Merged
merged 1 commit into from
Sep 8, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions bigquery/google/cloud/bigquery/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1386,6 +1386,36 @@ def statement_type(self):
"""
return self._job_statistics().get('statementType')

@property
def referenced_tables(self):
    """Return referenced tables from job statistics, if present.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables

    :rtype: list of :class:`~google.cloud.bigquery.table.Table`
    :returns: one table per entry of the ``referencedTables`` job
              statistic, in the order reported, each obtained via
              ``client.dataset(...).table(...)``; an empty list when
              that statistic is absent (e.g. the query has not yet
              completed).
    """
    client = self._require_client(None)
    # Entries sharing a (project, dataset) pair reuse a single dataset
    # object instead of asking the client for a new one each time.
    datasets = {}
    tables = []

    for resource in self._job_statistics().get('referencedTables', ()):
        key = (resource['projectId'], resource['datasetId'])
        dataset = datasets.get(key)
        if dataset is None:
            project, dataset_id = key
            dataset = client.dataset(dataset_id, project=project)
            datasets[key] = dataset
        tables.append(dataset.table(resource['tableId']))

    return tables

def query_results(self):
"""Construct a QueryResults instance, bound to this job.

Expand Down
58 changes: 56 additions & 2 deletions bigquery/tests/unit/test_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -1791,6 +1791,60 @@ def test_statement_type(self):
query_stats['statementType'] = statement_type
self.assertEqual(job.statement_type, statement_type)

def test_referenced_tables(self):
    """Check ``referenced_tables`` before and after statistics are set.

    The property must be an empty list until the query statistics carry
    a ``referencedTables`` entry; afterwards each resource must come back
    as a ``Table`` whose ``Dataset`` is bound to the job's client.
    """
    from google.cloud.bigquery.dataset import Dataset
    from google.cloud.bigquery.table import Table

    # (project, dataset, table) for every reference, in resource order:
    # two tables sharing one dataset, plus one from another project.
    expected = [
        (self.PROJECT, 'dataset', 'local1'),
        (self.PROJECT, 'dataset', 'local2'),
        ('other-project-123', 'other-dataset', 'other-table'),
    ]
    ref_tables_resource = [
        {'projectId': project, 'datasetId': dataset, 'tableId': table}
        for project, dataset, table in expected
    ]
    client = _Client(self.PROJECT)
    job = self._make_one(self.JOB_NAME, self.QUERY, client)

    # Before this test stores any statistics on the job.
    self.assertEqual(job.referenced_tables, [])

    # With an empty 'statistics' mapping.
    statistics = {}
    job._properties['statistics'] = statistics
    self.assertEqual(job.referenced_tables, [])

    # With an empty 'query' sub-mapping.
    query_stats = {}
    statistics['query'] = query_stats
    self.assertEqual(job.referenced_tables, [])

    query_stats['referencedTables'] = ref_tables_resource

    # Unpacking also pins the number of returned tables to three.
    local1, local2, remote = job.referenced_tables

    for found, (project, dataset_name, table_name) in zip(
            (local1, local2, remote), expected):
        self.assertIsInstance(found, Table)
        self.assertEqual(found.name, table_name)
        self.assertIsInstance(found._dataset, Dataset)
        self.assertEqual(found.dataset_name, dataset_name)
        self.assertEqual(found.project, project)
        self.assertIs(found._dataset._client, client)

def test_query_results(self):
from google.cloud.bigquery.query import QueryResults

Expand Down Expand Up @@ -2413,10 +2467,10 @@ def __init__(self, project='project', connection=None):
self.project = project
self._connection = connection

def dataset(self, name, project=None):
    """Build a ``Dataset`` bound to this test client.

    :param name: dataset name, passed positionally to ``Dataset``.
    :param project: forwarded to ``Dataset`` as its ``project`` keyword;
                    ``None`` when the caller does not supply one.
    :returns: the newly constructed ``Dataset``.
    """
    from google.cloud.bigquery.dataset import Dataset

    return Dataset(name, client=self, project=project)


class _Table(object):
Expand Down