Skip to content

Commit

Permalink
BigQuery: Use DatasetListItem for client.list_datasets
Browse files Browse the repository at this point in the history
Listing datasets only includes a subset of the properties available on a
dataset. The DatasetListItem class is used to explicitly document which
features are available and to prevent confusion from trying to use the
resulting object in other contexts, like updating.
  • Loading branch information
tswast committed Nov 22, 2017
1 parent b24ce6e commit dad8587
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 9 deletions.
10 changes: 6 additions & 4 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from google.cloud.bigquery._helpers import _snake_to_camel_case
from google.cloud.bigquery._http import Connection
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.job import CopyJob
from google.cloud.bigquery.job import ExtractJob
Expand Down Expand Up @@ -183,8 +184,9 @@ def list_datasets(self, include_all=False, filter=None, max_results=None,
:param retry: (Optional) How to retry the RPC.
:rtype: :class:`~google.api_core.page_iterator.Iterator`
:returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`.
accessible to the current client.
:returns:
Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`
associated with the client's project.
"""
extra_params = {}
if include_all:
Expand Down Expand Up @@ -1273,10 +1275,10 @@ def _item_to_dataset(iterator, resource):
:type resource: dict
:param resource: An item to be converted to a dataset.
:rtype: :class:`.Dataset`
:rtype: :class:`.DatasetListItem`
:returns: The next dataset in the page.
"""
return Dataset.from_api_repr(resource)
return DatasetListItem(resource)


def _item_to_job(iterator, resource):
Expand Down
92 changes: 89 additions & 3 deletions bigquery/google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,7 @@ def full_dataset_id(self):

@property
def reference(self):
"""A :class:`~google.cloud.bigquery.dataset.DatasetReference` pointing to
this dataset.
"""A reference to this dataset.
Returns:
google.cloud.bigquery.dataset.DatasetReference:
Expand Down Expand Up @@ -546,4 +545,91 @@ def table(self, table_id):
:rtype: :class:`~google.cloud.bigquery.table.TableReference`
:returns: a TableReference for a table in this dataset.
"""
return TableReference(self, table_id)
return TableReference(self.reference, table_id)


class DatasetListItem(object):
    """A read-only dataset resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the
    dataset properties when listing datasets. Notably,
    :attr:`~google.cloud.bigquery.dataset.Dataset.access_entries` is missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for datasets.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list>`_.

    Args:
        resource (dict):
            A dataset-like resource object from a dataset list response.
            It must contain a ``datasetReference`` mapping that holds both
            a ``projectId`` and a ``datasetId`` entry.

    Raises:
        ValueError:
            If ``datasetReference``, or its ``projectId`` or ``datasetId``
            entry, is missing from ``resource``.
    """

    def __init__(self, resource):
        # Validate once, up front: ``project``, ``dataset_id``, ``reference``
        # and ``table`` all depend on these keys, and a clear error here is
        # easier to diagnose than a failure on ``None`` at first access.
        dataset_ref = resource.get('datasetReference')
        if not dataset_ref:
            raise ValueError('resource must contain a datasetReference value')
        if 'projectId' not in dataset_ref:
            raise ValueError(
                "resource['datasetReference'] must contain a projectId value")
        if 'datasetId' not in dataset_ref:
            raise ValueError(
                "resource['datasetReference'] must contain a datasetId value")
        self._properties = resource

    @property
    def project(self):
        """Project bound to the dataset.

        :rtype: str
        :returns: the project.
        """
        return self._properties['datasetReference']['projectId']

    @property
    def dataset_id(self):
        """Dataset ID.

        :rtype: str
        :returns: the dataset ID.
        """
        return self._properties['datasetReference']['datasetId']

    @property
    def full_dataset_id(self):
        """ID for the dataset resource, in the form "project_id:dataset_id".

        :rtype: str, or ``NoneType``
        :returns: the ID (None until set from the server).
        """
        return self._properties.get('id')

    @property
    def friendly_name(self):
        """Title of the dataset.

        :rtype: str, or ``NoneType``
        :returns: The name as set by the user, or None (the default).
        """
        return self._properties.get('friendlyName')

    @property
    def labels(self):
        """Labels for the dataset.

        :rtype: dict, {str -> str}
        :returns: A dict of the dataset's labels; an empty dict when the
                  resource carries no ``labels`` entry.
        """
        # A resource without a ``labels`` key must not raise ``KeyError``.
        return self._properties.get('labels', {})

    @property
    def reference(self):
        """A reference to this dataset.

        Returns:
            google.cloud.bigquery.dataset.DatasetReference:
                A pointer to this dataset
        """
        return DatasetReference(self.project, self.dataset_id)

    def table(self, table_id):
        """Constructs a TableReference.

        :type table_id: str
        :param table_id: the ID of the table.

        :rtype: :class:`~google.cloud.bigquery.table.TableReference`
        :returns: a TableReference for a table in this dataset.
        """
        return TableReference(self.reference, table_id)
4 changes: 2 additions & 2 deletions bigquery/tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_list_projects_explicit_response_missing_projects_key(self):
{'maxResults': 3, 'pageToken': TOKEN})

def test_list_datasets_defaults(self):
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem

DATASET_1 = 'dataset_one'
DATASET_2 = 'dataset_two'
Expand Down Expand Up @@ -215,7 +215,7 @@ def test_list_datasets_defaults(self):

self.assertEqual(len(datasets), len(DATA['datasets']))
for found, expected in zip(datasets, DATA['datasets']):
self.assertIsInstance(found, Dataset)
self.assertIsInstance(found, DatasetListItem)
self.assertEqual(found.full_dataset_id, expected['id'])
self.assertEqual(found.friendly_name, expected['friendlyName'])
self.assertEqual(token, TOKEN)
Expand Down
57 changes: 57 additions & 0 deletions bigquery/tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,3 +460,60 @@ def test_table(self):
self.assertEqual(table.table_id, 'table_id')
self.assertEqual(table.dataset_id, self.DS_ID)
self.assertEqual(table.project, self.PROJECT)


class TestDatasetListItem(unittest.TestCase):
    """Unit tests for ``DatasetListItem``."""

    @staticmethod
    def _get_target_class():
        """Import the class under test lazily and return it."""
        from google.cloud.bigquery.dataset import DatasetListItem

        return DatasetListItem

    def _make_one(self, *args, **kw):
        """Build an instance of the class under test from the arguments."""
        klass = self._get_target_class()
        return klass(*args, **kw)

    def test_ctor(self):
        """Each property reflects the matching entry of the resource."""
        project = 'test-project'
        dataset_id = 'test_dataset'
        full_id = '{}:{}'.format(project, dataset_id)
        dataset_ref = {
            'projectId': project,
            'datasetId': dataset_id,
        }
        label_map = {
            'some-stuff': 'this-is-a-label',
        }

        item = self._make_one({
            'kind': 'bigquery#dataset',
            'id': full_id,
            'datasetReference': dataset_ref,
            'friendlyName': 'Data of the Test',
            'labels': label_map,
        })

        self.assertEqual(item.project, project)
        self.assertEqual(item.dataset_id, dataset_id)
        self.assertEqual(
            item.full_dataset_id,
            '{}:{}'.format(project, dataset_id))
        self.assertEqual(item.reference.project, project)
        self.assertEqual(item.reference.dataset_id, dataset_id)
        self.assertEqual(item.friendly_name, 'Data of the Test')
        self.assertEqual(item.labels['some-stuff'], 'this-is-a-label')

    def test_table(self):
        """``table()`` returns a TableReference bound to this dataset."""
        from google.cloud.bigquery.table import TableReference

        project = 'test-project'
        dataset_id = 'test_dataset'
        dataset_ref = {
            'projectId': project,
            'datasetId': dataset_id,
        }
        item = self._make_one({'datasetReference': dataset_ref})

        table_ref = item.table('table_id')

        self.assertIsInstance(table_ref, TableReference)
        self.assertEqual(table_ref.table_id, 'table_id')
        self.assertEqual(table_ref.dataset_id, dataset_id)
        self.assertEqual(table_ref.project, project)

0 comments on commit dad8587

Please sign in to comment.