BigQuery: Use DatasetListItem for client.list_datasets (#4439)
* BigQuery: Use DatasetListItem for client.list_datasets

Listing datasets only includes a subset of the properties available on a
dataset. The DatasetListItem class is used to explicitly document which
properties are available and to prevent confusion from trying to use the
resulting object in other contexts, such as updating a dataset. (A brief
usage sketch follows these notes.)

* Fix lint errors.

* Make dataset & table reference required, labels optional.

* Fix lint error
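
For illustration, here is a minimal usage sketch (the one referenced above). It is not part of the commit; it assumes a client authenticated via application default credentials and uses client.get_dataset to fetch the full, mutable resource when more than the listed properties are needed.

    from google.cloud import bigquery

    client = bigquery.Client()  # assumes application default credentials

    for item in client.list_datasets():
        # Each item is a DatasetListItem carrying only the listed subset of
        # properties: IDs, friendly name, labels, and a DatasetReference.
        print(item.dataset_id, item.friendly_name, item.labels)

        # To read or modify the full resource, fetch it via the reference.
        dataset = client.get_dataset(item.reference)
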
tswast authored Dec 6, 2017
1 parent 3fb9f16 commit cc852af
Showing 6 changed files with 292 additions and 18 deletions.
11 changes: 7 additions & 4 deletions bigquery/google/cloud/bigquery/client.py
@@ -36,6 +36,7 @@
from google.cloud.bigquery._helpers import _snake_to_camel_case
from google.cloud.bigquery._http import Connection
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.job import CopyJob
from google.cloud.bigquery.job import ExtractJob
@@ -181,8 +182,10 @@ def list_datasets(self, include_all=False, filter=None, max_results=None,
:param retry: (Optional) How to retry the RPC.
:rtype: :class:`~google.api_core.page_iterator.Iterator`
:returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`.
accessible to the current client.
:returns:
Iterator of
:class:`~google.cloud.bigquery.dataset.DatasetListItem`.
associated with the client's project.
"""
extra_params = {}
if include_all:
@@ -1275,10 +1278,10 @@ def _item_to_dataset(iterator, resource):
:type resource: dict
:param resource: An item to be converted to a dataset.
:rtype: :class:`.Dataset`
:rtype: :class:`.DatasetListItem`
:returns: The next dataset in the page.
"""
return Dataset.from_api_repr(resource)
return DatasetListItem(resource)


def _item_to_job(iterator, resource):
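
For context, a small, hypothetical sketch of what the _item_to_dataset hook above now does: each raw resource from a datasets.list page is wrapped in a DatasetListItem rather than parsed into a full Dataset. The sample page dict below is invented for illustration.

    from google.cloud.bigquery.dataset import DatasetListItem

    def _item_to_dataset(iterator, resource):
        """Convert one datasets.list entry into a DatasetListItem."""
        return DatasetListItem(resource)

    # Hypothetical page of results in the shape the API returns.
    page = {'datasets': [
        {'datasetReference': {'projectId': 'proj', 'datasetId': 'one'}},
        {'datasetReference': {'projectId': 'proj', 'datasetId': 'two'}},
    ]}
    items = [_item_to_dataset(None, resource) for resource in page['datasets']]
    print([item.dataset_id for item in items])  # prints ['one', 'two']
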
108 changes: 104 additions & 4 deletions bigquery/google/cloud/bigquery/dataset.py
@@ -281,8 +281,7 @@ def full_dataset_id(self):

@property
def reference(self):
"""A :class:`~google.cloud.bigquery.dataset.DatasetReference` pointing to
this dataset.
"""A reference to this dataset.
Returns:
google.cloud.bigquery.dataset.DatasetReference:
@@ -420,7 +419,7 @@ def labels(self):
:rtype: dict, {str -> str}
:returns: A dict of the dataset's labels.
"""
return self._properties['labels']
return self._properties.get('labels', {})

@labels.setter
def labels(self, value):
@@ -546,4 +545,105 @@ def table(self, table_id):
:rtype: :class:`~google.cloud.bigquery.table.TableReference`
:returns: a TableReference for a table in this dataset.
"""
return TableReference(self, table_id)
return TableReference(self.reference, table_id)


class DatasetListItem(object):
"""A read-only dataset resource from a list operation.
For performance reasons, the BigQuery API only includes some of the
dataset properties when listing datasets. Notably,
:attr:`~google.cloud.bigquery.dataset.Dataset.access_entries` is missing.
For a full list of the properties that the BigQuery API returns, see the
`REST documentation for datasets.list
<https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list>`_.
Args:
resource (dict):
A dataset-like resource object from a dataset list response. A
``datasetReference`` property is required.
Raises:
ValueError:
If ``datasetReference`` or one of its required members is missing
from ``resource``.
"""

def __init__(self, resource):
if 'datasetReference' not in resource:
raise ValueError('resource must contain a datasetReference value')
if 'projectId' not in resource['datasetReference']:
raise ValueError(
"resource['datasetReference'] must contain a projectId value")
if 'datasetId' not in resource['datasetReference']:
raise ValueError(
"resource['datasetReference'] must contain a datasetId value")
self._properties = resource

@property
def project(self):
"""Project bound to the dataset.
:rtype: str
:returns: the project.
"""
return self._properties['datasetReference']['projectId']

@property
def dataset_id(self):
"""Dataset ID.
:rtype: str
:returns: the dataset ID.
"""
return self._properties['datasetReference']['datasetId']

@property
def full_dataset_id(self):
"""ID for the dataset resource, in the form "project_id:dataset_id".
:rtype: str, or ``NoneType``
:returns: the ID (None until set from the server).
"""
return self._properties.get('id')

@property
def friendly_name(self):
"""Title of the dataset.
:rtype: str, or ``NoneType``
:returns: The name as set by the user, or None (the default).
"""
return self._properties.get('friendlyName')

@property
def labels(self):
"""Labels for the dataset.
:rtype: dict, {str -> str}
:returns: A dict of the dataset's labels.
"""
return self._properties.get('labels', {})

@property
def reference(self):
"""A reference to this dataset.
Returns:
google.cloud.bigquery.dataset.DatasetReference:
A pointer to this dataset
"""
return DatasetReference(self.project, self.dataset_id)

def table(self, table_id):
"""Constructs a TableReference.
:type table_id: str
:param table_id: the ID of the table.
:rtype: :class:`~google.cloud.bigquery.table.TableReference`
:returns: a TableReference for a table in this dataset.
"""
return TableReference(self.reference, table_id)
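
A brief sketch of the new class's behavior, built from a hand-written resource dict rather than a real API response; the project, dataset, and label names below are hypothetical.

    from google.cloud.bigquery.dataset import DatasetListItem

    resource = {
        'id': 'my-project:my_dataset',
        'datasetReference': {
            'projectId': 'my-project',
            'datasetId': 'my_dataset',
        },
        'labels': {'team': 'data-eng'},
    }

    item = DatasetListItem(resource)
    print(item.project)               # 'my-project'
    print(item.full_dataset_id)       # 'my-project:my_dataset'
    print(item.labels)                # {'team': 'data-eng'}
    table_ref = item.table('events')  # TableReference within this dataset

    # A resource without a datasetReference is rejected up front.
    try:
        DatasetListItem({})
    except ValueError as exc:
        print(exc)  # resource must contain a datasetReference value
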
36 changes: 28 additions & 8 deletions bigquery/google/cloud/bigquery/table.py
@@ -49,7 +49,7 @@ def _reference_getter(table):
this table.
Returns:
google.cloud.bigquery.table.TableReference: pointer to this table
google.cloud.bigquery.table.TableReference: pointer to this table.
"""
from google.cloud.bigquery import dataset

@@ -295,7 +295,7 @@ def labels(self):
:rtype: dict, {str -> str}
:returns: A dict of the table's labels.
"""
return self._properties['labels']
return self._properties.get('labels', {})

@labels.setter
def labels(self, value):
@@ -756,10 +756,28 @@ class TableListItem(object):
Args:
resource (dict):
A table-like resource object from a table list response.
A table-like resource object from a table list response. A
``tableReference`` property is required.
Raises:
ValueError:
If ``tableReference`` or one of its required members is missing
from ``resource``.
"""

def __init__(self, resource):
if 'tableReference' not in resource:
raise ValueError('resource must contain a tableReference value')
if 'projectId' not in resource['tableReference']:
raise ValueError(
"resource['tableReference'] must contain a projectId value")
if 'datasetId' not in resource['tableReference']:
raise ValueError(
"resource['tableReference'] must contain a datasetId value")
if 'tableId' not in resource['tableReference']:
raise ValueError(
"resource['tableReference'] must contain a tableId value")

self._properties = resource

@property
@@ -769,7 +787,7 @@ def project(self):
Returns:
str: the project ID of the table.
"""
return self._properties.get('tableReference', {}).get('projectId')
return self._properties['tableReference']['projectId']

@property
def dataset_id(self):
@@ -778,7 +796,7 @@ def dataset_id(self):
Returns:
str: the dataset ID of the table.
"""
return self._properties.get('tableReference', {}).get('datasetId')
return self._properties['tableReference']['datasetId']

@property
def table_id(self):
@@ -787,7 +805,7 @@ def table_id(self):
Returns:
str: the table ID.
"""
return self._properties.get('tableReference', {}).get('tableId')
return self._properties['tableReference']['tableId']

reference = property(_reference_getter)

@@ -842,8 +860,10 @@ def partition_expiration(self):
Returns:
int: The time in ms for partition expiration
"""
return int(
self._properties.get('timePartitioning', {}).get('expirationMs'))
expiration = self._properties.get(
'timePartitioning', {}).get('expirationMs')
if expiration is not None:
return int(expiration)

@property
def friendly_name(self):
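
Similarly, a hypothetical sketch of the stricter TableListItem constructor and the safer partition_expiration getter; the identifiers below are invented.

    from google.cloud.bigquery.table import TableListItem

    resource = {
        'tableReference': {
            'projectId': 'my-project',
            'datasetId': 'my_dataset',
            'tableId': 'my_table',
        },
        # Note: no 'timePartitioning' entry in this resource.
    }

    table = TableListItem(resource)
    print(table.project, table.dataset_id, table.table_id)
    print(table.partition_expiration)  # None, instead of int(None) raising TypeError

    # A tableReference missing a required member is rejected.
    try:
        TableListItem({'tableReference': {'projectId': 'my-project'}})
    except ValueError as exc:
        print(exc)  # resource['tableReference'] must contain a datasetId value
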
4 changes: 2 additions & 2 deletions bigquery/tests/unit/test_client.py
@@ -183,7 +183,7 @@ def test_list_projects_explicit_response_missing_projects_key(self):
{'maxResults': 3, 'pageToken': TOKEN})

def test_list_datasets_defaults(self):
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem

DATASET_1 = 'dataset_one'
DATASET_2 = 'dataset_two'
@@ -215,7 +215,7 @@ def test_list_datasets_defaults(self):

self.assertEqual(len(datasets), len(DATA['datasets']))
for found, expected in zip(datasets, DATA['datasets']):
self.assertIsInstance(found, Dataset)
self.assertIsInstance(found, DatasetListItem)
self.assertEqual(found.full_dataset_id, expected['id'])
self.assertEqual(found.friendly_name, expected['friendlyName'])
self.assertEqual(token, TOKEN)
97 changes: 97 additions & 0 deletions bigquery/tests/unit/test_dataset.py
@@ -404,6 +404,10 @@ def test_labels_setter_bad_value(self):
with self.assertRaises(ValueError):
dataset.labels = None

def test_labels_getter_missing_value(self):
dataset = self._make_one(self.DS_REF)
self.assertEqual(dataset.labels, {})

def test_from_api_repr_missing_identity(self):
self._setUpConstants()
RESOURCE = {}
@@ -460,3 +464,96 @@ def test_table(self):
self.assertEqual(table.table_id, 'table_id')
self.assertEqual(table.dataset_id, self.DS_ID)
self.assertEqual(table.project, self.PROJECT)


class TestDatasetListItem(unittest.TestCase):

@staticmethod
def _get_target_class():
from google.cloud.bigquery.dataset import DatasetListItem

return DatasetListItem

def _make_one(self, *args, **kw):
return self._get_target_class()(*args, **kw)

def test_ctor(self):
project = 'test-project'
dataset_id = 'test_dataset'
resource = {
'kind': 'bigquery#dataset',
'id': '{}:{}'.format(project, dataset_id),
'datasetReference': {
'projectId': project,
'datasetId': dataset_id,
},
'friendlyName': 'Data of the Test',
'labels': {
'some-stuff': 'this-is-a-label',
},
}

dataset = self._make_one(resource)
self.assertEqual(dataset.project, project)
self.assertEqual(dataset.dataset_id, dataset_id)
self.assertEqual(
dataset.full_dataset_id,
'{}:{}'.format(project, dataset_id))
self.assertEqual(dataset.reference.project, project)
self.assertEqual(dataset.reference.dataset_id, dataset_id)
self.assertEqual(dataset.friendly_name, 'Data of the Test')
self.assertEqual(dataset.labels['some-stuff'], 'this-is-a-label')

def test_ctor_missing_properties(self):
resource = {
'datasetReference': {
'projectId': 'testproject',
'datasetId': 'testdataset',
},
}
dataset = self._make_one(resource)
self.assertEqual(dataset.project, 'testproject')
self.assertEqual(dataset.dataset_id, 'testdataset')
self.assertIsNone(dataset.full_dataset_id)
self.assertIsNone(dataset.friendly_name)
self.assertEqual(dataset.labels, {})

def test_ctor_wo_project(self):
resource = {
'datasetReference': {
'datasetId': 'testdataset',
},
}
with self.assertRaises(ValueError):
self._make_one(resource)

def test_ctor_wo_dataset(self):
resource = {
'datasetReference': {
'projectId': 'testproject',
},
}
with self.assertRaises(ValueError):
self._make_one(resource)

def test_ctor_wo_reference(self):
with self.assertRaises(ValueError):
self._make_one({})

def test_table(self):
from google.cloud.bigquery.table import TableReference

project = 'test-project'
dataset_id = 'test_dataset'
resource = {
'datasetReference': {
'projectId': project,
'datasetId': dataset_id,
},
}
dataset = self._make_one(resource)
table = dataset.table('table_id')
self.assertIsInstance(table, TableReference)
self.assertEqual(table.table_id, 'table_id')
self.assertEqual(table.dataset_id, dataset_id)
self.assertEqual(table.project, project)