From 72079dccacbe0117563814b4ccde0e66766ec0b6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 22 Nov 2017 12:50:06 -0800 Subject: [PATCH 1/4] BigQuery: Use DatasetListItem for client.list_datasets Listing datasets only includes a subset of the properties available on a dataset. The DatasetListItem class is used to explicitly document which features are available and to prevent confusion from trying to use the resulting object in other contexts, like updating. --- bigquery/google/cloud/bigquery/client.py | 10 ++- bigquery/google/cloud/bigquery/dataset.py | 92 ++++++++++++++++++++++- bigquery/tests/unit/test_client.py | 4 +- bigquery/tests/unit/test_dataset.py | 57 ++++++++++++++ 4 files changed, 154 insertions(+), 9 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 34fc9114ce92..6706aaa11ccb 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -36,6 +36,7 @@ from google.cloud.bigquery._helpers import _snake_to_camel_case from google.cloud.bigquery._http import Connection from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import ExtractJob @@ -181,8 +182,9 @@ def list_datasets(self, include_all=False, filter=None, max_results=None, :param retry: (Optional) How to retry the RPC. :rtype: :class:`~google.api_core.page_iterator.Iterator` - :returns: Iterator of :class:`~google.cloud.bigquery.dataset.Dataset`. - accessible to the current client. + :returns: + Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`. + associated with the client's project. """ extra_params = {} if include_all: @@ -1275,10 +1277,10 @@ def _item_to_dataset(iterator, resource): :type resource: dict :param resource: An item to be converted to a dataset. - :rtype: :class:`.Dataset` + :rtype: :class:`.DatasetListItem` :returns: The next dataset in the page. """ - return Dataset.from_api_repr(resource) + return DatasetListItem(resource) def _item_to_job(iterator, resource): diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 25e00405e2c8..32cdfef5f08f 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -281,8 +281,7 @@ def full_dataset_id(self): @property def reference(self): - """A :class:`~google.cloud.bigquery.dataset.DatasetReference` pointing to - this dataset. + """A reference to this dataset. Returns: google.cloud.bigquery.dataset.DatasetReference: @@ -546,4 +545,91 @@ def table(self, table_id): :rtype: :class:`~google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ - return TableReference(self, table_id) + return TableReference(self.reference, table_id) + + +class DatasetListItem(object): + """A read-only dataset resource from a list operation. + + For performance reasons, the BigQuery API only includes some of the + dataset properties when listing datasets. Notably, + :attr:`~google.cloud.bigquery.dataset.Dataset.access_entries` is missing. + + For a full list of the properties that the BigQuery API returns, see the + `REST documentation for datasets.list + `_. + + + Args: + resource (dict): + A dataset-like resource object from a dataset list response. + """ + + def __init__(self, resource): + self._properties = resource + + @property + def project(self): + """Project bound to the dataset. + + :rtype: str + :returns: the project. + """ + return self._properties.get('datasetReference').get('projectId') + + @property + def dataset_id(self): + """Dataset ID. + + :rtype: str + :returns: the dataset ID. + """ + return self._properties.get('datasetReference').get('datasetId') + + @property + def full_dataset_id(self): + """ID for the dataset resource, in the form "project_id:dataset_id". + + :rtype: str, or ``NoneType`` + :returns: the ID (None until set from the server). + """ + return self._properties.get('id') + + @property + def friendly_name(self): + """Title of the dataset. + + :rtype: str, or ``NoneType`` + :returns: The name as set by the user, or None (the default). + """ + return self._properties.get('friendlyName') + + @property + def labels(self): + """Labels for the dataset. + + :rtype: dict, {str -> str} + :returns: A dict of the the dataset's labels. + """ + return self._properties['labels'] + + @property + def reference(self): + """A reference to this dataset. + + Returns: + google.cloud.bigquery.dataset.DatasetReference: + A pointer to this dataset + """ + return DatasetReference(self.project, self.dataset_id) + + def table(self, table_id): + """Constructs a TableReference. + + :type table_id: str + :param table_id: the ID of the table. + + :rtype: :class:`~google.cloud.bigquery.table.TableReference` + :returns: a TableReference for a table in this dataset. + """ + return TableReference(self.reference, table_id) \ No newline at end of file diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 721181e3cc81..f54feacdd93d 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -183,7 +183,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): {'maxResults': 3, 'pageToken': TOKEN}) def test_list_datasets_defaults(self): - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.dataset import DatasetListItem DATASET_1 = 'dataset_one' DATASET_2 = 'dataset_two' @@ -215,7 +215,7 @@ def test_list_datasets_defaults(self): self.assertEqual(len(datasets), len(DATA['datasets'])) for found, expected in zip(datasets, DATA['datasets']): - self.assertIsInstance(found, Dataset) + self.assertIsInstance(found, DatasetListItem) self.assertEqual(found.full_dataset_id, expected['id']) self.assertEqual(found.friendly_name, expected['friendlyName']) self.assertEqual(token, TOKEN) diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index e9e2f0dec813..b221f26b40a2 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -460,3 +460,60 @@ def test_table(self): self.assertEqual(table.table_id, 'table_id') self.assertEqual(table.dataset_id, self.DS_ID) self.assertEqual(table.project, self.PROJECT) + + +class TestDatasetListItem(unittest.TestCase): + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.dataset import DatasetListItem + + return DatasetListItem + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + project = 'test-project' + dataset_id = 'test_dataset' + resource = { + 'kind': 'bigquery#dataset', + 'id': '{}:{}'.format(project, dataset_id), + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + 'friendlyName': 'Data of the Test', + 'labels': { + 'some-stuff': 'this-is-a-label', + }, + } + + dataset = self._make_one(resource) + self.assertEqual(dataset.project, project) + self.assertEqual(dataset.dataset_id, dataset_id) + self.assertEqual( + dataset.full_dataset_id, + '{}:{}'.format(project, dataset_id)) + self.assertEqual(dataset.reference.project, project) + self.assertEqual(dataset.reference.dataset_id, dataset_id) + self.assertEqual(dataset.friendly_name, 'Data of the Test') + self.assertEqual(dataset.labels['some-stuff'], 'this-is-a-label') + + def test_table(self): + from google.cloud.bigquery.table import TableReference + + project = 'test-project' + dataset_id = 'test_dataset' + resource = { + 'datasetReference': { + 'projectId': project, + 'datasetId': dataset_id, + }, + } + dataset = self._make_one(resource) + table = dataset.table('table_id') + self.assertIsInstance(table, TableReference) + self.assertEqual(table.table_id, 'table_id') + self.assertEqual(table.dataset_id, dataset_id) + self.assertEqual(table.project, project) From c5dcbd6a216707725e394c41170511ad09c4344b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 27 Nov 2017 11:11:35 -0800 Subject: [PATCH 2/4] Fix lint errors. --- bigquery/google/cloud/bigquery/client.py | 3 ++- bigquery/google/cloud/bigquery/dataset.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index 6706aaa11ccb..5d3a70416c22 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -183,7 +183,8 @@ def list_datasets(self, include_all=False, filter=None, max_results=None, :rtype: :class:`~google.api_core.page_iterator.Iterator` :returns: - Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`. + Iterator of + :class:`~google.cloud.bigquery.dataset.DatasetListItem`. associated with the client's project. """ extra_params = {} diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 32cdfef5f08f..7f93daaaf182 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -632,4 +632,4 @@ def table(self, table_id): :rtype: :class:`~google.cloud.bigquery.table.TableReference` :returns: a TableReference for a table in this dataset. """ - return TableReference(self.reference, table_id) \ No newline at end of file + return TableReference(self.reference, table_id) From c53cb25909edbfe7fe827069571f66084d3dc1f2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 5 Dec 2017 13:01:55 -0800 Subject: [PATCH 3/4] Make dataset & table reference required, labels optional. --- bigquery/google/cloud/bigquery/dataset.py | 24 +++++++--- bigquery/google/cloud/bigquery/table.py | 35 +++++++++++---- bigquery/tests/unit/test_dataset.py | 40 +++++++++++++++++ bigquery/tests/unit/test_table.py | 54 +++++++++++++++++++++++ 4 files changed, 140 insertions(+), 13 deletions(-) diff --git a/bigquery/google/cloud/bigquery/dataset.py b/bigquery/google/cloud/bigquery/dataset.py index 7f93daaaf182..c8f588671ab5 100644 --- a/bigquery/google/cloud/bigquery/dataset.py +++ b/bigquery/google/cloud/bigquery/dataset.py @@ -419,7 +419,7 @@ def labels(self): :rtype: dict, {str -> str} :returns: A dict of the the dataset's labels. """ - return self._properties['labels'] + return self._properties.get('labels', {}) @labels.setter def labels(self, value): @@ -562,10 +562,24 @@ class DatasetListItem(object): Args: resource (dict): - A dataset-like resource object from a dataset list response. + A dataset-like resource object from a dataset list response. A + ``datasetReference`` property is required. + + Raises: + ValueError: + If ``datasetReference`` or one of its required members is missing + from ``resource``. """ def __init__(self, resource): + if 'datasetReference' not in resource: + raise ValueError('resource must contain a datasetReference value') + if 'projectId' not in resource['datasetReference']: + raise ValueError( + "resource['datasetReference'] must contain a projectId value") + if 'datasetId' not in resource['datasetReference']: + raise ValueError( + "resource['datasetReference'] must contain a datasetId value") self._properties = resource @property @@ -575,7 +589,7 @@ def project(self): :rtype: str :returns: the project. """ - return self._properties.get('datasetReference').get('projectId') + return self._properties['datasetReference']['projectId'] @property def dataset_id(self): @@ -584,7 +598,7 @@ def dataset_id(self): :rtype: str :returns: the dataset ID. """ - return self._properties.get('datasetReference').get('datasetId') + return self._properties['datasetReference']['datasetId'] @property def full_dataset_id(self): @@ -611,7 +625,7 @@ def labels(self): :rtype: dict, {str -> str} :returns: A dict of the the dataset's labels. """ - return self._properties['labels'] + return self._properties.get('labels', {}) @property def reference(self): diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index d7e4745fa8c8..4d17a25655c9 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -49,7 +49,7 @@ def _reference_getter(table): this table. Returns: - google.cloud.bigquery.table.TableReference: pointer to this table + google.cloud.bigquery.table.TableReference: pointer to this table. """ from google.cloud.bigquery import dataset @@ -295,7 +295,7 @@ def labels(self): :rtype: dict, {str -> str} :returns: A dict of the the table's labels. """ - return self._properties['labels'] + return self._properties.get('labels', {}) @labels.setter def labels(self, value): @@ -756,10 +756,28 @@ class TableListItem(object): Args: resource (dict): - A table-like resource object from a table list response. + A table-like resource object from a table list response. A + ``tableReference`` property is required. + + Raises: + ValueError: + If ``tableReference`` or one of its required members is missing + from ``resource``. """ def __init__(self, resource): + if 'tableReference' not in resource: + raise ValueError('resource must contain a tableReference value') + if 'projectId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a projectId value") + if 'datasetId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a datasetId value") + if 'tableId' not in resource['tableReference']: + raise ValueError( + "resource['tableReference'] must contain a tableId value") + self._properties = resource @property @@ -769,7 +787,7 @@ def project(self): Returns: str: the project ID of the table. """ - return self._properties.get('tableReference', {}).get('projectId') + return self._properties['tableReference']['projectId'] @property def dataset_id(self): @@ -778,7 +796,7 @@ def dataset_id(self): Returns: str: the dataset ID of the table. """ - return self._properties.get('tableReference', {}).get('datasetId') + return self._properties['tableReference']['datasetId'] @property def table_id(self): @@ -787,7 +805,7 @@ def table_id(self): Returns: str: the table ID. """ - return self._properties.get('tableReference', {}).get('tableId') + return self._properties['tableReference']['tableId'] reference = property(_reference_getter) @@ -842,8 +860,9 @@ def partition_expiration(self): Returns: int: The time in ms for partition expiration """ - return int( - self._properties.get('timePartitioning', {}).get('expirationMs')) + expiration = self._properties.get('timePartitioning', {}).get('expirationMs') + if expiration is not None: + return int(expiration) @property def friendly_name(self): diff --git a/bigquery/tests/unit/test_dataset.py b/bigquery/tests/unit/test_dataset.py index b221f26b40a2..33c38720beac 100644 --- a/bigquery/tests/unit/test_dataset.py +++ b/bigquery/tests/unit/test_dataset.py @@ -404,6 +404,10 @@ def test_labels_setter_bad_value(self): with self.assertRaises(ValueError): dataset.labels = None + def test_labels_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.labels, {}) + def test_from_api_repr_missing_identity(self): self._setUpConstants() RESOURCE = {} @@ -500,6 +504,42 @@ def test_ctor(self): self.assertEqual(dataset.friendly_name, 'Data of the Test') self.assertEqual(dataset.labels['some-stuff'], 'this-is-a-label') + def test_ctor_missing_properties(self): + resource = { + 'datasetReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + }, + } + dataset = self._make_one(resource) + self.assertEqual(dataset.project, 'testproject') + self.assertEqual(dataset.dataset_id, 'testdataset') + self.assertIsNone(dataset.full_dataset_id) + self.assertIsNone(dataset.friendly_name) + self.assertEqual(dataset.labels, {}) + + def test_ctor_wo_project(self): + resource = { + 'datasetReference': { + 'datasetId': 'testdataset', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_dataset(self): + resource = { + 'datasetReference': { + 'projectId': 'testproject', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_reference(self): + with self.assertRaises(ValueError): + self._make_one({}) + def test_table(self): from google.cloud.bigquery.table import TableReference diff --git a/bigquery/tests/unit/test_table.py b/bigquery/tests/unit/test_table.py index dffa815511f1..a97bbe19c7fe 100644 --- a/bigquery/tests/unit/test_table.py +++ b/bigquery/tests/unit/test_table.py @@ -841,6 +841,60 @@ def test_ctor_view(self): # Server default for useLegacySql is True. self.assertTrue(table.view_use_legacy_sql) + def test_ctor_missing_properties(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + 'tableId': 'testtable', + }, + } + table = self._make_one(resource) + self.assertEqual(table.project, 'testproject') + self.assertEqual(table.dataset_id, 'testdataset') + self.assertEqual(table.table_id, 'testtable') + self.assertIsNone(table.full_table_id) + self.assertIsNone(table.friendly_name) + self.assertIsNone(table.table_type) + self.assertIsNone(table.partitioning_type) + self.assertIsNone(table.partition_expiration) + self.assertEqual(table.labels, {}) + self.assertIsNone(table.view_use_legacy_sql) + + def test_ctor_wo_project(self): + resource = { + 'tableReference': { + 'datasetId': 'testdataset', + 'tableId': 'testtable', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_dataset(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'tableId': 'testtable', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_table(self): + resource = { + 'tableReference': { + 'projectId': 'testproject', + 'datasetId': 'testdataset', + }, + } + with self.assertRaises(ValueError): + self._make_one(resource) + + def test_ctor_wo_reference(self): + with self.assertRaises(ValueError): + self._make_one({}) + class TestRow(unittest.TestCase): From f2f309da8cc828f56381d477f71754860f8041ec Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 5 Dec 2017 13:20:27 -0800 Subject: [PATCH 4/4] Fix lint error --- bigquery/google/cloud/bigquery/table.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/table.py b/bigquery/google/cloud/bigquery/table.py index 4d17a25655c9..d240fa6d0910 100644 --- a/bigquery/google/cloud/bigquery/table.py +++ b/bigquery/google/cloud/bigquery/table.py @@ -860,7 +860,8 @@ def partition_expiration(self): Returns: int: The time in ms for partition expiration """ - expiration = self._properties.get('timePartitioning', {}).get('expirationMs') + expiration = self._properties.get( + 'timePartitioning', {}).get('expirationMs') if expiration is not None: return int(expiration)