Skip to content

Commit

Permalink
Add methods for Models API (#7562)
Browse files Browse the repository at this point in the history
* Generate protos for Models API.

The Models API is a component of the bigquery_v2 interface.
It is not available as a gRPC API, but it does provide
protocol buffers. This commit adds those protocol buffers
to the client so that they can be used to avoid much manual
work to create resource classes that can be serialized to/from
JSON.

* Add handwritten model API classes.

These classes are the top-level resource classes for the Models API.
Protocol buffer objects are used for all sub-objects.

Mutable properties follow the same pattern as Table and Dataset: a
`_properties` dictionary contains the property values in the REST API
format.
  • Loading branch information
tswast authored Apr 4, 2019
1 parent 33fcc1c commit c436873
Show file tree
Hide file tree
Showing 47 changed files with 6,644 additions and 64 deletions.
7 changes: 3 additions & 4 deletions bigquery/LICENSE
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@

Apache License
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
https://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

Expand Down Expand Up @@ -193,7 +192,7 @@
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0
https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
Expand Down
4 changes: 3 additions & 1 deletion bigquery/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include README.rst LICENSE
recursive-include google *.json *.proto
recursive-include tests *
global-exclude *.pyc __pycache__
global-exclude *.py[co]
global-exclude __pycache__
8 changes: 8 additions & 0 deletions bigquery/docs/gapic/v2/enums.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Enums for BigQuery API Client
=============================

.. autoclass:: google.cloud.bigquery_v2.gapic.enums.Model
:members:

.. autoclass:: google.cloud.bigquery_v2.gapic.enums.StandardSqlDataType
:members:
5 changes: 5 additions & 0 deletions bigquery/docs/gapic/v2/types.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Types for BigQuery API Client
=============================

.. automodule:: google.cloud.bigquery_v2.types
:members:
21 changes: 20 additions & 1 deletion bigquery/docs/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,14 @@ Table
table.TimePartitioning
table.TimePartitioningType

Model
=====

.. autosummary::
:toctree: generated

model.Model
model.ModelReference

Schema
======
Expand Down Expand Up @@ -139,9 +147,20 @@ External Configuration


Magics
======================
======

.. toctree::
:maxdepth: 2

magics

Additional Types
================

Protocol buffer classes for working with the Models API.

.. toctree::
:maxdepth: 2

gapic/v2/enums
gapic/v2/types
6 changes: 4 additions & 2 deletions bigquery/google/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Copyright 2016 Google LLC
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
Expand Down
6 changes: 4 additions & 2 deletions bigquery/google/cloud/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Copyright 2016 Google LLC
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
Expand Down
5 changes: 5 additions & 0 deletions bigquery/google/cloud/bigquery/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@
from google.cloud.bigquery.job import SourceFormat
from google.cloud.bigquery.job import UnknownJob
from google.cloud.bigquery.job import WriteDisposition
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.query import ArrayQueryParameter
from google.cloud.bigquery.query import ScalarQueryParameter
from google.cloud.bigquery.query import StructQueryParameter
Expand Down Expand Up @@ -100,6 +102,9 @@
"UnknownJob",
"TimePartitioningType",
"TimePartitioning",
# Models
"Model",
"ModelReference",
# Shared helpers
"SchemaField",
"UDFResource",
Expand Down
49 changes: 49 additions & 0 deletions bigquery/google/cloud/bigquery/_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,3 +581,52 @@ def _str_or_none(value):
"""Helper: serialize value to JSON string."""
if value is not None:
return str(value)


def _parse_3_part_id(full_id, default_project=None, property_name="table_id"):
    """Split a dotted resource ID into project, dataset, and resource IDs.

    Args:
        full_id (str):
            An ID of the form ``dataset.resource`` or
            ``project.dataset.resource``.
        default_project (Optional[str]):
            Project ID to use when ``full_id`` has only two parts.
        property_name (str):
            Name of the property being parsed. Used only to build the
            error messages.

    Returns:
        Tuple[str, str, str]:
            The project ID, dataset ID, and resource ID, in that order.

    Raises:
        ValueError:
            If ``full_id`` does not consist of two or three dot-separated
            parts, or if it has two parts and ``default_project`` is not
            set.
    """
    pieces = full_id.split(".")
    piece_count = len(pieces)

    if piece_count not in (2, 3):
        message = (
            "{property_name} must be a fully-qualified ID in "
            'standard SQL format. e.g. "project.dataset.{property_name}", '
            "got {}"
        )
        raise ValueError(message.format(full_id, property_name=property_name))

    # A three-part ID names its own project; the default is ignored.
    if piece_count == 3:
        project_id, dataset_id, resource_id = pieces
        return project_id, dataset_id, resource_id

    # Only two parts were given, so the project must come from the default.
    if not default_project:
        message = (
            "When default_project is not set, {property_name} must be a "
            "fully-qualified ID in standard SQL format. "
            'e.g. "project.dataset_id.{property_name}", got {}'
        )
        raise ValueError(message.format(full_id, property_name=property_name))

    dataset_id, resource_id = pieces
    return default_project, dataset_id, resource_id


def _build_resource_from_properties(obj, filter_fields):
    """Build a partial resource from an object's ``_properties`` dictionary.

    Args:
        obj:
            Object exposing a ``_PROPERTY_TO_API_FIELD`` mapping (Python
            property name to API field name) and a ``_properties``
            dictionary keyed by API field name.
        filter_fields (Iterable[str]):
            Names of the fields to include, spelled as the properties of
            the Python object.

    Returns:
        dict:
            Mapping of API field name to the value stored in
            ``obj._properties``. A mapped field with no stored value is
            included with the value ``None``.

    Raises:
        ValueError:
            If a field is neither mapped in ``_PROPERTY_TO_API_FIELD`` nor
            present in ``_properties``.
    """
    resource = {}
    for name in filter_fields:
        api_name = obj._PROPERTY_TO_API_FIELD.get(name)

        if api_name is not None:
            # Known property: emit it under its API name, even when unset.
            resource[api_name] = obj._properties.get(api_name)
            continue

        if name not in obj._properties:
            raise ValueError("No property %s" % name)

        # Allows properties that are not defined in the library and
        # properties that have the same name as the API resource key.
        resource[name] = obj._properties[name]

    return resource
171 changes: 171 additions & 0 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import job
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.query import _QueryResults
from google.cloud.bigquery.retry import DEFAULT_RETRY
from google.cloud.bigquery.table import _table_arg_to_table
Expand Down Expand Up @@ -428,6 +430,33 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY):
api_response = self._call_api(retry, method="GET", path=dataset_ref.path)
return Dataset.from_api_repr(api_response)

def get_model(self, model_ref, retry=DEFAULT_RETRY):
    """[Beta] Fetch the model referenced by ``model_ref``.

    Args:
        model_ref (Union[google.cloud.bigquery.model.ModelReference, str]):
            A reference to the model to fetch from the BigQuery API.
            A string is converted to a model reference with
            :func:`google.cloud.bigquery.model.ModelReference.from_string`,
            using this client's project as the default project.
        retry (google.api_core.retry.Retry):
            (Optional) How to retry the RPC.

    Returns:
        google.cloud.bigquery.model.Model:
            A ``Model`` instance.
    """
    reference = model_ref
    if isinstance(reference, str):
        reference = ModelReference.from_string(
            reference, default_project=self.project
        )

    resource = self._call_api(retry, method="GET", path=reference.path)
    return Model.from_api_repr(resource)

def get_table(self, table, retry=DEFAULT_RETRY):
"""Fetch the table referenced by ``table``.
Expand Down Expand Up @@ -488,6 +517,41 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY):
)
return Dataset.from_api_repr(api_response)

def update_model(self, model, fields, retry=DEFAULT_RETRY):
    """[Beta] Change some fields of a model.

    Use ``fields`` to specify which fields to update. At least one field
    must be provided. If a field is listed in ``fields`` and is ``None``
    in ``model``, it will be deleted.

    If ``model.etag`` is not ``None``, the update will only succeed if
    the model on the server has the same ETag. Reading a model with
    ``get_model``, changing its fields, and then passing it to
    ``update_model`` therefore ensures that the changes are only saved
    if the model was not modified since the read.

    Args:
        model (google.cloud.bigquery.model.Model):
            The model to update.
        fields (Sequence[str]):
            The fields of ``model`` to change, spelled as the Model
            properties (e.g. "friendly_name").
        retry (google.api_core.retry.Retry):
            (Optional) A description of how to retry the API call.

    Returns:
        google.cloud.bigquery.model.Model:
            The model resource returned from the API call.
    """
    resource = model._build_resource(fields)
    # Send the ETag as a precondition only when the model carries one.
    headers = {"If-Match": model.etag} if model.etag else None
    response = self._call_api(
        retry,
        method="PATCH",
        path=model.path,
        data=resource,
        headers=headers,
    )
    return Model.from_api_repr(response)

def update_table(self, table, fields, retry=DEFAULT_RETRY):
"""Change some fields of a table.
Expand Down Expand Up @@ -523,6 +587,64 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY):
)
return Table.from_api_repr(api_response)

def list_models(
    self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY
):
    """[Beta] List models in the dataset.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/models/list

    Args:
        dataset (Union[
            google.cloud.bigquery.dataset.Dataset,
            google.cloud.bigquery.dataset.DatasetReference,
            str,
        ]):
            A reference to the dataset whose models to list from the
            BigQuery API. A string is converted to a dataset reference
            with
            :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`,
            using this client's project as the default project.
        max_results (int):
            (Optional) Maximum number of models to return. If not passed,
            defaults to a value set by the API.
        page_token (str):
            (Optional) Token representing a cursor into the models. If
            not passed, the API will return the first page of models. The
            token marks the beginning of the iterator to be returned and
            the value of the ``page_token`` can be accessed at
            ``next_page_token`` of the
            :class:`~google.api_core.page_iterator.HTTPIterator`.
        retry (google.api_core.retry.Retry):
            (Optional) How to retry the RPC.

    Returns:
        google.api_core.page_iterator.Iterator:
            Iterator of :class:`~google.cloud.bigquery.model.Model`
            contained within the requested dataset.

    Raises:
        TypeError:
            If ``dataset`` is not a ``Dataset``, ``DatasetReference``, or
            string.
    """
    if isinstance(dataset, str):
        dataset = DatasetReference.from_string(
            dataset, default_project=self.project
        )

    if not isinstance(dataset, (Dataset, DatasetReference)):
        raise TypeError("dataset must be a Dataset, DatasetReference, or string")

    iterator_kwargs = {
        "client": self,
        "api_request": functools.partial(self._call_api, retry),
        "path": "%s/models" % dataset.path,
        "item_to_value": _item_to_model,
        "items_key": "models",
        "page_token": page_token,
        "max_results": max_results,
    }
    iterator = page_iterator.HTTPIterator(**iterator_kwargs)
    # Attach the resolved dataset to the iterator that is handed back.
    iterator.dataset = dataset
    return iterator

def list_tables(
self, dataset, max_results=None, page_token=None, retry=DEFAULT_RETRY
):
Expand Down Expand Up @@ -629,6 +751,40 @@ def delete_dataset(
if not not_found_ok:
raise

def delete_model(self, model, retry=DEFAULT_RETRY, not_found_ok=False):
    """[Beta] Delete a model.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/models/delete

    Args:
        model (Union[
            google.cloud.bigquery.model.Model,
            google.cloud.bigquery.model.ModelReference,
            str,
        ]):
            A reference to the model to delete. A string is converted to
            a model reference with
            :func:`google.cloud.bigquery.model.ModelReference.from_string`,
            using this client's project as the default project.
        retry (google.api_core.retry.Retry):
            (Optional) How to retry the RPC.
        not_found_ok (bool):
            Defaults to ``False``. If ``True``, ignore "not found" errors
            when deleting the model.

    Raises:
        TypeError:
            If ``model`` is not a ``Model``, a ``ModelReference``, or a
            string.
    """
    target = model
    if isinstance(target, str):
        target = ModelReference.from_string(target, default_project=self.project)

    if not isinstance(target, (Model, ModelReference)):
        raise TypeError("model must be a Model or a ModelReference")

    try:
        self._call_api(retry, method="DELETE", path=target.path)
    except google.api_core.exceptions.NotFound:
        # A missing model is an error unless the caller opted out.
        if not_found_ok:
            return
        raise

def delete_table(self, table, retry=DEFAULT_RETRY, not_found_ok=False):
"""Delete a table
Expand Down Expand Up @@ -1823,6 +1979,21 @@ def _item_to_job(iterator, resource):
return iterator.client.job_from_resource(resource)


def _item_to_model(iterator, resource):
    """Convert a JSON model resource to the native object.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use. Not used by this
            function; it is part of the ``item_to_value`` call signature.
        resource (dict):
            An item to be converted to a model.

    Returns:
        google.cloud.bigquery.model.Model: The next model in the page.
    """
    model = Model.from_api_repr(resource)
    return model


def _item_to_table(iterator, resource):
"""Convert a JSON table to the native object.
Expand Down
Loading

0 comments on commit c436873

Please sign in to comment.