Skip to content

Commit

Permalink
bigquery: add Client.list_rows, remove Table.fetch_data
Browse files Browse the repository at this point in the history
  • Loading branch information
jba committed Oct 6, 2017
1 parent 01fc4f4 commit 8098482
Show file tree
Hide file tree
Showing 5 changed files with 333 additions and 355 deletions.
79 changes: 77 additions & 2 deletions bigquery/google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@
from google.cloud.bigquery._http import Connection
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import Table, _TABLE_HAS_NO_SCHEMA
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.job import CopyJob
from google.cloud.bigquery.job import ExtractJob
from google.cloud.bigquery.job import LoadJob
from google.cloud.bigquery.job import QueryJob
from google.cloud.bigquery.job import QueryJobConfig
from google.cloud.bigquery.query import QueryResults
from google.cloud.bigquery._helpers import _item_to_row
from google.cloud.bigquery._helpers import _rows_page_start


class Project(object):
Expand Down Expand Up @@ -346,7 +348,6 @@ def delete_table(self, table):
:type table: One of:
:class:`~google.cloud.bigquery.table.Table`
:class:`~google.cloud.bigquery.table.TableReference`
:param table: the table to delete, or a reference to it.
"""
if not isinstance(table, (Table, TableReference)):
Expand Down Expand Up @@ -667,6 +668,80 @@ def query_rows(self, query, job_config=None, job_id=None, timeout=None):
job.begin()
return job.result(timeout=timeout)

def list_rows(self, table, selected_fields=None, max_results=None,
              page_token=None, start_index=None):
    """List the rows of the table.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list

    .. note::

       This method assumes that the provided schema is up-to-date with the
       schema as defined on the back-end: if the two schemas are not
       identical, the values returned may be incomplete. To ensure that the
       local copy of the schema is up-to-date, call ``client.get_table``.

    :type table: One of:
                 :class:`~google.cloud.bigquery.table.Table`
                 :class:`~google.cloud.bigquery.table.TableReference`
    :param table: the table to list, or a reference to it.

    :type selected_fields: list of :class:`SchemaField`
    :param selected_fields:
        The fields to return. Required if ``table`` is a
        :class:`~google.cloud.bigquery.table.TableReference`.

    :type max_results: int
    :param max_results: maximum number of rows to return.

    :type page_token: str
    :param page_token: (Optional) Token representing a cursor into the
                       table's rows.

    :type start_index: int
    :param start_index: (Optional) The zero-based index of the starting
                        row to read.

    :rtype: :class:`~google.api.core.page_iterator.Iterator`
    :returns: Iterator of row data :class:`tuple`s. During each page, the
              iterator will have the ``total_rows`` attribute set,
              which counts the total number of rows **in the table**
              (this is distinct from the total number of rows in the
              current page: ``iterator.page.num_items``).

    :raises ValueError: if ``table`` is a
        :class:`~google.cloud.bigquery.table.TableReference` and
        ``selected_fields`` is not given, or if ``table`` is a
        :class:`~google.cloud.bigquery.table.Table` with an empty schema.
    :raises TypeError: if ``table`` is neither a ``Table`` nor a
        ``TableReference``.
    """
    # Determine the schema used to convert row values. A bare
    # TableReference carries no schema, so the caller must supply
    # selected_fields in that case.
    if selected_fields is not None:
        schema = selected_fields
    elif isinstance(table, TableReference):
        raise ValueError('need selected_fields with TableReference')
    elif isinstance(table, Table):
        if len(table._schema) == 0:
            raise ValueError(_TABLE_HAS_NO_SCHEMA)
        schema = table.schema
    else:
        raise TypeError('table should be Table or TableReference')

    params = {}
    if selected_fields is not None:
        # The API expects the subset of fields by name.
        params['selectedFields'] = [f.name for f in selected_fields]
    if start_index is not None:
        params['startIndex'] = start_index

    iterator = page_iterator.HTTPIterator(
        client=self,
        api_request=self._connection.api_request,
        path='%s/data' % (table.path,),
        item_to_value=_item_to_row,
        items_key='rows',
        page_token=page_token,
        next_token='pageToken',
        max_results=max_results,
        page_start=_rows_page_start,
        extra_params=params)
    # _item_to_row reads the schema off the iterator to convert values.
    iterator.schema = schema
    return iterator


# pylint: disable=unused-argument
def _item_to_project(iterator, resource):
Expand Down
58 changes: 0 additions & 58 deletions bigquery/google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,10 @@
from google.resumable_media.requests import MultipartUpload
from google.resumable_media.requests import ResumableUpload

from google.api.core import page_iterator
from google.cloud import exceptions
from google.cloud._helpers import _datetime_from_microseconds
from google.cloud._helpers import _millis_from_datetime
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery._helpers import _item_to_row
from google.cloud.bigquery._helpers import _rows_page_start
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_ROW


Expand Down Expand Up @@ -768,61 +765,6 @@ def update(self, client=None):
method='PUT', path=self.path, data=self._build_resource())
self._set_properties(api_response)

def fetch_data(self, max_results=None, page_token=None, client=None):
    """API call: fetch the table data via a GET request

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list

    .. note::

       This method assumes that its instance's ``schema`` attribute is
       up-to-date with the schema as defined on the back-end: if the
       two schemas are not identical, the values returned may be
       incomplete. To ensure that the local copy of the schema is
       up-to-date, call ``client.get_table``.

    :type max_results: int
    :param max_results: (Optional) Maximum number of rows to return.

    :type page_token: str
    :param page_token: (Optional) Token representing a cursor into the
                       table's rows.

    :type client: :class:`~google.cloud.bigquery.client.Client`
    :param client: (Optional) The client to use. If not passed, falls
                   back to the ``client`` stored on the current dataset.

    :rtype: :class:`~google.api.core.page_iterator.Iterator`
    :returns: Iterator of row data :class:`tuple`s. During each page, the
              iterator will have the ``total_rows`` attribute set,
              which counts the total number of rows **in the table**
              (this is distinct from the total number of rows in the
              current page: ``iterator.page.num_items``).

    :raises ValueError: if the table has no schema set.
    """
    # Without a schema we cannot convert the raw row values.
    if not self._schema:
        raise ValueError(_TABLE_HAS_NO_SCHEMA)

    client = self._require_client(client)

    extra = {}
    if max_results is not None:
        extra['maxResults'] = max_results

    row_iterator = page_iterator.HTTPIterator(
        client=client,
        api_request=client._connection.api_request,
        path='%s/data' % (self.path,),
        item_to_value=_item_to_row,
        items_key='rows',
        page_token=page_token,
        next_token='pageToken',
        page_start=_rows_page_start,
        extra_params=extra)
    # _item_to_row reads the schema off the iterator to convert values.
    row_iterator.schema = self._schema
    return row_iterator

def row_from_mapping(self, mapping):
"""Convert a mapping to a row tuple using the schema.
Expand Down
2 changes: 1 addition & 1 deletion bigquery/tests/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def test_update_table(self):

@staticmethod
def _fetch_single_page(table):
    """Return the first page of *table*'s rows as a list.

    Fetches via ``Client.list_rows`` (``Table.fetch_data`` was removed).
    """
    iterator = Config.CLIENT.list_rows(table)
    page = six.next(iterator.pages)
    return list(page)

Expand Down
Loading

0 comments on commit 8098482

Please sign in to comment.