Skip to content

Commit

Permalink
feat(bigquery): add support for sheets ranges (#9416)
Browse files Browse the repository at this point in the history
* add range parameter

* update samples to use range

* update range usage
  • Loading branch information
emar-kar authored and tswast committed Oct 8, 2019
1 parent 59d967e commit cf27795
Show file tree
Hide file tree
Showing 7 changed files with 213 additions and 104 deletions.
102 changes: 0 additions & 102 deletions bigquery/docs/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2303,108 +2303,6 @@ def test_query_external_gcs_permanent_table(client, to_delete):
assert len(w_states) == 4


def test_query_external_sheets_temporary_table(client):
    """Query a public Google Sheet through a temporary external table.

    The ``client`` argument is rebound inside the sample to a client built
    from credentials carrying both the Drive and BigQuery scopes. The test
    passes when the query finds four state names starting with "W".
    """
    # [START bigquery_query_external_sheets_temp]
    # [START bigquery_auth_drive_scope]
    import google.auth

    # from google.cloud import bigquery

    # Request one scope per API; both the Drive and BigQuery APIs must be
    # enabled for your project before running this code
    api_scopes = [
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/bigquery",
    ]
    credentials, project = google.auth.default(scopes=api_scopes)
    client = bigquery.Client(credentials=credentials, project=project)
    # [END bigquery_auth_drive_scope]

    # Use a shareable link or grant viewing access to the email address you
    # used to authenticate with BigQuery (this example Sheet is public)
    sheet_url = (
        "https://docs.google.com/spreadsheets"
        "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing"
    )

    # Describe the Sheet as an external data source
    external_config = bigquery.ExternalConfig("GOOGLE_SHEETS")
    external_config.source_uris = [sheet_url]
    external_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    external_config.options.skip_leading_rows = 1  # optionally skip header row

    # Expose the external source to the query job under a temporary table name
    table_id = "us_states"
    job_config = bigquery.QueryJobConfig()
    job_config.table_definitions = {table_id: external_config}

    # Example query to find states starting with 'W'
    sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)
    query_job = client.query(sql, job_config=job_config)  # API request

    # Iterating over the job waits for the query to finish
    w_states = list(query_job)
    w_state_count = len(w_states)
    print("There are {} states with names starting with W.".format(w_state_count))
    # [END bigquery_query_external_sheets_temp]
    assert len(w_states) == 4


def test_query_external_sheets_permanent_table(client, to_delete):
    """Query a public Google Sheet through a permanent external table.

    A uniquely named dataset is created up front and appended to
    ``to_delete``. Inside the sample, ``client`` is rebound to a client
    built from credentials carrying both the Drive and BigQuery scopes.
    The test passes when the query finds four state names starting with "W".
    """
    dataset_id = "query_external_sheets_{}".format(_millis())
    dataset_ref_for_setup = client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref_for_setup)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_query_external_sheets_perm]
    import google.auth

    # from google.cloud import bigquery
    # dataset_id = 'my_dataset'

    # Request one scope per API; both the Drive and BigQuery APIs must be
    # enabled for your project before running this code
    api_scopes = [
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/bigquery",
    ]
    credentials, project = google.auth.default(scopes=api_scopes)
    client = bigquery.Client(credentials=credentials, project=project)

    # Describe the table that will be linked to the Sheet
    dataset_ref = client.dataset(dataset_id)
    table_id = "us_states"
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    table_ref = dataset_ref.table(table_id)
    table = bigquery.Table(table_ref, schema=schema)

    # Use a shareable link or grant viewing access to the email address you
    # used to authenticate with BigQuery (this example Sheet is public)
    sheet_url = (
        "https://docs.google.com/spreadsheets"
        "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing"
    )

    # Configure the external data source
    external_config = bigquery.ExternalConfig("GOOGLE_SHEETS")
    external_config.source_uris = [sheet_url]
    external_config.options.skip_leading_rows = 1  # optionally skip header row
    table.external_data_configuration = external_config

    # Create a permanent table linked to the Sheets file
    table = client.create_table(table)  # API request

    # Example query to find states starting with 'W'
    sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id)
    query_job = client.query(sql)  # API request

    # Iterating over the job waits for the query to finish
    w_states = list(query_job)
    w_state_count = len(w_states)
    print("There are {} states with names starting with W.".format(w_state_count))
    # [END bigquery_query_external_sheets_perm]
    assert len(w_states) == 4


def test_ddl_create_view(client, to_delete, capsys):
"""Create a view via a DDL query."""
project = client.project
Expand Down
14 changes: 14 additions & 0 deletions bigquery/google/cloud/bigquery/external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
from google.cloud.bigquery.schema import SchemaField


Expand Down Expand Up @@ -524,6 +525,19 @@ def skip_leading_rows(self):
def skip_leading_rows(self, value):
self._properties["skipLeadingRows"] = str(value)

@property
def range(self):
    """str: Sheet range that BigQuery queries from, or ``None`` when unset.

    The value is read from and written to the ``"range"`` key of this
    object's properties, for example ``"Sheet1!A5:B10"``.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions
    """
    stored_range = self._properties.get("range")
    return _str_or_none(stored_range)

@range.setter
def range(self, value):
    # Stored as given; no conversion is applied on the way in.
    self._properties["range"] = value

def to_api_repr(self):
"""Build an API representation of this object.
Expand Down
73 changes: 73 additions & 0 deletions bigquery/samples/query_external_sheets_permanent_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def query_external_sheets_permanent_table(dataset_id):
    """Create a permanent table backed by a Google Sheet range and query it.

    Args:
        dataset_id: ID of an existing dataset in which the ``us_states``
            table is created; it is also used to qualify the table name in
            the query.

    Prints how many states in the selected range have names starting
    with "W".
    """

    # [START bigquery_query_external_sheets_perm]
    from google.cloud import bigquery
    import google.auth

    # Request one scope per API. Both the Drive and BigQuery APIs must be
    # enabled for your project before running this code.
    api_scopes = [
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/bigquery",
    ]
    credentials, project = google.auth.default(scopes=api_scopes)

    # TODO(developer): Construct a BigQuery client object.
    client = bigquery.Client(credentials=credentials, project=project)

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = "your-project.your_dataset"

    # Describe the table that will be linked to the Sheet.
    dataset = client.get_dataset(dataset_id)
    table_id = "us_states"
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    table_ref = dataset.table(table_id)
    table = bigquery.Table(table_ref, schema=schema)

    # Use a shareable link or grant viewing access to the email address you
    # used to authenticate with BigQuery (this example Sheet is public).
    sheet_url = (
        "https://docs.google.com/spreadsheets"
        "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing"
    )

    # Configure the external data source.
    external_config = bigquery.ExternalConfig("GOOGLE_SHEETS")
    external_config.source_uris = [sheet_url]
    external_config.options.skip_leading_rows = 1  # Optionally skip header row.
    # Optionally set range of the sheet to query from.
    external_config.options.range = "us-states!A20:B49"
    table.external_data_configuration = external_config

    # Create a permanent table linked to the Sheets file.
    table = client.create_table(table)  # Make an API request.

    # Example query to find states starting with "W".
    sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id)
    query_job = client.query(sql)  # Make an API request.

    # Wait for the query to complete.
    w_states = list(query_job)
    w_state_count = len(w_states)
    print(
        "There are {} states with names starting with W in the selected range.".format(
            w_state_count
        )
    )
    # [END bigquery_query_external_sheets_perm]
69 changes: 69 additions & 0 deletions bigquery/samples/query_external_sheets_temporary_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def query_external_sheets_temporary_table():
    """Query a Google Sheet range through a temporary external table.

    The Sheet is attached to the query job under the name ``us_states``
    rather than being created as a table in a dataset. Prints how many
    states in the selected range have names starting with "W".
    """

    # [START bigquery_query_external_sheets_temp]
    # [START bigquery_auth_drive_scope]
    from google.cloud import bigquery
    import google.auth

    # Request one scope per API. Both the Drive and BigQuery APIs must be
    # enabled for your project before running this code.
    api_scopes = [
        "https://www.googleapis.com/auth/drive",
        "https://www.googleapis.com/auth/bigquery",
    ]
    credentials, project = google.auth.default(scopes=api_scopes)

    # TODO(developer): Construct a BigQuery client object.
    client = bigquery.Client(credentials=credentials, project=project)
    # [END bigquery_auth_drive_scope]

    # Use a shareable link or grant viewing access to the email address you
    # used to authenticate with BigQuery (this example Sheet is public).
    sheet_url = (
        "https://docs.google.com/spreadsheets"
        "/d/1i_QCL-7HcSyUZmIbP9E6lO_T5u3HnpLe7dnpHaijg_E/edit?usp=sharing"
    )

    # Configure the external data source.
    external_config = bigquery.ExternalConfig("GOOGLE_SHEETS")
    external_config.source_uris = [sheet_url]
    external_config.schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
    ]
    external_config.options.skip_leading_rows = 1  # Optionally skip header row.
    # Optionally set range of the sheet to query from.
    external_config.options.range = "us-states!A20:B49"

    # Configure the query job to expose the Sheet under a temporary name.
    table_id = "us_states"
    job_config = bigquery.QueryJobConfig()
    job_config.table_definitions = {table_id: external_config}

    # Example query to find states starting with "W".
    sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)
    query_job = client.query(sql, job_config=job_config)  # Make an API request.

    # Wait for the query to complete.
    w_states = list(query_job)
    w_state_count = len(w_states)
    print(
        "There are {} states with names starting with W in the selected range.".format(
            w_state_count
        )
    )
    # [END bigquery_query_external_sheets_temp]
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from .. import query_external_sheets_permanent_table


def test_query_external_sheets_permanent_table(capsys, dataset_id):
    """Run the permanent-table Sheets sample and check its printed summary."""
    sample = query_external_sheets_permanent_table
    sample.query_external_sheets_permanent_table(dataset_id)

    out, _ = capsys.readouterr()
    expected = "There are 2 states with names starting with W in the selected range."
    assert expected in out
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from .. import query_external_sheets_temporary_table


def test_query_external_sheets_temporary_table(capsys):
    """Run the temporary-table Sheets sample and check its printed summary."""
    sample = query_external_sheets_temporary_table
    sample.query_external_sheets_temporary_table()

    out, _ = capsys.readouterr()
    expected = "There are 2 states with names starting with W in the selected range."
    assert expected in out
11 changes: 9 additions & 2 deletions bigquery/tests/unit/test_external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,10 @@ def test_from_api_repr_sheets(self):
self.BASE_RESOURCE,
{
"sourceFormat": "GOOGLE_SHEETS",
"googleSheetsOptions": {"skipLeadingRows": "123"},
"googleSheetsOptions": {
"skipLeadingRows": "123",
"range": "Sheet1!A5:B10",
},
},
)

Expand All @@ -140,26 +143,30 @@ def test_from_api_repr_sheets(self):
self.assertEqual(ec.source_format, "GOOGLE_SHEETS")
self.assertIsInstance(ec.options, external_config.GoogleSheetsOptions)
self.assertEqual(ec.options.skip_leading_rows, 123)
self.assertEqual(ec.options.range, "Sheet1!A5:B10")

got_resource = ec.to_api_repr()

self.assertEqual(got_resource, resource)

del resource["googleSheetsOptions"]["skipLeadingRows"]
del resource["googleSheetsOptions"]["range"]
ec = external_config.ExternalConfig.from_api_repr(resource)
self.assertIsNone(ec.options.skip_leading_rows)
self.assertIsNone(ec.options.range)
got_resource = ec.to_api_repr()
self.assertEqual(got_resource, resource)

def test_to_api_repr_sheets(self):
ec = external_config.ExternalConfig("GOOGLE_SHEETS")
options = external_config.GoogleSheetsOptions()
options.skip_leading_rows = 123
options.range = "Sheet1!A5:B10"
ec._options = options

exp_resource = {
"sourceFormat": "GOOGLE_SHEETS",
"googleSheetsOptions": {"skipLeadingRows": "123"},
"googleSheetsOptions": {"skipLeadingRows": "123", "range": "Sheet1!A5:B10"},
}

got_resource = ec.to_api_repr()
Expand Down

0 comments on commit cf27795

Please sign in to comment.