From f0ace2ac2307ef359511a235f80f5ce9e46264c1 Mon Sep 17 00:00:00 2001 From: aribray <45905583+aribray@users.noreply.github.com> Date: Wed, 18 Jan 2023 16:29:06 -0600 Subject: [PATCH] docs: adds snippet for creating table with external data config (#1420) * docs: add samples for creating table with external data configuration and creating an external table definition Co-authored-by: Anthonios Partheniou --- docs/usage/tables.rst | 9 +++ ...reate_table_external_data_configuration.py | 66 +++++++++++++++++++ samples/tests/conftest.py | 18 ++++- ...reate_table_external_data_configuration.py | 32 +++++++++ 4 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 samples/create_table_external_data_configuration.py create mode 100644 samples/tests/test_create_table_external_data_configuration.py diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst index d924fe214..105e93637 100644 --- a/docs/usage/tables.rst +++ b/docs/usage/tables.rst @@ -58,6 +58,15 @@ Create an empty table with the :start-after: [START bigquery_create_table] :end-before: [END bigquery_create_table] +Create a table using an external data source with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../samples/create_table_external_data_configuration.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_external_data_configuration] + :end-before: [END bigquery_create_table_external_data_configuration] + Create a clustered table with the :func:`~google.cloud.bigquery.client.Client.create_table` method: diff --git a/samples/create_table_external_data_configuration.py b/samples/create_table_external_data_configuration.py new file mode 100644 index 000000000..068f91555 --- /dev/null +++ b/samples/create_table_external_data_configuration.py @@ -0,0 +1,66 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_external_data_configuration( + table_id: str, +) -> None: + """Create a table using an external data source""" + orig_table_id = table_id + # [START bigquery_create_table_external_data_configuration] + # [START bigquery_create_external_table_definition] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + table_id = "your-project.your_dataset.your_table_name" + # [END bigquery_create_table_external_data_configuration] + table_id = orig_table_id + # [START bigquery_create_table_external_data_configuration] + + # TODO(developer): Set the external source format of your table. + # Note that the set of allowed values for external data sources is + # different from the set used for loading data (see google.cloud.bigquery.job.SourceFormat). 
+ external_source_format = "AVRO" + + # TODO(developer): Set the source_uris to point to your data in Google Cloud + source_uris = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", + ] + + # Create ExternalConfig object with external source format + external_config = bigquery.ExternalConfig(external_source_format) + # Set source_uris that point to your data in Google Cloud + external_config.source_uris = source_uris + + # TODO(developer): You have the option to set a reference_file_schema_uri, which points to + # a reference file for the table schema. + reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro" + + external_config.reference_file_schema_uri = reference_file_schema_uri + # [END bigquery_create_external_table_definition] + + table = bigquery.Table(table_id) + # Set the external data configuration of the table + table.external_data_configuration = external_config + table = client.create_table(table) # Make an API request. + + print( + f"Created table with external source format {table.external_data_configuration.source_format}" + ) + # [END bigquery_create_table_external_data_configuration] diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index b7a2ad587..99bd2e367 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -13,7 +13,7 @@ # limitations under the License. 
import datetime -from typing import Iterator +from typing import Iterator, List import uuid import google.auth @@ -47,6 +47,22 @@ def random_table_id(dataset_id: str) -> str: return "{}.{}".format(dataset_id, random_table_id) +@pytest.fixture +def avro_source_uris() -> List[str]: + avro_source_uris = [ + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro", + "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro", + ] + return avro_source_uris + + +@pytest.fixture +def reference_file_schema_uri() -> str: + reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro" + return reference_file_schema_uri + + @pytest.fixture def random_dataset_id(client: bigquery.Client) -> Iterator[str]: now = datetime.datetime.now() diff --git a/samples/tests/test_create_table_external_data_configuration.py b/samples/tests/test_create_table_external_data_configuration.py new file mode 100644 index 000000000..bf4cf17d4 --- /dev/null +++ b/samples/tests/test_create_table_external_data_configuration.py @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from .. 
import create_table_external_data_configuration + +if typing.TYPE_CHECKING: + import pytest + + +def test_create_table_external_data_configuration( + capsys: "pytest.CaptureFixture[str]", + random_table_id: str, +) -> None: + + create_table_external_data_configuration.create_table_external_data_configuration( + random_table_id + ) + out, _ = capsys.readouterr() + assert "Created table with external source format AVRO" in out