From 280cecec047f511a27d39ac2a19745192ae07d36 Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Mon, 20 Jan 2020 19:39:58 +0530 Subject: [PATCH 1/2] refactor(bigquery): update code samples of load table autodetect and truncate --- bigquery/docs/snippets.py | 164 ------------------ bigquery/docs/usage/tables.rst | 58 +++++++ .../samples/load_table_uri_autodetect_csv.py | 45 +++++ .../samples/load_table_uri_autodetect_json.py | 42 +++++ .../samples/load_table_uri_truncate_avro.py | 55 ++++++ .../samples/load_table_uri_truncate_csv.py | 56 ++++++ .../samples/load_table_uri_truncate_json.py | 55 ++++++ .../samples/load_table_uri_truncate_orc.py | 55 ++++++ .../load_table_uri_truncate_parquet.py | 55 ++++++ .../test_load_table_uri_autodetect_csv.py | 22 +++ .../test_load_table_uri_autodetect_json.py | 22 +++ .../test_load_table_uri_truncate_avro.py | 21 +++ .../tests/test_load_table_uri_truncate_csv.py | 21 +++ .../test_load_table_uri_truncate_json.py | 21 +++ .../tests/test_load_table_uri_truncate_orc.py | 21 +++ .../test_load_table_uri_truncate_parquet.py | 21 +++ 16 files changed, 570 insertions(+), 164 deletions(-) create mode 100644 bigquery/samples/load_table_uri_autodetect_csv.py create mode 100644 bigquery/samples/load_table_uri_autodetect_json.py create mode 100644 bigquery/samples/load_table_uri_truncate_avro.py create mode 100644 bigquery/samples/load_table_uri_truncate_csv.py create mode 100644 bigquery/samples/load_table_uri_truncate_json.py create mode 100644 bigquery/samples/load_table_uri_truncate_orc.py create mode 100644 bigquery/samples/load_table_uri_truncate_parquet.py create mode 100644 bigquery/samples/tests/test_load_table_uri_autodetect_csv.py create mode 100644 bigquery/samples/tests/test_load_table_uri_autodetect_json.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_avro.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_csv.py create mode 100644 
bigquery/samples/tests/test_load_table_uri_truncate_json.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_orc.py create mode 100644 bigquery/samples/tests/test_load_table_uri_truncate_parquet.py diff --git a/bigquery/docs/snippets.py b/bigquery/docs/snippets.py index bb584fa0494a..25ef6a0af0c7 100644 --- a/bigquery/docs/snippets.py +++ b/bigquery/docs/snippets.py @@ -25,7 +25,6 @@ import time import pytest -import six try: import fastparquet @@ -844,169 +843,6 @@ def test_load_table_from_uri_orc(client, to_delete, capsys): assert "Loaded 50 rows." in out -def test_load_table_from_uri_autodetect(client, to_delete, capsys): - """Load table from a GCS URI using various formats and auto-detected schema - Each file format has its own tested load from URI sample. Because most of - the code is common for autodetect, append, and truncate, this sample - includes snippets for all supported formats but only calls a single load - job. - This code snippet is made up of shared code, then format-specific code, - followed by more shared code. Note that only the last format in the - format-specific code section will be tested in this test. - """ - dataset_id = "load_table_from_uri_auto_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - # Shared code - # [START bigquery_load_table_gcs_csv_autodetect] - # [START bigquery_load_table_gcs_json_autodetect] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - - dataset_ref = client.dataset(dataset_id) - job_config = bigquery.LoadJobConfig() - job_config.autodetect = True - # [END bigquery_load_table_gcs_csv_autodetect] - # [END bigquery_load_table_gcs_json_autodetect] - - # Format-specific code - # [START bigquery_load_table_gcs_csv_autodetect] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. 
- job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - # [END bigquery_load_table_gcs_csv_autodetect] - # unset csv-specific attribute - del job_config._properties["load"]["skipLeadingRows"] - - # [START bigquery_load_table_gcs_json_autodetect] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - # [END bigquery_load_table_gcs_json_autodetect] - - # Shared code - # [START bigquery_load_table_gcs_csv_autodetect] - # [START bigquery_load_table_gcs_json_autodetect] - load_job = client.load_table_from_uri( - uri, dataset_ref.table("us_states"), job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(dataset_ref.table("us_states")) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_csv_autodetect] - # [END bigquery_load_table_gcs_json_autodetect] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." in out - - -def test_load_table_from_uri_truncate(client, to_delete, capsys): - """Replaces table data with data from a GCS URI using various formats - Each file format has its own tested load from URI sample. Because most of - the code is common for autodetect, append, and truncate, this sample - includes snippets for all supported formats but only calls a single load - job. - This code snippet is made up of shared code, then format-specific code, - followed by more shared code. Note that only the last format in the - format-specific code section will be tested in this test. 
- """ - dataset_id = "load_table_from_uri_trunc_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - client.create_dataset(dataset) - to_delete.append(dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - table_ref = dataset.table("us_states") - body = six.BytesIO(b"Washington,WA") - client.load_table_from_file(body, table_ref, job_config=job_config).result() - previous_rows = client.get_table(table_ref).num_rows - assert previous_rows > 0 - - # Shared code - # [START bigquery_load_table_gcs_avro_truncate] - # [START bigquery_load_table_gcs_csv_truncate] - # [START bigquery_load_table_gcs_json_truncate] - # [START bigquery_load_table_gcs_parquet_truncate] - # [START bigquery_load_table_gcs_orc_truncate] - # from google.cloud import bigquery - # client = bigquery.Client() - # table_ref = client.dataset('my_dataset').table('existing_table') - - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE - # [END bigquery_load_table_gcs_avro_truncate] - # [END bigquery_load_table_gcs_csv_truncate] - # [END bigquery_load_table_gcs_json_truncate] - # [END bigquery_load_table_gcs_parquet_truncate] - # [END bigquery_load_table_gcs_orc_truncate] - - # Format-specific code - # [START bigquery_load_table_gcs_avro_truncate] - job_config.source_format = bigquery.SourceFormat.AVRO - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" - # [END bigquery_load_table_gcs_avro_truncate] - - # [START bigquery_load_table_gcs_csv_truncate] - job_config.skip_leading_rows = 1 - # The source format defaults to CSV, so the line below is optional. 
- job_config.source_format = bigquery.SourceFormat.CSV - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - # [END bigquery_load_table_gcs_csv_truncate] - # unset csv-specific attribute - del job_config._properties["load"]["skipLeadingRows"] - - # [START bigquery_load_table_gcs_json_truncate] - job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" - # [END bigquery_load_table_gcs_json_truncate] - - # [START bigquery_load_table_gcs_parquet_truncate] - job_config.source_format = bigquery.SourceFormat.PARQUET - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" - # [END bigquery_load_table_gcs_parquet_truncate] - - # [START bigquery_load_table_gcs_orc_truncate] - job_config.source_format = bigquery.SourceFormat.ORC - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" - # [END bigquery_load_table_gcs_orc_truncate] - - # Shared code - # [START bigquery_load_table_gcs_avro_truncate] - # [START bigquery_load_table_gcs_csv_truncate] - # [START bigquery_load_table_gcs_json_truncate] - # [START bigquery_load_table_gcs_parquet_truncate] - # [START bigquery_load_table_gcs_orc_truncate] - load_job = client.load_table_from_uri( - uri, table_ref, job_config=job_config - ) # API request - print("Starting job {}".format(load_job.job_id)) - - load_job.result() # Waits for table load to complete. - print("Job finished.") - - destination_table = client.get_table(table_ref) - print("Loaded {} rows.".format(destination_table.num_rows)) - # [END bigquery_load_table_gcs_avro_truncate] - # [END bigquery_load_table_gcs_csv_truncate] - # [END bigquery_load_table_gcs_json_truncate] - # [END bigquery_load_table_gcs_parquet_truncate] - # [END bigquery_load_table_gcs_orc_truncate] - - out, _ = capsys.readouterr() - assert "Loaded 50 rows." 
in out - - def test_load_table_add_column(client, to_delete): dataset_id = "load_table_add_column_{}".format(_millis()) dataset_ref = client.dataset(dataset_id) diff --git a/bigquery/docs/usage/tables.rst b/bigquery/docs/usage/tables.rst index b6f8dbdde646..3f2e57c4ff3d 100644 --- a/bigquery/docs/usage/tables.rst +++ b/bigquery/docs/usage/tables.rst @@ -110,6 +110,22 @@ Load a Parquet file from Cloud Storage: See also: `Loading Parquet data from Cloud Storage `_. +Load a autodetect CSV file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_autodetect_csv.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_csv_autodetect] + :end-before: [END bigquery_load_table_gcs_csv_autodetect] + +Load a autodetect JSON file from Cloud Storage: + +.. literalinclude:: ../samples/load_table_uri_autodetect_json.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json_autodetect] + :end-before: [END bigquery_load_table_gcs_json_autodetect] + Updating a Table ^^^^^^^^^^^^^^^^ @@ -198,3 +214,45 @@ Restore a deleted table from a snapshot by using the :dedent: 4 :start-after: [START bigquery_undelete_table] :end-before: [END bigquery_undelete_table] + +Overwrite a Table +^^^^^^^^^^^^^^^^^ +Overwrite the AVRO file from Cloud Storage to table data: + +.. literalinclude:: ../samples/load_table_uri_truncate_avro.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_avro_truncate] + :end-before: [END bigquery_load_table_gcs_avro_truncate] + +Overwrite the CSV file from Cloud Storage to table data: + +.. literalinclude:: ../samples/load_table_uri_truncate_csv.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_csv_truncate] + :end-before: [END bigquery_load_table_gcs_csv_truncate] + +Overwrite the JSON file from Cloud Storage to table data: + +.. 
literalinclude:: ../samples/load_table_uri_truncate_json.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_json_truncate] + :end-before: [END bigquery_load_table_gcs_json_truncate] + +Overwrite the ORC file from Cloud Storage to table data: + +.. literalinclude:: ../samples/load_table_uri_truncate_orc.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_orc_truncate] + :end-before: [END bigquery_load_table_gcs_orc_truncate] + +Overwrite the PARQUET file from Cloud Storage to table data: + +.. literalinclude:: ../samples/load_table_uri_truncate_parquet.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_gcs_parquet_truncate] + :end-before: [END bigquery_load_table_gcs_parquet_truncate] \ No newline at end of file diff --git a/bigquery/samples/load_table_uri_autodetect_csv.py b/bigquery/samples/load_table_uri_autodetect_csv.py new file mode 100644 index 000000000000..fce37d0ed18d --- /dev/null +++ b/bigquery/samples/load_table_uri_autodetect_csv.py @@ -0,0 +1,45 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_autodetect_csv(table_id): + + # [START bigquery_load_table_gcs_csv_autodetect] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. 
+ # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + job_config = bigquery.LoadJobConfig( + autodetect=True, + skip_leading_rows=1, + # The source format defaults to CSV, so the line below is optional. + source_format=bigquery.SourceFormat.CSV, + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + load_job.result() # Waits for the job to complete. + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_autodetect] diff --git a/bigquery/samples/load_table_uri_autodetect_json.py b/bigquery/samples/load_table_uri_autodetect_json.py new file mode 100644 index 000000000000..65fd64c311a4 --- /dev/null +++ b/bigquery/samples/load_table_uri_autodetect_json.py @@ -0,0 +1,42 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_autodetect_json(table_id): + + # [START bigquery_load_table_gcs_json_autodetect] + from google.cloud import bigquery + + # Construct a BigQuery client object. 
+ client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + # Set the encryption key to use for the destination. + # TODO: Replace this key with a key you have created in KMS. + # kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + # "cloud-samples-tests", "us", "test", "test" + # ) + job_config = bigquery.LoadJobConfig( + autodetect=True, source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + ) + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + load_job.result() # Waits for the job to complete. + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json_autodetect] diff --git a/bigquery/samples/load_table_uri_truncate_avro.py b/bigquery/samples/load_table_uri_truncate_avro.py new file mode 100644 index 000000000000..22a82286c585 --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_avro.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_avro(table_id): + + # [START bigquery_load_table_gcs_avro_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. 
+ client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.AVRO, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.avro" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_avro_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_csv.py b/bigquery/samples/load_table_uri_truncate_csv.py new file mode 100644 index 000000000000..3d2ff574f59f --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_csv.py @@ -0,0 +1,56 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def load_table_uri_truncate_csv(table_id): + + # [START bigquery_load_table_gcs_csv_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.CSV, + skip_leading_rows=1, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_csv_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_json.py b/bigquery/samples/load_table_uri_truncate_json.py new file mode 100644 index 000000000000..cc7d70567544 --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_json.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_json(table_id): + + # [START bigquery_load_table_gcs_json_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.json" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_json_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_orc.py b/bigquery/samples/load_table_uri_truncate_orc.py new file mode 100644 index 000000000000..bbf696bbcfdb --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_orc.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_orc(table_id): + + # [START bigquery_load_table_gcs_orc_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.ORC, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.orc" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. 
+ + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_orc_truncate] diff --git a/bigquery/samples/load_table_uri_truncate_parquet.py b/bigquery/samples/load_table_uri_truncate_parquet.py new file mode 100644 index 000000000000..2fe67fdd6623 --- /dev/null +++ b/bigquery/samples/load_table_uri_truncate_parquet.py @@ -0,0 +1,55 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_truncate_parquet(table_id): + + # [START bigquery_load_table_gcs_parquet_truncate] + import six + + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. 
+ # table_id = "your-project.your_dataset.your_table_name + + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + ) + + body = six.BytesIO(b"Washington,WA") + client.load_table_from_file(body, table_id, job_config=job_config).result() + previous_rows = client.get_table(table_id).num_rows + assert previous_rows > 0 + + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.PARQUET, + ) + + uri = "gs://cloud-samples-data/bigquery/us-states/us-states.parquet" + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_parquet_truncate] diff --git a/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py b/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py new file mode 100644 index 000000000000..7233bd1dd4c2 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. 
import load_table_uri_autodetect_csv + + +def test_load_table_uri_autodetect_csv(capsys, random_table_id): + + load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id) + out, err = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_autodetect_json.py b/bigquery/samples/tests/test_load_table_uri_autodetect_json.py new file mode 100644 index 000000000000..f9227f4edbb9 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_autodetect_json.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_autodetect_json + + +def test_load_table_uri_autodetect_json(capsys, random_table_id): + + load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id) + out, err = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_avro.py b/bigquery/samples/tests/test_load_table_uri_truncate_avro.py new file mode 100644 index 000000000000..eb5e65cd07e1 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_avro.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_avro + + +def test_load_table_uri_truncate_avro(capsys, random_table_id): + load_table_uri_truncate_avro.load_table_uri_truncate_avro(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_csv.py b/bigquery/samples/tests/test_load_table_uri_truncate_csv.py new file mode 100644 index 000000000000..0832797f7130 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_csv.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_csv + + +def test_load_table_uri_truncate_csv(capsys, random_table_id): + load_table_uri_truncate_csv.load_table_uri_truncate_csv(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." 
in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_json.py b/bigquery/samples/tests/test_load_table_uri_truncate_json.py new file mode 100644 index 000000000000..35d42aae91c2 --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_json.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_json + + +def test_load_table_uri_truncate_json(capsys, random_table_id): + load_table_uri_truncate_json.load_table_uri_truncate_json(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_orc.py b/bigquery/samples/tests/test_load_table_uri_truncate_orc.py new file mode 100644 index 000000000000..7a61516a12cc --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_orc.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_orc + + +def test_load_table_uri_truncate_orc(capsys, random_table_id): + load_table_uri_truncate_orc.load_table_uri_truncate_orc(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py b/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py new file mode 100644 index 000000000000..2a9c33c322bc --- /dev/null +++ b/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py @@ -0,0 +1,21 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_uri_truncate_parquet + + +def test_load_table_uri_truncate_parquet(capsys, random_table_id): + load_table_uri_truncate_parquet.load_table_uri_truncate_parquet(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." 
in out From ba9f7569b922809ab05cc6a13515a4ac324d2fc7 Mon Sep 17 00:00:00 2001 From: HemangChothani Date: Fri, 24 Jan 2020 11:58:45 +0530 Subject: [PATCH 2/2] refactor(bigquery): update copyright to 2020 and method description --- bigquery/docs/usage/tables.rst | 15 ++++++++------- bigquery/samples/load_table_uri_autodetect_csv.py | 2 +- .../samples/load_table_uri_autodetect_json.py | 2 +- bigquery/samples/load_table_uri_truncate_avro.py | 2 +- bigquery/samples/load_table_uri_truncate_csv.py | 2 +- bigquery/samples/load_table_uri_truncate_json.py | 2 +- bigquery/samples/load_table_uri_truncate_orc.py | 2 +- .../samples/load_table_uri_truncate_parquet.py | 2 +- .../tests/test_load_table_uri_autodetect_csv.py | 2 +- .../tests/test_load_table_uri_autodetect_json.py | 2 +- .../tests/test_load_table_uri_truncate_avro.py | 2 +- .../tests/test_load_table_uri_truncate_csv.py | 2 +- .../tests/test_load_table_uri_truncate_json.py | 2 +- .../tests/test_load_table_uri_truncate_orc.py | 2 +- .../tests/test_load_table_uri_truncate_parquet.py | 2 +- 15 files changed, 22 insertions(+), 21 deletions(-) diff --git a/bigquery/docs/usage/tables.rst b/bigquery/docs/usage/tables.rst index 3f2e57c4ff3d..5c8020750a55 100644 --- a/bigquery/docs/usage/tables.rst +++ b/bigquery/docs/usage/tables.rst @@ -110,7 +110,7 @@ Load a Parquet file from Cloud Storage: See also: `Loading Parquet data from Cloud Storage `_. -Load a autodetect CSV file from Cloud Storage: +Load a CSV file from Cloud Storage and auto-detect schema: .. literalinclude:: ../samples/load_table_uri_autodetect_csv.py :language: python @@ -118,7 +118,7 @@ Load a autodetect CSV file from Cloud Storage: :start-after: [START bigquery_load_table_gcs_csv_autodetect] :end-before: [END bigquery_load_table_gcs_csv_autodetect] -Load a autodetect JSON file from Cloud Storage: +Load a JSON file from Cloud Storage and auto-detect schema: .. 
literalinclude:: ../samples/load_table_uri_autodetect_json.py :language: python @@ -217,7 +217,8 @@ Restore a deleted table from a snapshot by using the Overwrite a Table ^^^^^^^^^^^^^^^^^ -Overwrite the AVRO file from Cloud Storage to table data: + +Replace the table data with an Avro file from Cloud Storage: .. literalinclude:: ../samples/load_table_uri_truncate_avro.py :language: python @@ -225,7 +226,7 @@ Overwrite the AVRO file from Cloud Storage to table data: :start-after: [START bigquery_load_table_gcs_avro_truncate] :end-before: [END bigquery_load_table_gcs_avro_truncate] -Overwrite the CSV file from Cloud Storage to table data: +Replace the table data with a CSV file from Cloud Storage: .. literalinclude:: ../samples/load_table_uri_truncate_csv.py :language: python @@ -233,7 +234,7 @@ Overwrite the CSV file from Cloud Storage to table data: :start-after: [START bigquery_load_table_gcs_csv_truncate] :end-before: [END bigquery_load_table_gcs_csv_truncate] -Overwrite the JSON file from Cloud Storage to table data: +Replace the table data with a JSON file from Cloud Storage: .. literalinclude:: ../samples/load_table_uri_truncate_json.py :language: python @@ -241,7 +242,7 @@ Overwrite the JSON file from Cloud Storage to table data: :start-after: [START bigquery_load_table_gcs_json_truncate] :end-before: [END bigquery_load_table_gcs_json_truncate] -Overwrite the ORC file from Cloud Storage to table data: +Replace the table data with an ORC file from Cloud Storage: .. literalinclude:: ../samples/load_table_uri_truncate_orc.py :language: python @@ -249,7 +250,7 @@ Overwrite the ORC file from Cloud Storage to table data: :start-after: [START bigquery_load_table_gcs_orc_truncate] :end-before: [END bigquery_load_table_gcs_orc_truncate] -Overwrite the PARQUET file from Cloud Storage to table data: +Replace the table data with a Parquet file from Cloud Storage: .. 
literalinclude:: ../samples/load_table_uri_truncate_parquet.py :language: python diff --git a/bigquery/samples/load_table_uri_autodetect_csv.py b/bigquery/samples/load_table_uri_autodetect_csv.py index fce37d0ed18d..09a5d708d437 100644 --- a/bigquery/samples/load_table_uri_autodetect_csv.py +++ b/bigquery/samples/load_table_uri_autodetect_csv.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/load_table_uri_autodetect_json.py b/bigquery/samples/load_table_uri_autodetect_json.py index 65fd64c311a4..61b7aab1287d 100644 --- a/bigquery/samples/load_table_uri_autodetect_json.py +++ b/bigquery/samples/load_table_uri_autodetect_json.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/load_table_uri_truncate_avro.py b/bigquery/samples/load_table_uri_truncate_avro.py index 22a82286c585..98a791477dd1 100644 --- a/bigquery/samples/load_table_uri_truncate_avro.py +++ b/bigquery/samples/load_table_uri_truncate_avro.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/load_table_uri_truncate_csv.py b/bigquery/samples/load_table_uri_truncate_csv.py index 3d2ff574f59f..73de7a8c17cf 100644 --- a/bigquery/samples/load_table_uri_truncate_csv.py +++ b/bigquery/samples/load_table_uri_truncate_csv.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/bigquery/samples/load_table_uri_truncate_json.py b/bigquery/samples/load_table_uri_truncate_json.py index cc7d70567544..a30fae73629e 100644 --- a/bigquery/samples/load_table_uri_truncate_json.py +++ b/bigquery/samples/load_table_uri_truncate_json.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/load_table_uri_truncate_orc.py b/bigquery/samples/load_table_uri_truncate_orc.py index bbf696bbcfdb..18f963be2e7d 100644 --- a/bigquery/samples/load_table_uri_truncate_orc.py +++ b/bigquery/samples/load_table_uri_truncate_orc.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/load_table_uri_truncate_parquet.py b/bigquery/samples/load_table_uri_truncate_parquet.py index 2fe67fdd6623..28692d840d37 100644 --- a/bigquery/samples/load_table_uri_truncate_parquet.py +++ b/bigquery/samples/load_table_uri_truncate_parquet.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py b/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py index 7233bd1dd4c2..a407197834f0 100644 --- a/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py +++ b/bigquery/samples/tests/test_load_table_uri_autodetect_csv.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/bigquery/samples/tests/test_load_table_uri_autodetect_json.py b/bigquery/samples/tests/test_load_table_uri_autodetect_json.py index f9227f4edbb9..df14d26ed2c4 100644 --- a/bigquery/samples/tests/test_load_table_uri_autodetect_json.py +++ b/bigquery/samples/tests/test_load_table_uri_autodetect_json.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_avro.py b/bigquery/samples/tests/test_load_table_uri_truncate_avro.py index eb5e65cd07e1..ba680cabd49f 100644 --- a/bigquery/samples/tests/test_load_table_uri_truncate_avro.py +++ b/bigquery/samples/tests/test_load_table_uri_truncate_avro.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_csv.py b/bigquery/samples/tests/test_load_table_uri_truncate_csv.py index 0832797f7130..5c1da7dcec62 100644 --- a/bigquery/samples/tests/test_load_table_uri_truncate_csv.py +++ b/bigquery/samples/tests/test_load_table_uri_truncate_csv.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_json.py b/bigquery/samples/tests/test_load_table_uri_truncate_json.py index 35d42aae91c2..180ca7f40b2e 100644 --- a/bigquery/samples/tests/test_load_table_uri_truncate_json.py +++ b/bigquery/samples/tests/test_load_table_uri_truncate_json.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_orc.py b/bigquery/samples/tests/test_load_table_uri_truncate_orc.py index 7a61516a12cc..322bf31276c3 100644 --- a/bigquery/samples/tests/test_load_table_uri_truncate_orc.py +++ b/bigquery/samples/tests/test_load_table_uri_truncate_orc.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py b/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py index 2a9c33c322bc..ca901defa815 100644 --- a/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py +++ b/bigquery/samples/tests/test_load_table_uri_truncate_parquet.py @@ -1,4 +1,4 @@ -# Copyright 2019 Google LLC +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.