From 3e8fbaef0d4263d83d67abbec47936b5db84082a Mon Sep 17 00:00:00 2001
From: Peter Lamut <plamut@users.noreply.github.com>
Date: Fri, 1 Nov 2019 23:25:04 +0200
Subject: [PATCH] docs(bigquery): document how to achieve higher write limit
 and add tests (#9574)

* test(bigquery): add insert_rows*() tests w/o row IDs

* Groom the insert_rows_json() method's docstring

* docs: document how to achieve higher insert write limit

* Make method names less confusing for insert IDs
---
 bigquery/docs/usage/tables.rst                |  14 +++
 bigquery/google/cloud/bigquery/client.py      |  35 +++---
 ...le_insert_rows_explicit_none_insert_ids.py |  36 ++++++
 ...le_insert_rows_explicit_none_insert_ids.py |  33 ++++++
 bigquery/tests/unit/test_client.py            | 104 ++++++++++++++++++
 5 files changed, 206 insertions(+), 16 deletions(-)
 create mode 100644 bigquery/samples/table_insert_rows_explicit_none_insert_ids.py
 create mode 100644 bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py

diff --git a/bigquery/docs/usage/tables.rst b/bigquery/docs/usage/tables.rst
index 6a6cbd356639..d58dcc5d9ac4 100644
--- a/bigquery/docs/usage/tables.rst
+++ b/bigquery/docs/usage/tables.rst
@@ -122,6 +122,20 @@ Insert rows into a table's data with the
    :start-after: [START bigquery_table_insert_rows]
    :end-before: [END bigquery_table_insert_rows]
 
+Insert rows into a table's data with the
+:func:`~google.cloud.bigquery.client.Client.insert_rows` method, achieving
+a higher write limit:
+
+.. literalinclude:: ../samples/table_insert_rows_explicit_none_insert_ids.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_table_insert_rows_explicit_none_insert_ids]
+   :end-before: [END bigquery_table_insert_rows_explicit_none_insert_ids]
+
+Note that inserting data with ``None`` row insert IDs can come at the expense of
+more duplicate inserts. See also:
+`Streaming inserts <https://cloud.google.com/bigquery/quotas#streaming_inserts>`_.
+
 Add an empty column to the existing table with the
 :func:`~google.cloud.bigquery.update_table` method:
 
diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
index 02bfc651af0d..bae4359300f8 100644
--- a/bigquery/google/cloud/bigquery/client.py
+++ b/bigquery/google/cloud/bigquery/client.py
@@ -2264,29 +2264,32 @@ def insert_rows_json(
             table (Union[ \
                 google.cloud.bigquery.table.Table \
                 google.cloud.bigquery.table.TableReference, \
-                str, \
+                str \
             ]):
                 The destination table for the row data, or a reference to it.
             json_rows (Sequence[Dict]):
                 Row data to be inserted. Keys must match the table schema fields
                 and values must be JSON-compatible representations.
-            row_ids (Sequence[str]):
-                (Optional) Unique ids, one per row being inserted. If omitted,
-                unique IDs are created.
-            skip_invalid_rows (bool):
-                (Optional) Insert all valid rows of a request, even if invalid
-                rows exist. The default value is False, which causes the entire
-                request to fail if any invalid rows exist.
-            ignore_unknown_values (bool):
-                (Optional) Accept rows that contain values that do not match the
-                schema. The unknown values are ignored. Default is False, which
+            row_ids (Optional[Sequence[Optional[str]]]):
+                Unique IDs, one per row being inserted. An ID can also be
+                ``None``, indicating that an explicit insert ID should **not**
+                be used for that row. If the argument is omitted altogether,
+                unique IDs are created automatically.
+            skip_invalid_rows (Optional[bool]):
+                Insert all valid rows of a request, even if invalid rows exist.
+                The default value is ``False``, which causes the entire request
+                to fail if any invalid rows exist.
+            ignore_unknown_values (Optional[bool]):
+                Accept rows that contain values that do not match the schema.
+                The unknown values are ignored. Default is ``False``, which
                 treats unknown values as errors.
-            template_suffix (str):
-                (Optional) treat ``name`` as a template table and provide a suffix.
-                BigQuery will create the table ``<name> + <template_suffix>`` based
-                on the schema of the template table. See
+            template_suffix (Optional[str]):
+                Treat ``name`` as a template table and provide a suffix.
+                BigQuery will create the table ``<name> + <template_suffix>``
+                based on the schema of the template table. See
                 https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
-            retry (google.api_core.retry.Retry): (Optional) How to retry the RPC.
+            retry (Optional[google.api_core.retry.Retry]):
+                How to retry the RPC.
 
         Returns:
             Sequence[Mappings]:
diff --git a/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py
new file mode 100644
index 000000000000..953e7e210312
--- /dev/null
+++ b/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py
@@ -0,0 +1,36 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def table_insert_rows_explicit_none_insert_ids(client, table_id):
+
+    # [START bigquery_table_insert_rows_explicit_none_insert_ids]
+    # TODO(developer): Import the client library.
+    # from google.cloud import bigquery
+
+    # TODO(developer): Construct a BigQuery client object.
+    # client = bigquery.Client()
+
+    # TODO(developer): Set table_id to the ID of the table to fetch.
+    # table_id = "your-project.your_dataset.your_table"
+
+    table = client.get_table(table_id)  # Make an API request.
+    rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)]
+
+    errors = client.insert_rows(
+        table, rows_to_insert, row_ids=[None] * len(rows_to_insert)
+    )  # Make an API request.
+    if errors == []:
+        print("New rows have been added.")
+    # [END bigquery_table_insert_rows_explicit_none_insert_ids]
diff --git a/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
new file mode 100644
index 000000000000..6a59609baacf
--- /dev/null
+++ b/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
@@ -0,0 +1,33 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from google.cloud import bigquery
+
+from .. import table_insert_rows_explicit_none_insert_ids as mut
+
+
+def test_table_insert_rows_explicit_none_insert_ids(capsys, client, random_table_id):
+
+    schema = [
+        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
+        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+    ]
+
+    table = bigquery.Table(random_table_id, schema=schema)
+    table = client.create_table(table)
+
+    mut.table_insert_rows_explicit_none_insert_ids(client, random_table_id)
+    out, err = capsys.readouterr()
+    assert "New rows have been added." in out
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index 91b9bc642187..b4e5e96f1e8e 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -4572,6 +4572,40 @@ def test_insert_rows_w_record_schema(self):
             method="POST", path="/%s" % PATH, data=SENT
         )
 
+    def test_insert_rows_w_explicit_none_insert_ids(self):
+        from google.cloud.bigquery.schema import SchemaField
+        from google.cloud.bigquery.table import Table
+
+        PATH = "projects/{}/datasets/{}/tables/{}/insertAll".format(
+            self.PROJECT, self.DS_ID, self.TABLE_ID,
+        )
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        conn = client._connection = make_connection({})
+        schema = [
+            SchemaField("full_name", "STRING", mode="REQUIRED"),
+            SchemaField("age", "INTEGER", mode="REQUIRED"),
+        ]
+        table = Table(self.TABLE_REF, schema=schema)
+        ROWS = [
+            {"full_name": "Phred Phlyntstone", "age": 32},
+            {"full_name": "Bharney Rhubble", "age": 33},
+        ]
+
+        def _row_data(row):
+            row["age"] = str(row["age"])
+            return row
+
+        SENT = {"rows": [{"json": _row_data(row), "insertId": None} for row in ROWS]}
+
+        errors = client.insert_rows(table, ROWS, row_ids=[None] * len(ROWS))
+
+        self.assertEqual(len(errors), 0)
+        conn.api_request.assert_called_once_with(
+            method="POST", path="/{}".format(PATH), data=SENT
+        )
+
     def test_insert_rows_errors(self):
         from google.cloud.bigquery.table import Table
 
@@ -4765,6 +4799,55 @@ def test_insert_rows_from_dataframe_many_columns(self):
         assert len(actual_calls) == 1
         assert actual_calls[0] == expected_call
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self):
+        from google.cloud.bigquery.table import SchemaField
+        from google.cloud.bigquery.table import Table
+
+        API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format(
+            self.PROJECT, self.DS_ID, self.TABLE_REF.table_id
+        )
+
+        dataframe = pandas.DataFrame(
+            [
+                {"name": u"Little One", "adult": False},
+                {"name": u"Young Gun", "adult": True},
+            ]
+        )
+
+        # create client
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        conn = client._connection = make_connection({}, {})
+
+        # create table
+        schema = [
+            SchemaField("name", "STRING", mode="REQUIRED"),
+            SchemaField("adult", "BOOLEAN", mode="REQUIRED"),
+        ]
+        table = Table(self.TABLE_REF, schema=schema)
+
+        error_info = client.insert_rows_from_dataframe(
+            table, dataframe, row_ids=[None] * len(dataframe)
+        )
+
+        self.assertEqual(len(error_info), 1)
+        assert error_info[0] == []  # no chunk errors
+
+        EXPECTED_SENT_DATA = {
+            "rows": [
+                {"insertId": None, "json": {"name": "Little One", "adult": "false"}},
+                {"insertId": None, "json": {"name": "Young Gun", "adult": "true"}},
+            ]
+        }
+
+        actual_calls = conn.api_request.call_args_list
+        assert len(actual_calls) == 1
+        assert actual_calls[0] == mock.call(
+            method="POST", path=API_PATH, data=EXPECTED_SENT_DATA
+        )
+
     def test_insert_rows_json(self):
         from google.cloud.bigquery.table import Table, SchemaField
         from google.cloud.bigquery.dataset import DatasetReference
@@ -4833,6 +4916,27 @@ def test_insert_rows_json_with_string_id(self):
             data=expected,
         )
 
+    def test_insert_rows_json_w_explicit_none_insert_ids(self):
+        rows = [{"col1": "val1"}, {"col2": "val2"}]
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(
+            project="default-project", credentials=creds, _http=http
+        )
+        conn = client._connection = make_connection({})
+
+        errors = client.insert_rows_json(
+            "proj.dset.tbl", rows, row_ids=[None] * len(rows),
+        )
+
+        self.assertEqual(len(errors), 0)
+        expected = {"rows": [{"json": row, "insertId": None} for row in rows]}
+        conn.api_request.assert_called_once_with(
+            method="POST",
+            path="/projects/proj/datasets/dset/tables/tbl/insertAll",
+            data=expected,
+        )
+
     def test_list_partitions(self):
         from google.cloud.bigquery.table import Table