From 3e8fbaef0d4263d83d67abbec47936b5db84082a Mon Sep 17 00:00:00 2001
From: Peter Lamut <>
Date: Fri, 1 Nov 2019 23:25:04 +0200
Subject: [PATCH] docs(bigquery): document how to achieve higher write limit
 and add tests (#9574)

* test(bigquery): add insert_rows*() tests w/o row IDs

* Groom the insert_rows_json() method's docstring

* docs: document how to achieve higher insert write limit

* Make method names less confusing for insert IDs
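---
 bigquery/docs/usage/tables.rst                |  14 +++
 bigquery/google/cloud/bigquery/client.py      |  35 +++---
 bigquery/samples/table_insert_rows_explicit_none_insert_ids.py |  36 ++++++
 bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py |  33 ++++++
 bigquery/tests/unit/test_client.py            | 104 ++++++++++++++++++
 5 files changed, 206 insertions(+), 16 deletions(-)
 create mode 100644 bigquery/samples/table_insert_rows_explicit_none_insert_ids.py
 create mode 100644 bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py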

diff --git a/bigquery/docs/usage/tables.rst b/bigquery/docs/usage/tables.rst
index 6a6cbd356639..d58dcc5d9ac4 100644
--- a/bigquery/docs/usage/tables.rst
+++ b/bigquery/docs/usage/tables.rst
@@ -122,6 +122,20 @@ Insert rows into a table's data with the
    :start-after: [START bigquery_table_insert_rows]
    :end-before: [END bigquery_table_insert_rows]
+Insert rows into a table's data with the
+:func:`~google.cloud.bigquery.client.Client.insert_rows` method, achieving
+a higher write limit:
+.. literalinclude:: ../samples/table_insert_rows_explicit_none_insert_ids.py
+   :language: python
+   :dedent: 4
+   :start-after: [START bigquery_table_insert_rows_explicit_none_insert_ids]
+   :end-before: [END bigquery_table_insert_rows_explicit_none_insert_ids]
+Mind that inserting data with ``None`` row insert IDs can come at the expense of
+more duplicate inserts. See also:
+`Streaming inserts <https://cloud.google.com/bigquery/quotas#streaming_inserts>`_.
 Add an empty column to the existing table with the
 :func:`` method:
diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
index 02bfc651af0d..bae4359300f8 100644
--- a/bigquery/google/cloud/bigquery/client.py
+++ b/bigquery/google/cloud/bigquery/client.py
@@ -2264,29 +2264,32 @@ def insert_rows_json(
             table (Union[ \
       , \
-                str, \
+                str \
                 The destination table for the row data, or a reference to it.
             json_rows (Sequence[Dict]):
                 Row data to be inserted. Keys must match the table schema fields
                 and values must be JSON-compatible representations.
-            row_ids (Sequence[str]):
-                (Optional) Unique ids, one per row being inserted. If omitted,
-                unique IDs are created.
-            skip_invalid_rows (bool):
-                (Optional) Insert all valid rows of a request, even if invalid
-                rows exist. The default value is False, which causes the entire
-                request to fail if any invalid rows exist.
-            ignore_unknown_values (bool):
-                (Optional) Accept rows that contain values that do not match the
-                schema. The unknown values are ignored. Default is False, which
+            row_ids (Optional[Sequence[Optional[str]]]):
+                Unique IDs, one per row being inserted. An ID can also be
+                ``None``, indicating that an explicit insert ID should **not**
+                be used for that row. If the argument is omitted altogether,
+                unique IDs are created automatically.
+            skip_invalid_rows (Optional[bool]):
+                Insert all valid rows of a request, even if invalid rows exist.
+                The default value is ``False``, which causes the entire request
+                to fail if any invalid rows exist.
+            ignore_unknown_values (Optional[bool]):
+                Accept rows that contain values that do not match the schema.
+                The unknown values are ignored. Default is ``False``, which
                 treats unknown values as errors.
-            template_suffix (str):
-                (Optional) treat ``name`` as a template table and provide a suffix.
-                BigQuery will create the table ``<name> + <template_suffix>`` based
-                on the schema of the template table. See
+            template_suffix (Optional[str]):
+                Treat ``name`` as a template table and provide a suffix.
+                BigQuery will create the table ``<name> + <template_suffix>``
+                based on the schema of the template table. See
-            retry (google.api_core.retry.Retry): (Optional) How to retry the RPC.
+            retry (Optional[google.api_core.retry.Retry]):
+                How to retry the RPC.
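diff --git a/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py b/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py
new file mode 100644
index 000000000000..953e7e210312
--- /dev/null
+++ b/bigquery/samples/table_insert_rows_explicit_none_insert_ids.py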
@@ -0,0 +1,36 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+def table_insert_rows_explicit_none_insert_ids(client, table_id):
+    # [START bigquery_table_insert_rows_explicit_none_insert_ids]
+    # TODO(developer): Import the client library.
+    # from google.cloud import bigquery
+    # TODO(developer): Construct a BigQuery client object.
+    # client = bigquery.Client()
+    # TODO(developer): Set table_id to the ID of the table to insert rows into.
+    # table_id = "your-project.your_dataset.your_table"
+    table = client.get_table(table_id)  # Make an API request.
+    rows_to_insert = [(u"Phred Phlyntstone", 32), (u"Wylma Phlyntstone", 29)]
+    errors = client.insert_rows(
+        table, rows_to_insert, row_ids=[None] * len(rows_to_insert)
+    )  # Make an API request.
+    if errors == []:
+        print("New rows have been added.")
+    # [END bigquery_table_insert_rows_explicit_none_insert_ids]
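diff --git a/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
new file mode 100644
index 000000000000..6a59609baacf
--- /dev/null
+++ b/bigquery/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py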
@@ -0,0 +1,33 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from google.cloud import bigquery
+from .. import table_insert_rows_explicit_none_insert_ids as mut
+def test_table_insert_rows_explicit_none_insert_ids(capsys, client, random_table_id):
+    schema = [
+        bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
+        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+    ]
+    table = bigquery.Table(random_table_id, schema=schema)
+    table = client.create_table(table)
+    mut.table_insert_rows_explicit_none_insert_ids(client, random_table_id)
+    out, err = capsys.readouterr()
+    assert "New rows have been added." in out
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index 91b9bc642187..b4e5e96f1e8e 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -4572,6 +4572,40 @@ def test_insert_rows_w_record_schema(self):
             method="POST", path="/%s" % PATH, data=SENT
+    def test_insert_rows_w_explicit_none_insert_ids(self):
+        from google.cloud.bigquery.schema import SchemaField
+        from google.cloud.bigquery.table import Table
+        PATH = "projects/{}/datasets/{}/tables/{}/insertAll".format(
+            self.PROJECT, self.DS_ID, self.TABLE_ID,
+        )
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        conn = client._connection = make_connection({})
+        schema = [
+            SchemaField("full_name", "STRING", mode="REQUIRED"),
+            SchemaField("age", "INTEGER", mode="REQUIRED"),
+        ]
+        table = Table(self.TABLE_REF, schema=schema)
+        ROWS = [
+            {"full_name": "Phred Phlyntstone", "age": 32},
+            {"full_name": "Bharney Rhubble", "age": 33},
+        ]
+        def _row_data(row):
+            row["age"] = str(row["age"])
+            return row
+        SENT = {"rows": [{"json": _row_data(row), "insertId": None} for row in ROWS]}
+        errors = client.insert_rows(table, ROWS, row_ids=[None] * len(ROWS))
+        self.assertEqual(len(errors), 0)
+        conn.api_request.assert_called_once_with(
+            method="POST", path="/{}".format(PATH), data=SENT
+        )
     def test_insert_rows_errors(self):
         from google.cloud.bigquery.table import Table
@@ -4765,6 +4799,55 @@ def test_insert_rows_from_dataframe_many_columns(self):
         assert len(actual_calls) == 1
         assert actual_calls[0] == expected_call
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self):
+        from google.cloud.bigquery.schema import SchemaField
+        from google.cloud.bigquery.table import Table
+        API_PATH = "/projects/{}/datasets/{}/tables/{}/insertAll".format(
+            self.PROJECT, self.DS_ID, self.TABLE_REF.table_id
+        )
+        dataframe = pandas.DataFrame(
+            [
+                {"name": u"Little One", "adult": False},
+                {"name": u"Young Gun", "adult": True},
+            ]
+        )
+        # create client
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+        conn = client._connection = make_connection({}, {})
+        # create table
+        schema = [
+            SchemaField("name", "STRING", mode="REQUIRED"),
+            SchemaField("adult", "BOOLEAN", mode="REQUIRED"),
+        ]
+        table = Table(self.TABLE_REF, schema=schema)
+        error_info = client.insert_rows_from_dataframe(
+            table, dataframe, row_ids=[None] * len(dataframe)
+        )
+        self.assertEqual(len(error_info), 1)
+        assert error_info[0] == []  # no chunk errors
+        EXPECTED_SENT_DATA = {
+            "rows": [
+                {"insertId": None, "json": {"name": "Little One", "adult": "false"}},
+                {"insertId": None, "json": {"name": "Young Gun", "adult": "true"}},
+            ]
+        }
+        actual_calls = conn.api_request.call_args_list
+        assert len(actual_calls) == 1
+        assert actual_calls[0] == mock.call(
+            method="POST", path=API_PATH, data=EXPECTED_SENT_DATA
+        )
     def test_insert_rows_json(self):
         from google.cloud.bigquery.table import Table, SchemaField
         from google.cloud.bigquery.dataset import DatasetReference
@@ -4833,6 +4916,27 @@ def test_insert_rows_json_with_string_id(self):
+    def test_insert_rows_json_w_explicit_none_insert_ids(self):
+        rows = [{"col1": "val1"}, {"col2": "val2"}]
+        creds = _make_credentials()
+        http = object()
+        client = self._make_one(
+            project="default-project", credentials=creds, _http=http
+        )
+        conn = client._connection = make_connection({})
+        errors = client.insert_rows_json(
+            "proj.dset.tbl", rows, row_ids=[None] * len(rows),
+        )
+        self.assertEqual(len(errors), 0)
+        expected = {"rows": [{"json": row, "insertId": None} for row in rows]}
+        conn.api_request.assert_called_once_with(
+            method="POST",
+            path="/projects/proj/datasets/dset/tables/tbl/insertAll",
+            data=expected,
+        )
     def test_list_partitions(self):
         from google.cloud.bigquery.table import Table