Remove from _KNOWN_TABLES on 404 insert to allow table re-creation for Python SDK #25325

Closed
Changes from 8 commits
28 changes: 18 additions & 10 deletions sdks/python/apache_beam/io/gcp/bigquery.py
@@ -405,7 +405,9 @@ def chain_after(result):
from apache_beam.utils.annotations import deprecated
from apache_beam.utils.annotations import experimental


try:
from google.api_core.exceptions import ClientError, GoogleAPICallError
from apache_beam.io.gcp.internal.clients.bigquery import DatasetReference
from apache_beam.io.gcp.internal.clients.bigquery import TableReference
from apache_beam.io.gcp.internal.clients.bigquery import JobReference
@@ -1295,6 +1297,7 @@ class BigQueryWriteFn(DoFn):

FAILED_ROWS = 'FailedRows'
FAILED_ROWS_WITH_ERRORS = 'FailedRowsWithErrors'
FAILED_INSERT_NOTFOUND = 'notFound'
STREAMING_API_LOGGING_FREQUENCY_SEC = 300

def __init__(
@@ -1551,17 +1554,23 @@ def _flush_batch(self, destination):
insert_ids = [None for r in rows_and_insert_ids]
else:
insert_ids = [r[1] for r in rows_and_insert_ids]

while True:
errors = []
passed = False
start = time.time()
passed, errors = self.bigquery_wrapper.insert_rows(
project_id=table_reference.projectId,
dataset_id=table_reference.datasetId,
table_id=table_reference.tableId,
rows=rows,
insert_ids=insert_ids,
skip_invalid_rows=True,
ignore_unknown_values=self.ignore_unknown_columns)
try:
passed, errors = self.bigquery_wrapper.insert_rows(
project_id=table_reference.projectId,
dataset_id=table_reference.datasetId,
table_id=table_reference.tableId,
rows=rows,
insert_ids=insert_ids,
skip_invalid_rows=True,
ignore_unknown_values=self.ignore_unknown_columns)
except (ClientError, GoogleAPICallError) as e:
if e.code == 404:
ragyabraham marked this conversation as resolved.
_KNOWN_TABLES.remove(destination)
@ahmedabu98 (Contributor) commented on Feb 9, 2023:

Suggested change: replace

    _KNOWN_TABLES.remove(destination)

with

    _KNOWN_TABLES.remove(destination)
    self._create_table_if_needed(bigquery_tools.parse_table_reference(destination), self.schema)

We will actually need to create the table again here, otherwise we will continue running into the 404 error.

ragyabraham (Author) replied:

Can you clarify? I believe after raising, the bundle will retry, _create_table_if_needed is called again and, since the table name is no longer in _KNOWN_TABLES, it will get past that check. Then the table may be created again (depending on create_disposition).

I've also realised, if the create_disposition is 'CREATE_NEVER' and the insert_retry_strategy is set to 'RETRY_ALWAYS', the pipeline will be stuck in a loop, no?
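
For reference, the gate being discussed works roughly like this (a paraphrased sketch, not the verbatim bigquery.py body; `create_fn` stands in for the wrapper's get-or-create call):

```python
_KNOWN_TABLES = set()

CREATE_IF_NEEDED = 'CREATE_IF_NEEDED'
CREATE_NEVER = 'CREATE_NEVER'


def create_table_if_needed(destination, create_disposition, create_fn):
  # Once a destination is in _KNOWN_TABLES this is a no-op, so removing the
  # entry on a 404 lets the retried bundle reach the create step again.
  if destination in _KNOWN_TABLES:
    return
  if create_disposition == CREATE_NEVER:
    # No create call is ever issued; if the table really is missing, each
    # retried bundle will hit the same 404 again.
    return
  create_fn(destination)
  _KNOWN_TABLES.add(destination)
```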

@ahmedabu98 (Contributor) replied on Mar 23, 2023:

> I believe after raising, the bundle will retry, _create_table_if_needed is called again

Ahh you're right, ignore my suggestion.

> I've also realised, if the create_disposition is 'CREATE_NEVER' and the insert_retry_strategy is set to 'RETRY_ALWAYS', the pipeline will be stuck in a loop, no?

The retry strategy refers to errors we receive when inserting individual rows (e.g. a schema mismatch), and those come back after BQ has tried inserting into the table. Those failed row insertions may be retried according to the strategy (that logic is handled directly in this file), but the errors don't cause the whole bundle to fail.

The error we're looking at here comes from the HTTP request itself (the table doesn't exist). It will cause the bundle to fail, and it's up to the runner to decide how to handle that (e.g. DirectRunner fails the pipeline; DataflowRunner retries a failed bundle 3 times and then fails the pipeline).
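
A minimal, self-contained model of that distinction (illustrative only; `insert_rows`, `should_retry_row` and `NotFound` are stand-ins rather than the actual Beam internals):

```python
_KNOWN_TABLES = set()


class NotFound(Exception):
  """Stand-in for an HTTP 404 raised by the insert request."""
  code = 404


def flush_batch(destination, rows, insert_rows, should_retry_row):
  while rows:
    try:
      # A successful request returns per-row errors rather than raising.
      _, errors = insert_rows(destination, rows)
    except NotFound:
      # The table itself is gone: forget it so a retried bundle will
      # re-create it, then re-raise so the runner fails/retries the bundle.
      _KNOWN_TABLES.discard(destination)
      raise
    # Row-level failures stay inside this loop and are re-queued (or dropped)
    # according to the configured insert_retry_strategy.
    rows = [rows[e['index']] for e in errors if should_retry_row(e)]
```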

raise
self.batch_latency_metric.update((time.time() - start) * 1000)

failed_rows = [(rows[entry['index']], entry["errors"])
@@ -1579,7 +1588,6 @@ def _flush_batch(self, destination):
message = (
'There were errors inserting to BigQuery. Will{} retry. '
'Errors were {}'.format(("" if should_retry else " not"), errors))

# The log level is:
# - WARNING when we are continuing to retry, and have a deadline.
# - ERROR when we will no longer retry, or MAY retry forever.