Retry backend execute on concurrent append #303

Draft · wants to merge 24 commits into main

Changes from 13 commits

Commits (24):
22d9c63  Add test with concurrent delta append (JCZuurmond, Sep 30, 2024)
1f4bb5a  Move roll over method out (JCZuurmond, Sep 30, 2024)
1745911  Rename table (JCZuurmond, Sep 30, 2024)
92394ea  Assert the right way (JCZuurmond, Sep 30, 2024)
1010236  Use test table (JCZuurmond, Sep 30, 2024)
65dca14  Add comment explaining rollover (JCZuurmond, Sep 30, 2024)
bffd2d5  Retry concurrent append (JCZuurmond, Sep 30, 2024)
1f0b0a8  Fix string concat (JCZuurmond, Sep 30, 2024)
44513b3  Fix return type hint of retryable (JCZuurmond, Sep 30, 2024)
75050ad  Rename variables (JCZuurmond, Sep 30, 2024)
ac3e37c  Remove if delta missing raise as data loss (JCZuurmond, Sep 30, 2024)
3a7d5b3  Simplify create table (JCZuurmond, Sep 30, 2024)
c83f117  Use `make_table` fixture (JCZuurmond, Sep 30, 2024)
4844218  Remove wait until roll over (JCZuurmond, Oct 1, 2024)
f5e4db0  Remove unused import (JCZuurmond, Oct 1, 2024)
3f1c004  Move integration test to the appropriate module (JCZuurmond, Oct 1, 2024)
ae1ee5b  Put back raise error for missing delta transaction log (JCZuurmond, Oct 1, 2024)
7c0d4f4  Introduce custom `DeltaConcurrentAppend` error (JCZuurmond, Oct 1, 2024)
c31b679  Unit test `DeltaConcurrentAppend` error on statement execution backend (JCZuurmond, Oct 1, 2024)
664796b  Narrow test (JCZuurmond, Oct 1, 2024)
4f944ad  Test `DeltaConcurrentAppend` error on `RuntimeBackend` (JCZuurmond, Oct 1, 2024)
79cb07a  Narrow tests (JCZuurmond, Oct 1, 2024)
b2f2c8c  Format (JCZuurmond, Oct 1, 2024)
7f3d72e  Add integration test for concurrent write through runtime backend (JCZuurmond, Oct 1, 2024)
src/databricks/labs/lsql/core.py (13 changes: 10 additions & 3 deletions)
@@ -13,7 +13,8 @@
import requests
import sqlglot
from databricks.sdk import WorkspaceClient, errors
from databricks.sdk.errors import DataLoss, NotFound
from databricks.sdk.errors import BadRequest, DataLoss, NotFound
from databricks.sdk.retries import retried
from databricks.sdk.service.sql import (
ColumnInfoTypeName,
Disposition,
@@ -119,6 +120,13 @@ def __repr__(self):
return f"Row({', '.join(f'{k}={v!r}' for (k, v) in zip(self.__columns__, self, strict=True))})"


def _is_retryable_delta_concurrent_append(e: BaseException) -> str | None:
"""Retry a concurrent append to a delta table"""
if isinstance(e, BadRequest) and "DELTA_CONCURRENT_APPEND" in str(e):
return "Concurrent append"
return None


class StatementExecutionExt:
"""Execute SQL statements in a stateless manner.

@@ -182,6 +190,7 @@ def __init__( # pylint: disable=too-many-arguments,too-many-positional-arguments
ColumnInfoTypeName.TIMESTAMP: self._parse_timestamp,
}

@retried(is_retryable=_is_retryable_delta_concurrent_append, timeout=timedelta(seconds=10))
Member Author (JCZuurmond):

@nfx: Is this what you were thinking of?

I have to think about the implications of always retrying this in lsql; maybe we should only retry within UCX instead.

Contributor:

I think there needs to be a flag to control this, and it should default to off: it's not necessarily safe to blindly retry arbitrary SQL.

In general any time we do a 'read-modify-write' cycle, everything needs to start again from the read part because the modify (and write) often depend on it. Sometimes the read and modify bits are within the same SQL statement as the write, in which case this is safe. But often this is part of application code before we get to SQL and that may need to be restarted. In this situation only the application knows what to do.

Irrespective of this, whatever we do here also needs to end up in the .save_table() implementations: these don't all pass through .execute() and the same thing can happen there.
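
To make the suggested opt-in concrete, here is a minimal sketch, assuming a hypothetical `retry_on_concurrent_append` constructor argument (not part of this diff) and reusing the predicate added in core.py:

    from datetime import timedelta

    from databricks.sdk.errors import BadRequest
    from databricks.sdk.retries import retried


    def _is_retryable_delta_concurrent_append(e: BaseException) -> str | None:
        """Retry a concurrent append to a delta table."""
        if isinstance(e, BadRequest) and "DELTA_CONCURRENT_APPEND" in str(e):
            return "Concurrent append"
        return None


    class StatementExecutionExtSketch:
        """Sketch only: retrying is off by default and opted into per instance."""

        def __init__(self, *, retry_on_concurrent_append: bool = False) -> None:
            if retry_on_concurrent_append:
                # Wrap the bound method so only opted-in instances retry concurrent appends.
                self.execute = retried(
                    is_retryable=_is_retryable_delta_concurrent_append,
                    timeout=timedelta(seconds=10),
                )(self.execute)

        def execute(self, statement: str) -> None:
            ...  # stand-in for issuing the statement via the Statement Execution API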

Collaborator:

  1. We need to support this in RuntimeBackend, separately.
  2. I think we need to throw a predefined common exception, per @asnare's point (see the sketch below).
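
On point 2: a later commit in this PR (7c0d4f4) does introduce a custom `DeltaConcurrentAppend` error, although its definition is not part of the diff shown here. A minimal sketch of such a predefined exception, where the choice of `BadRequest` as base class is an assumption:

    from databricks.sdk.errors import BadRequest


    class DeltaConcurrentAppend(BadRequest):
        """Raised when a write fails because another transaction appended to the same Delta table."""


    def _raise_as_predefined_error(error_message: str) -> None:
        # Sketch: translate the raw Delta error string into one well-known exception type,
        # so the statement execution backend and the runtime backend can raise the same error.
        if "DELTA_CONCURRENT_APPEND" in error_message:
            raise DeltaConcurrentAppend(error_message)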

Member Author (JCZuurmond):

I will move the retry to the RuntimeBackend. What "predefined common exception" would you suggest throwing? I looked through sdk.errors and did not see one that really applies here.
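
As a rough illustration of moving the retry into the RuntimeBackend, here is a sketch; the class shape and the Spark call are assumptions for illustration, not code from this PR. Inside the runtime the failure surfaces from Spark rather than as the SDK's BadRequest, so this predicate matches on the error message only:

    from datetime import timedelta

    from databricks.sdk.retries import retried


    def _is_concurrent_append(e: BaseException) -> str | None:
        # Spark raises its own exception types here, so match on the Delta error class in the message.
        if "DELTA_CONCURRENT_APPEND" in str(e):
            return "Concurrent append"
        return None


    class RuntimeBackendSketch:
        """Sketch only: assumes a Spark session is available inside the Databricks runtime."""

        def __init__(self, spark) -> None:
            self._spark = spark

        @retried(is_retryable=_is_concurrent_append, timeout=timedelta(seconds=10))
        def execute(self, sql: str) -> None:
            self._spark.sql(sql)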

def execute(
self,
statement: str,
@@ -467,8 +476,6 @@ def _raise_if_needed(status: StatementStatus):
raise NotFound(error_message)
if "does not exist" in error_message:
raise NotFound(error_message)
if "DELTA_MISSING_TRANSACTION_LOG" in error_message:
raise DataLoss(error_message)
mapping = {
ServiceErrorCode.ABORTED: errors.Aborted,
ServiceErrorCode.ALREADY_EXISTS: errors.AlreadyExists,
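
For context on the SDK helper used above: `retried(is_retryable=..., timeout=...)` keeps re-invoking the wrapped callable, sleeping briefly between attempts, for as long as the predicate returns a reason string and the timeout has not elapsed. A small standalone illustration with a stand-in failure rather than a real Delta error:

    from datetime import timedelta

    from databricks.sdk.retries import retried

    attempts = 0


    def _is_concurrent_append(e: BaseException) -> str | None:
        # Simplified version of the predicate in core.py: return a reason to retry, or None.
        return "Concurrent append" if "DELTA_CONCURRENT_APPEND" in str(e) else None


    @retried(is_retryable=_is_concurrent_append, timeout=timedelta(seconds=10))
    def flaky_update() -> int:
        """Stand-in for a write that loses the race twice before succeeding."""
        global attempts
        attempts += 1
        if attempts < 3:
            raise RuntimeError("[DELTA_CONCURRENT_APPEND] simulated concurrent write")
        return attempts


    print(flaky_update())  # prints 3: the first two attempts are retried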
tests/integration/test_backends.py (46 changes: 46 additions & 0 deletions)
@@ -1,7 +1,12 @@
import math
import time

import pytest
from databricks.labs.blueprint.commands import CommandExecutor
from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.parallel import Threads
from databricks.labs.blueprint.wheels import ProductInfo, WheelsV2
from databricks.sdk.errors import BadRequest

from databricks.labs.lsql import Row
from databricks.labs.lsql.backends import SqlBackend, StatementExecutionBackend
@@ -186,3 +191,44 @@ def test_runtime_backend_use_statements(ws):
"""
result = commands.run(permission_denied_query)
assert result == "PASSED"


def wait_until_seconds_rollover(*, rollover_seconds: int = 10) -> None:
"""Wait until the next rollover.

Useful to align concurrent writes.

Args:
rollover_seconds (int) : The multiple of seconds to wait until the next rollover.
"""
nano, micro = 1e9, 1e6

nanoseconds_now = time.clock_gettime_ns(time.CLOCK_REALTIME)
nanoseconds_target = math.ceil(nanoseconds_now / (nano * rollover_seconds)) * nano * rollover_seconds

# To hit the rollover more accurately, first sleep until just before the target
nanoseconds_until_almost_target = (nanoseconds_target - nanoseconds_now) - micro
time.sleep(max(nanoseconds_until_almost_target / nano, 0))

# Then busy-wait until the rollover occurs
while time.clock_gettime_ns(time.CLOCK_REALTIME) < nanoseconds_target:
pass


def test_runtime_backend_handles_concurrent_append(sql_backend, make_random, make_table) -> None:
table = make_table(
name=f"lsql_test_{make_random()}",
ctas="SELECT r.id AS x, random() AS y FROM range(1000000) r"
)

def update_table() -> None:
wait_until_seconds_rollover() # Update the table at the same time
sql_backend.execute(f"UPDATE {table.full_name} SET y = y * 2 WHERE (x % 2 = 0)")

try:
Threads.strict("concurrent appends", [update_table, update_table])
except BadRequest as e:
if "[DELTA_CONCURRENT_APPEND]" in str(e):
assert False, str(e)
else:
raise # Raise in case of unexpected error