TDL-14376 pagination failure #50

Merged 8 commits on Dec 13, 2021
13 changes: 12 additions & 1 deletion tap_google_sheets/sync.py
@@ -514,7 +514,18 @@ def sync(client, config, catalog, state):
from_row=from_row,
columns=columns,
sheet_data_rows=sheet_data_rows)
if row_num < to_row:

# row_num is from_row plus the number of records returned in the response (per batch).
# The old condition, row_num < to_row, checked whether the current page returned fewer
# records than expected (to_row) and, if so, broke out of the loop.
# However, the API does not return trailing empty rows in its response.
# For example, if rows 199 and 200 are empty and the sheet has 400 rows in total, then in the
# 1st iteration to_row = 200, from_row = 2, and row_num = 2 (from_row) + 197 = 199
# (row 1 contains the header), so the old condition was true and the loop broke without
# syncing records 201 to 400.
# sheet_data_rows holds the records returned for the current page. The loop now stops only
# when an entirely blank page is returned, so in the case above records 201 to 400 are still
# synced even though rows 199 and 200 are blank, and the loop breaks when the next batch,
# 401 to 600, comes back empty.
if not sheet_data_rows:  # If an entirely blank page is returned, stop looping.

@prijendev Can you please explain what the earlier behavior row_num < to_row: meant?

@prijendev (Contributor, Author) commented on Dec 7, 2021:

Here, to_row is initialized to the minimum of 200 (the max page size) and max_row, and it then advances by 200 on each page up to max_row. from_row starts at 2, and on each subsequent page it is set to to_row + 1 (201 on the second page). row_num is from_row plus the number of records returned in the response. The old condition checked whether row_num was less than to_row and set is_last_row to true accordingly. But the API does not return trailing empty rows in its response.

For example, if rows 199 and 200 are empty and the sheet has 400 rows in total, then in the 1st iteration:

to_row = 200
from_row = 2
row_num = 2 + 197 = 199 (row 1 contains the header)

So the above condition becomes true and the loop breaks early.
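
For reference, a minimal sketch of the paging loop described above. The get_page helper and the overall loop structure are assumptions made for illustration, not the tap's exact code; the point is only the change in the termination condition.

```python
# Minimal sketch of the paging behavior described above (assumed names, not the tap's exact code).
def paginate(get_page, max_row, page_size=200):
    """Yield data rows page by page, stopping only on an entirely blank page.

    get_page(from_row, to_row) stands in for the Sheets request and, like the
    real API, is assumed to omit trailing empty rows from its response.
    """
    from_row = 2                          # row 1 holds the header
    to_row = min(page_size, max_row)
    while from_row <= max_row:
        sheet_data_rows = get_page(from_row, to_row)
        row_num = from_row + len(sheet_data_rows)

        # Old behavior: `if row_num < to_row: break` treated any short page as the last
        # page, so a couple of blank rows mid-sheet ended the sync prematurely.
        # New behavior: keep paging until a page comes back with no rows at all.
        if not sheet_data_rows:
            break

        yield from sheet_data_rows
        from_row = to_row + 1
        to_row = min(to_row + page_size, max_row)
```

With this change a short page no longer ends the sync; only a fully empty batch (or reaching max_row) does.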

is_last_row = True

# Process records, send batch of records to target
22 changes: 19 additions & 3 deletions tests/test_google_sheets_pagination.py
@@ -25,6 +25,7 @@ def test_run(self):
Verify that for each stream you can get multiple pages of data
and that when all fields are selected more than the automatic fields are replicated.

Verify by primary keys that data is unique for each page
PREREQUISITE
This test relies on the existence of a specific sheet with the name Pagination that has a column
called 'id' with values 1 -> 238.
@@ -43,9 +44,8 @@ def test_run(self):
record_count_by_stream = self.run_and_verify_sync(conn_id)
synced_records = runner.get_records_from_target_output()

for stream in testable_streams.difference({
'sadsheet-pagination', # BUG TDL-14376
}):
# Added `sadsheet-pagination` back to testable_streams now that BUG TDL-14376 is resolved.
for stream in testable_streams:
with self.subTest(stream=stream):

our_fake_pk = 'id'
@@ -61,6 +61,22 @@ def test_run(self):
# verify that we can paginate with all fields selected
self.assertGreater(record_count_by_stream.get(stream, 0), self.API_LIMIT)


if stream == "sadsheet-pagination":
# verify the data for the "sadsheet-pagination" stream is free of any duplicates or breaks by checking
# our fake pk value ('id')
Contributor: Add a detailed comment about the data present in this sheet.

Contributor (Author): Added

expected_pk_list = list(range(1, 238))
expected_pk_list = [x for x in expected_pk_list if x not in [198, 199]]
self.assertEqual(expected_pk_list, fake_pk_list)

# verify the data for the "sadsheet-pagination" stream is free of any duplicates or breaks by checking
# the actual primary key values (__sdc_row)
expected_pk_list = list(range(2, 239))
expected_pk_list = [x for x in expected_pk_list if x not in [199, 200]]
self.assertEqual(expected_pk_list, actual_pk_list)

continue

# verify the data for the "Pagination" stream is free of any duplicates or breaks by checking
# our fake pk value ('id')
# THIS ASSERTION CAN BE MADE BECAUSE WE SETUP DATA IN A SPECIFIC WAY. DONT COPY THIS
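
For context, a small sketch of the duplicate/gap check the assertions above express. The build_pk_lists helper and the record shape are assumptions for illustration; the test itself builds fake_pk_list ('id') and actual_pk_list (__sdc_row) elsewhere in the file.

```python
# Sketch of the duplicate/gap check above, under an assumed record shape
# (not the test suite's exact helpers).
def build_pk_lists(records):
    """Return sorted ('id', '__sdc_row') value lists from synced records.

    Each record is assumed to be a dict exposing the sheet's 'id' column
    (the fake pk) and the '__sdc_row' value emitted by the tap.
    """
    fake_pk_list = sorted(int(r["id"]) for r in records)
    actual_pk_list = sorted(int(r["__sdc_row"]) for r in records)
    return fake_pk_list, actual_pk_list


def check_sadsheet_pagination(records):
    fake_pk_list, actual_pk_list = build_pk_lists(records)
    # ids 198 and 199 are missing because sheet rows 199 and 200 are intentionally blank.
    expected_ids = [x for x in range(1, 238) if x not in (198, 199)]
    # '__sdc_row' values are offset by one because row 1 holds the header.
    expected_rows = [x for x in range(2, 239) if x not in (199, 200)]
    assert fake_pk_list == expected_ids
    assert actual_pk_list == expected_rows
```

Both expected lists mirror the assertions in the diff above: the blank entries in the sadsheet are ids 198/199, i.e. sheet rows 199/200.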