From dc50e3d65bdccd0ce6d5ad307a1d5d570f6f5c11 Mon Sep 17 00:00:00 2001 From: Andre Kurait Date: Mon, 6 May 2024 13:36:11 -0500 Subject: [PATCH] Update E2E tests for large document generation Signed-off-by: Andre Kurait --- test/operations.py | 31 ++++++++++++++++++++++++++----- test/tests.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 5 deletions(-) diff --git a/test/operations.py b/test/operations.py index 23582d8519..0cd9d4716b 100644 --- a/test/operations.py +++ b/test/operations.py @@ -1,3 +1,7 @@ +import datetime +import random +import string +import requests import json from requests import Session @@ -27,12 +31,29 @@ def delete_document(endpoint: str, index_name: str, doc_id: str, auth, return response -def create_document(endpoint: str, index_name: str, doc_id: str, auth, - verify_ssl: bool = False, session: Session = Session()): - document = { - 'title': 'Test Document', - 'content': 'This is a sample document for testing OpenSearch.' +def generate_large_doc(size_mb): + # Calculate number of characters needed (1 char = 1 byte) + num_chars = size_mb * 1000 * 1000 + + # Generate random string of the desired length + large_string = ''.join(random.choices(string.ascii_letters + string.digits, k=num_chars)) + + return { + "timestamp": datetime.datetime.now().isoformat(), + "large_field": large_string } + + +def create_document(endpoint: str, index_name: str, doc_id: str, auth, + verify_ssl: bool = False, doc_body: dict=None, session: Session = Session()): + if doc_body is None: + document = { + 'title': 'Test Document', + 'content': 'This is a sample document for testing OpenSearch.' 
+ } + else: + document = doc_body + url = f'{endpoint}/{index_name}/_doc/{doc_id}' headers = {'Content-Type': 'application/json'} response = session.put(url, headers=headers, data=json.dumps(document), auth=auth, verify=verify_ssl) diff --git a/test/tests.py b/test/tests.py index 55048e910b..29b76359a9 100644 --- a/test/tests.py +++ b/test/tests.py @@ -5,6 +5,11 @@ import requests import secrets import string +from operations import create_index, check_index, create_document, \ + delete_document, delete_index, generate_large_doc, get_document +from http import HTTPStatus +from typing import Tuple, Callable +import unittest import subprocess import time import unittest @@ -339,3 +344,31 @@ def test_0007_timeBetweenRequestsOnSameConnection(self): self.assert_source_target_doc_match(index_name, doc_id) finally: proxy_single_connection_session.close() + + def test_0008_largeRequest(self): + index_name = f"test_0008_{self.unique_id}" + doc_id = "1" + + # Create large document, 20MB which is less than the default max of 100MB in http.max_content_length + large_doc = generate_large_doc(size_mb=20) + + # Send large request to proxy and verify response + proxy_response = create_document(self.proxy_endpoint, index_name, doc_id, self.source_auth, + self.source_verify_ssl, doc_body=large_doc) + self.assertEqual(proxy_response.status_code, HTTPStatus.CREATED) + + # Verify document created on source and target + source_response = get_document(self.source_endpoint, index_name, doc_id, self.source_auth, + self.source_verify_ssl) + target_response = retry_request(get_document, args=(self.target_endpoint, index_name, doc_id, + self.target_auth, self.target_verify_ssl), + expected_status_code=HTTPStatus.OK) + self.assertEqual(source_response.status_code, HTTPStatus.OK) + self.assertEqual(target_response.status_code, HTTPStatus.OK) + + # Verify tuple outputs contain full response + self.verify_tuple_outputs(index_name, doc_id, large_doc) + + def verify_tuple_outputs(self, index_name, 
doc_id, expected_doc): + # TODO: Verify tuple outputs in Replayer contain the full document + return