Merge branch 'flexllm_part3' into inf_test_fix
goliaro authored Feb 24, 2025
2 parents 78ebf55 + 3d21098 commit 6ec7412
Showing 7 changed files with 60 additions and 11 deletions.
2 changes: 1 addition & 1 deletion inference/python/incr_decoding.py
@@ -101,7 +101,7 @@ def main():
     )
     llm.compile(
         generation_config,
-        max_requests_per_batch=1,
+        max_requests_per_batch=4,
         max_seq_length=256,
         max_tokens_per_batch=64,
     )
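For context, a minimal sketch of how these compile-time limits fit together in the serving examples; the model name, sampling settings, and the omitted runtime init are illustrative, not part of this commit:

    import flexflow.serve as ff

    # ff.init(...) is assumed to have been called with the runtime configs.
    # Only the three compile limits below mirror the values in this commit.
    llm = ff.LLM("meta-llama/Llama-2-7b-hf")
    llm.compile(
        ff.GenerationConfig(do_sample=False),
        max_requests_per_batch=4,   # co-schedule up to 4 requests per batch
        max_seq_length=256,         # cap on prompt + generated tokens per request
        max_tokens_per_batch=64,    # token budget per decoding iteration
    )
    llm.start_server()
    results = llm.generate(["Three tips for staying healthy are: "])
    llm.stop_server()

Raising max_requests_per_batch from 1 to 4 lets the runtime co-schedule several of the new multi-prompt test requests per batch instead of serializing them.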
4 changes: 2 additions & 2 deletions inference/python/spec_infer.py
@@ -130,15 +130,15 @@ def main():
     for ssm in ssms:
         ssm.compile(
             generation_config,
-            max_requests_per_batch=1,
+            max_requests_per_batch=4,
             max_seq_length=256,
             max_tokens_per_batch=64,
         )

     # Compile the LLM for inference and load the weights into memory
     llm.compile(
         generation_config,
-        max_requests_per_batch=1,
+        max_requests_per_batch=4,
         max_seq_length=256,
         max_tokens_per_batch=64,
         ssms=ssms,
2 changes: 1 addition & 1 deletion python/flexflow/core/flexflow_cffi.py
@@ -4718,7 +4718,7 @@ def generate(self, requests_list: List[Request]):
         ]  # entry will be None for finetuning requests
         c_output_texts = [
             (
-                ffi.new("char[]", max_sequence_length * 5)
+                ffi.new("char[]", max_sequence_length * 10)
                 if request.req_type == RequestType.REQ_INFERENCE
                 else ffi.NULL
             )
5 changes: 2 additions & 3 deletions src/c/flexflow_c.cc
@@ -1780,10 +1780,9 @@ void flexflow_model_generate(flexflow_model_t handle_,
     if (max_lengths[i] >= 0) {
       assert(total_tokens <= max_lengths[i] || num_output_tokens == 0);
     }
-    // assert(results[i].output_tokens.size() <= max_seq_lengths[i] ||
-    //        results[i].output_tokens.size() ==
-    //        results[i].input_tokens.size());
     output_length_and_tokens[i][0] = results[i].output_tokens.size();
+    assert(results[i].output_tokens.size() <= max_lengths[i] + 100 && "Exceeding python buffer size for token ids");
+    assert(results[i].output_text.length() <= max_lengths[i] * 10 && "Exceeding python buffer size for output text");
     std::copy(results[i].output_tokens.begin(),
               results[i].output_tokens.end(),
               output_length_and_tokens[i] + 1);
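The two new asserts bound the copies against the buffers allocated on the Python side: the token-ID buffer appears to be sized max_length + 100 entries, and the c_output_texts entries (see the flexflow_cffi.py hunk above) now hold max_length * 10 bytes. A rough sketch of the sizing rule, with a hypothetical helper name:

    # Hypothetical helper mirroring the buffer-sizing rule checked above.
    def output_buffer_sizes(max_length: int) -> tuple[int, int]:
        max_token_ids = max_length + 100   # slack beyond the requested max length
        max_text_bytes = max_length * 10   # ~10 bytes per sequence position (was 5)
        return max_token_ids, max_text_bytes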
3 changes: 1 addition & 2 deletions tests/inference/generate_inf_test_configs.py
@@ -19,7 +19,6 @@
     "use_4bit_quantization": False,
     "use_8bit_quantization": False,
     "enable_peft": False,
-    "peft_activation_reserve_space_size": 1024,  # 1GB
     "profiling": False,
     "benchmarking": False,
     "inference_debugging": False,
@@ -34,7 +33,7 @@
     "full_precision": True,
     "prompt": "",
     "output_file": "",
-    "max_length": 128,
+    "max_length": 255,
 }
 ssm_configs = {
     "ssms": [
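The bumped default of 255 sits one token below the max_seq_length=256 that the example scripts compile with, presumably so a generated request can never overrun the compiled sequence length. A hypothetical sanity check along those lines:

    # Hypothetical check: a request's max_length should fit strictly inside
    # the sequence length the model was compiled with (256 in the examples).
    def validate_max_length(config: dict, max_seq_length: int = 256) -> None:
        assert config["max_length"] < max_seq_length, (
            f"max_length={config['max_length']} must stay below "
            f"max_seq_length={max_seq_length}"
        )

    validate_max_length({"max_length": 255})  # passes; 256 would fail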
37 changes: 36 additions & 1 deletion tests/inference/test_inference_output.py
@@ -43,7 +43,41 @@ def compare_single_line(file_a, file_b):
         raise AssertionError(
             f"File contents differ at position {i}:\n {file_a} -> {list_a[i]}\n {file_b} -> {list_b[i]}"
         )
+def compare_token_ids(file1_path, file2_path):
+    prefix = "token IDs: "
+
+    # Read lines from both files.
+    with open(file1_path, 'r') as f1, open(file2_path, 'r') as f2:
+        lines1 = f1.readlines()
+        lines2 = f2.readlines()
+
+    # Filter lines that start with the specified prefix.
+    token_lines1 = [line for line in lines1 if line.startswith(prefix)]
+    token_lines2 = [line for line in lines2 if line.startswith(prefix)]
+
+    # Check if both files have the same number of token lines.
+    if len(token_lines1) != len(token_lines2):
+        print(f"Error: Number of token ID lines differ: {len(token_lines1)} vs {len(token_lines2)}")
+        return False
+
+    # Compare corresponding token lines.
+    for i, (line1, line2) in enumerate(zip(token_lines1, token_lines2)):
+        try:
+            tokens1 = [int(tok.strip()) for tok in line1[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file1, line {i}: {line1}\n{e}")
+            continue
+
+        try:
+            tokens2 = [int(tok.strip()) for tok in line2[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file2, line {i}: {line2}\n{e}")
+            continue
+
+        # Compare the first 50 tokens, or fewer if either list is shorter.
+        n_to_compare = min(50, len(tokens1), len(tokens2))
+        if tokens1[:n_to_compare] != tokens2[:n_to_compare]:
+            raise AssertionError(f"Mismatch in line {i}:\nFile1 tokens (first {n_to_compare}): {tokens1[:n_to_compare]}\nFile2 tokens (first {n_to_compare}): {tokens2[:n_to_compare]}")
+

 def group_model_files(prefix):
     """
@@ -118,7 +152,8 @@ def test_output_alignment(file_a, file_b):
     """
     Each file pair is tested and reported separately.
     """
-    compare_single_line(file_a, file_b)
+    # compare_single_line(file_a, file_b)
+    compare_token_ids(file_a, file_b)



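compare_token_ids keys off output lines of the form "token IDs: 1, 2, 3, ..." and compares only the first 50 IDs of each request, so divergence past that point is tolerated. A small illustrative usage (the file contents here are made up):

    # Illustrative files: only lines starting with "token IDs: " are compared.
    with open("run_a.txt", "w") as f:
        f.write("output text: hello world\ntoken IDs: 1, 15, 42\n")
    with open("run_b.txt", "w") as f:
        f.write("output text: hello there\ntoken IDs: 1, 15, 42\n")

    compare_token_ids("run_a.txt", "run_b.txt")  # passes: token IDs agree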
18 changes: 17 additions & 1 deletion tests/inference_tests.sh
@@ -18,7 +18,23 @@ CPP_INFERENCE_TESTS=${CPP_INFERENCE_TESTS:-OFF}
 rm -rf inference/prompt inference/output inference/inf_test_configs || true
 # Create test prompt file
 mkdir -p ./inference/prompt
-echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json
+# echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json
+# sample_prompts='[
+#   "The largest ocean on Earth is",
+#   "The inventor of the telephone was",
+#   "The speed of light is",
+#   "The tallest mountain in the world is",
+#   "The first man on the moon was"
+# ]'
+sample_prompts='[
+  "In the year 2075, artificial intelligence has become deeply integrated into every aspect of human life. Autonomous robots manage infrastructure, AI-powered doctors perform complex surgeries with unmatched precision, and personalized AI assistants anticipate people'\''s needs before they even express them. Despite these advancements, ethical concerns continue to grow. One of the most pressing debates surrounding AI development in this era is whether",
+  "The rapid development of space exploration has led humanity to establish permanent settlements beyond Earth. With bases on the Moon and Mars, scientists and engineers work tirelessly to create sustainable ecosystems that can support human life in the long term. However, numerous challenges remain, from radiation exposure to psychological effects of isolation in deep space. One of the most critical issues that must be addressed before humanity can expand further into the solar system is",
+  "Throughout history, scientific discoveries have continuously reshaped our understanding of the universe. The shift from a geocentric to a heliocentric model, the theory of relativity, and the advent of quantum mechanics have all challenged previous assumptions and opened new frontiers of knowledge. As we continue to explore the cosmos, scientists are now focused on solving one of the most perplexing mysteries of all: the nature of dark matter and dark energy. If researchers were to uncover definitive proof regarding their existence, it could mean that",
+  "The emergence of advanced genetic engineering techniques has revolutionized modern medicine, allowing scientists to edit DNA with unprecedented precision. With technologies like CRISPR, researchers have already corrected genetic mutations that cause severe diseases and are even exploring the potential of enhancing human traits such as intelligence and longevity. However, this progress raises profound ethical concerns, as the ability to manipulate the human genome could lead to unforeseen consequences. One of the major dilemmas in the future of genetic engineering revolves around",
+  "Climate change has become the defining challenge of the 21st century, with rising global temperatures, extreme weather events, and melting ice caps threatening ecosystems and human populations worldwide. Scientists and policymakers are racing against time to develop sustainable solutions, from carbon capture technologies to alternative energy sources like nuclear fusion. Despite these efforts, one of the biggest obstacles to achieving global climate stability is the fact that"
+]'
+echo "$sample_prompts" > ./inference/prompt/test.json
 # Create output folder
 mkdir -p ./inference/output
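Since the tests consume ./inference/prompt/test.json as a JSON list of strings, a quick validity check after writing it catches shell-quoting mistakes (an unescaped apostrophe inside the single-quoted shell string would truncate the JSON). A minimal sketch:

    import json

    # Minimal sketch: confirm the generated prompt file parses as a JSON
    # list of non-empty strings before the inference tests consume it.
    with open("./inference/prompt/test.json") as f:
        prompts = json.load(f)
    assert isinstance(prompts, list) and all(isinstance(p, str) and p for p in prompts)
    print(f"loaded {len(prompts)} prompts")  # expect 5 with the sample above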
