diff --git a/inference/python/incr_decoding.py b/inference/python/incr_decoding.py index bf044670d..968aa65b2 100644 --- a/inference/python/incr_decoding.py +++ b/inference/python/incr_decoding.py @@ -101,7 +101,7 @@ def main(): ) llm.compile( generation_config, - max_requests_per_batch=1, + max_requests_per_batch=4, max_seq_length=256, max_tokens_per_batch=64, ) diff --git a/inference/python/spec_infer.py b/inference/python/spec_infer.py index 38dc6db63..a7652be59 100644 --- a/inference/python/spec_infer.py +++ b/inference/python/spec_infer.py @@ -130,7 +130,7 @@ def main(): for ssm in ssms: ssm.compile( generation_config, - max_requests_per_batch=1, + max_requests_per_batch=4, max_seq_length=256, max_tokens_per_batch=64, ) @@ -138,7 +138,7 @@ def main(): # Compile the LLM for inference and load the weights into memory llm.compile( generation_config, - max_requests_per_batch=1, + max_requests_per_batch=4, max_seq_length=256, max_tokens_per_batch=64, ssms=ssms, diff --git a/python/flexflow/core/flexflow_cffi.py b/python/flexflow/core/flexflow_cffi.py index 6cf4138a8..48c9bf211 100644 --- a/python/flexflow/core/flexflow_cffi.py +++ b/python/flexflow/core/flexflow_cffi.py @@ -4718,7 +4718,7 @@ def generate(self, requests_list: List[Request]): ] # entry will be None for finetuning requests c_output_texts = [ ( - ffi.new("char[]", max_sequence_length * 5) + ffi.new("char[]", max_sequence_length * 10) if request.req_type == RequestType.REQ_INFERENCE else ffi.NULL ) diff --git a/src/c/flexflow_c.cc b/src/c/flexflow_c.cc index 4c6ac5a09..7c3d8ff11 100644 --- a/src/c/flexflow_c.cc +++ b/src/c/flexflow_c.cc @@ -1780,10 +1780,9 @@ void flexflow_model_generate(flexflow_model_t handle_, if (max_lengths[i] >= 0) { assert(total_tokens <= max_lengths[i] || num_output_tokens == 0); } - // assert(results[i].output_tokens.size() <= max_seq_lengths[i] || - // results[i].output_tokens.size() == - // results[i].input_tokens.size()); output_length_and_tokens[i][0] = 
results[i].output_tokens.size(); + assert(results[i].output_tokens.size() <= max_lengths[i]+100 && "Exceeding python buffer size for token ids"); + assert(results[i].output_text.length() <= max_lengths[i]*10 && "Exceeding python buffer size for output text"); std::copy(results[i].output_tokens.begin(), results[i].output_tokens.end(), output_length_and_tokens[i] + 1); diff --git a/tests/inference/generate_inf_test_configs.py b/tests/inference/generate_inf_test_configs.py index fc0444885..4b24c0f80 100644 --- a/tests/inference/generate_inf_test_configs.py +++ b/tests/inference/generate_inf_test_configs.py @@ -19,7 +19,6 @@ "use_4bit_quantization": False, "use_8bit_quantization": False, "enable_peft": False, - "peft_activation_reserve_space_size": 1024, # 1GB "profiling": False, "benchmarking": False, "inference_debugging": False, @@ -34,7 +33,7 @@ "full_precision": True, "prompt": "", "output_file": "", - "max_length": 128, + "max_length": 255, } ssm_configs = { "ssms": [ diff --git a/tests/inference/test_inference_output.py b/tests/inference/test_inference_output.py index f5021fa23..4542d1a13 100644 --- a/tests/inference/test_inference_output.py +++ b/tests/inference/test_inference_output.py @@ -43,7 +43,41 @@ def compare_single_line(file_a, file_b): raise AssertionError( f"File contents differ at position {i}:\n {file_a} -> {list_a[i]}\n {file_b} -> {list_b[i]}" ) +def compare_token_ids(file1_path, file2_path): + prefix = "token IDs: " + + # Read lines from both files. + with open(file1_path, 'r') as f1, open(file2_path, 'r') as f2: + lines1 = f1.readlines() + lines2 = f2.readlines() + + # Filter lines that start with the specified prefix. + token_lines1 = [line for line in lines1 if line.startswith(prefix)] + token_lines2 = [line for line in lines2 if line.startswith(prefix)] + + # Check if both files have the same number of token lines. 
+    if len(token_lines1) != len(token_lines2):
+        print(f"Error: Number of token ID lines differ: {len(token_lines1)} vs {len(token_lines2)}")
+        raise AssertionError(f"Number of token ID lines differ: {len(token_lines1)} vs {len(token_lines2)}")
+
+    # Compare corresponding token lines.
+    for i, (line1, line2) in enumerate(zip(token_lines1, token_lines2)):
+        try:
+            tokens1 = [int(tok.strip()) for tok in line1[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file1, line {i}: {line1}\n{e}")
+            continue
+
+        try:
+            tokens2 = [int(tok.strip()) for tok in line2[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file2, line {i}: {line2}\n{e}")
+            continue
+        # Determine number of tokens to compare: first 50 or less if the list is shorter.
+        n_to_compare = min(50, len(tokens1), len(tokens2))
+        if tokens1[:n_to_compare] != tokens2[:n_to_compare]:
+            raise AssertionError(f"Mismatch in line {i}:\nFile1 tokens (first {n_to_compare}): {tokens1[:n_to_compare]}\nFile2 tokens (first {n_to_compare}): {tokens2[:n_to_compare]}")
 
 def group_model_files(prefix):
     """
@@ -118,7 +152,8 @@ def test_output_alignment(file_a, file_b):
     """
     Each file pair is tested and reported separately. 
""" - compare_single_line(file_a, file_b) + # compare_single_line(file_a, file_b) + compare_token_ids(file_a, file_b) diff --git a/tests/inference_tests.sh b/tests/inference_tests.sh index 54c1884e8..eb208d278 100755 --- a/tests/inference_tests.sh +++ b/tests/inference_tests.sh @@ -18,7 +18,23 @@ CPP_INFERENCE_TESTS=${CPP_INFERENCE_TESTS:-OFF} rm -rf inference/prompt inference/output inference/inf_test_configs || true # Create test prompt file mkdir -p ./inference/prompt -echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json +# echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json +# sample_prompts='[ +# "The largest ocean on Earth is", +# "The inventor of the telephone was", +# "The speed of light is", +# "The tallest mountain in the world is", +# "The first man on the moon was" +# ]' +sample_prompts='[ + "In the year 2075, artificial intelligence has become deeply integrated into every aspect of human life. Autonomous robots manage infrastructure, AI-powered doctors perform complex surgeries with unmatched precision, and personalized AI assistants anticipate people's needs before they even express them. Despite these advancements, ethical concerns continue to grow. One of the most pressing debates surrounding AI development in this era is whether", + "The rapid development of space exploration has led humanity to establish permanent settlements beyond Earth. With bases on the Moon and Mars, scientists and engineers work tirelessly to create sustainable ecosystems that can support human life in the long term. However, numerous challenges remain, from radiation exposure to psychological effects of isolation in deep space. One of the most critical issues that must be addressed before humanity can expand further into the solar system is", + "Throughout history, scientific discoveries have continuously reshaped our understanding of the universe. 
The shift from a geocentric to a heliocentric model, the theory of relativity, and the advent of quantum mechanics have all challenged previous assumptions and opened new frontiers of knowledge. As we continue to explore the cosmos, scientists are now focused on solving one of the most perplexing mysteries of all: the nature of dark matter and dark energy. If researchers were to uncover definitive proof regarding their existence, it could mean that", + "The emergence of advanced genetic engineering techniques has revolutionized modern medicine, allowing scientists to edit DNA with unprecedented precision. With technologies like CRISPR, researchers have already corrected genetic mutations that cause severe diseases and are even exploring the potential of enhancing human traits such as intelligence and longevity. However, this progress raises profound ethical concerns, as the ability to manipulate the human genome could lead to unforeseen consequences. One of the major dilemmas in the future of genetic engineering revolves around", + "Climate change has become the defining challenge of the 21st century, with rising global temperatures, extreme weather events, and melting ice caps threatening ecosystems and human populations worldwide. Scientists and policymakers are racing against time to develop sustainable solutions, from carbon capture technologies to alternative energy sources like nuclear fusion. Despite these efforts, one of the biggest obstacles to achieving global climate stability is the fact that" +]' +echo "$sample_prompts" > ./inference/prompt/test.json + # Create output folder mkdir -p ./inference/output