Merge branch 'flexllm_part3' into inf_test_fix
goliaro authored Feb 24, 2025
2 parents 78ebf55 + 3d21098 commit 6ec7412
Showing 7 changed files with 60 additions and 11 deletions.
2 changes: 1 addition & 1 deletion inference/python/incr_decoding.py
@@ -101,7 +101,7 @@ def main():
     )
     llm.compile(
         generation_config,
-        max_requests_per_batch=1,
+        max_requests_per_batch=4,
         max_seq_length=256,
         max_tokens_per_batch=64,
     )
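For context, a minimal sketch of how these compile-time limits fit together in the serving examples; the model name, sampling settings, and the omitted runtime init are illustrative, not part of this commit:

    import flexflow.serve as ff

    # ff.init(...) is assumed to have been called with the runtime configs.
    # Only the three compile limits below mirror the values in this commit.
    llm = ff.LLM("meta-llama/Llama-2-7b-hf")
    llm.compile(
        ff.GenerationConfig(do_sample=False),
        max_requests_per_batch=4,   # co-schedule up to 4 requests per batch
        max_seq_length=256,         # cap on prompt + generated tokens per request
        max_tokens_per_batch=64,    # token budget per decoding iteration
    )
    llm.start_server()
    results = llm.generate(["Three tips for staying healthy are: "])
    llm.stop_server()

Raising max_requests_per_batch from 1 to 4 lets the runtime co-schedule several of the new multi-prompt test requests per batch instead of serializing them.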
4 changes: 2 additions & 2 deletions inference/python/spec_infer.py
@@ -130,15 +130,15 @@ def main():
     for ssm in ssms:
         ssm.compile(
             generation_config,
-            max_requests_per_batch=1,
+            max_requests_per_batch=4,
             max_seq_length=256,
             max_tokens_per_batch=64,
         )

     # Compile the LLM for inference and load the weights into memory
     llm.compile(
         generation_config,
-        max_requests_per_batch=1,
+        max_requests_per_batch=4,
         max_seq_length=256,
         max_tokens_per_batch=64,
         ssms=ssms,
2 changes: 1 addition & 1 deletion python/flexflow/core/flexflow_cffi.py
@@ -4718,7 +4718,7 @@ def generate(self, requests_list: List[Request]):
         ]  # entry will be None for finetuning requests
         c_output_texts = [
             (
-                ffi.new("char[]", max_sequence_length * 5)
+                ffi.new("char[]", max_sequence_length * 10)
                 if request.req_type == RequestType.REQ_INFERENCE
                 else ffi.NULL
             )
5 changes: 2 additions & 3 deletions src/c/flexflow_c.cc
@@ -1780,10 +1780,9 @@ void flexflow_model_generate(flexflow_model_t handle_,
     if (max_lengths[i] >= 0) {
       assert(total_tokens <= max_lengths[i] || num_output_tokens == 0);
     }
-    // assert(results[i].output_tokens.size() <= max_seq_lengths[i] ||
-    //        results[i].output_tokens.size() ==
-    //        results[i].input_tokens.size());
     output_length_and_tokens[i][0] = results[i].output_tokens.size();
+    assert(results[i].output_tokens.size() <= max_lengths[i] + 100 && "Exceeding python buffer size for token ids");
+    assert(results[i].output_text.length() <= max_lengths[i] * 10 && "Exceeding python buffer size for output text");
     std::copy(results[i].output_tokens.begin(),
               results[i].output_tokens.end(),
               output_length_and_tokens[i] + 1);
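The two new asserts bound the copies against the buffers allocated on the Python side: the token-ID buffer appears to be sized max_length + 100 entries, and the c_output_texts entries (see the flexflow_cffi.py hunk above) now hold max_length * 10 bytes. A rough sketch of the sizing rule, with a hypothetical helper name:

    # Hypothetical helper mirroring the buffer-sizing rule checked above.
    def output_buffer_sizes(max_length: int) -> tuple[int, int]:
        max_token_ids = max_length + 100   # slack beyond the requested max length
        max_text_bytes = max_length * 10   # ~10 bytes per sequence position (was 5)
        return max_token_ids, max_text_bytes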
3 changes: 1 addition & 2 deletions tests/inference/generate_inf_test_configs.py
@@ -19,7 +19,6 @@
     "use_4bit_quantization": False,
     "use_8bit_quantization": False,
     "enable_peft": False,
-    "peft_activation_reserve_space_size": 1024,  # 1GB
     "profiling": False,
     "benchmarking": False,
     "inference_debugging": False,
@@ -34,7 +33,7 @@
     "full_precision": True,
     "prompt": "",
     "output_file": "",
-    "max_length": 128,
+    "max_length": 255,
 }
 ssm_configs = {
     "ssms": [
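The bumped default of 255 sits one token below the max_seq_length=256 that the example scripts compile with, presumably so a generated request can never overrun the compiled sequence length. A hypothetical sanity check along those lines:

    # Hypothetical check: a request's max_length should fit strictly inside
    # the sequence length the model was compiled with (256 in the examples).
    def validate_max_length(config: dict, max_seq_length: int = 256) -> None:
        assert config["max_length"] < max_seq_length, (
            f"max_length={config['max_length']} must stay below "
            f"max_seq_length={max_seq_length}"
        )

    validate_max_length({"max_length": 255})  # passes; 256 would fail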
37 changes: 36 additions & 1 deletion tests/inference/test_inference_output.py
@@ -43,7 +43,41 @@ def compare_single_line(file_a, file_b):
         raise AssertionError(
             f"File contents differ at position {i}:\n {file_a} -> {list_a[i]}\n {file_b} -> {list_b[i]}"
         )
+def compare_token_ids(file1_path, file2_path):
+    prefix = "token IDs: "
+
+    # Read lines from both files.
+    with open(file1_path, 'r') as f1, open(file2_path, 'r') as f2:
+        lines1 = f1.readlines()
+        lines2 = f2.readlines()
+
+    # Filter lines that start with the specified prefix.
+    token_lines1 = [line for line in lines1 if line.startswith(prefix)]
+    token_lines2 = [line for line in lines2 if line.startswith(prefix)]
+
+    # Check if both files have the same number of token lines.
+    if len(token_lines1) != len(token_lines2):
+        print(f"Error: Number of token ID lines differ: {len(token_lines1)} vs {len(token_lines2)}")
+        return False
+
+    # Compare corresponding token lines.
+    for i, (line1, line2) in enumerate(zip(token_lines1, token_lines2)):
+        try:
+            tokens1 = [int(tok.strip()) for tok in line1[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file1, line {i}: {line1}\n{e}")
+            continue
+
+        try:
+            tokens2 = [int(tok.strip()) for tok in line2[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file2, line {i}: {line2}\n{e}")
+            continue
+
+        # Compare the first 50 tokens, or fewer if either list is shorter.
+        n_to_compare = min(50, len(tokens1), len(tokens2))
+        if tokens1[:n_to_compare] != tokens2[:n_to_compare]:
+            raise AssertionError(f"Mismatch in line {i}:\nFile1 tokens (first {n_to_compare}): {tokens1[:n_to_compare]}\nFile2 tokens (first {n_to_compare}): {tokens2[:n_to_compare]}")
+

 def group_model_files(prefix):
     """
@@ -118,7 +152,8 @@ def test_output_alignment(file_a, file_b):
     """
     Each file pair is tested and reported separately.
     """
-    compare_single_line(file_a, file_b)
+    # compare_single_line(file_a, file_b)
+    compare_token_ids(file_a, file_b)



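compare_token_ids keys off output lines of the form "token IDs: 1, 2, 3, ..." and compares only the first 50 IDs of each request, so divergence past that point is tolerated. A small illustrative usage (the file contents here are made up):

    # Illustrative files: only lines starting with "token IDs: " are compared.
    with open("run_a.txt", "w") as f:
        f.write("output text: hello world\ntoken IDs: 1, 15, 42\n")
    with open("run_b.txt", "w") as f:
        f.write("output text: hello there\ntoken IDs: 1, 15, 42\n")

    compare_token_ids("run_a.txt", "run_b.txt")  # passes: token IDs agree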
18 changes: 17 additions & 1 deletion tests/inference_tests.sh
@@ -18,7 +18,23 @@ CPP_INFERENCE_TESTS=${CPP_INFERENCE_TESTS:-OFF}
 rm -rf inference/prompt inference/output inference/inf_test_configs || true
 # Create test prompt file
 mkdir -p ./inference/prompt
-echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json
+# echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json
+# sample_prompts='[
+#   "The largest ocean on Earth is",
+#   "The inventor of the telephone was",
+#   "The speed of light is",
+#   "The tallest mountain in the world is",
+#   "The first man on the moon was"
+# ]'
+sample_prompts='[
+  "In the year 2075, artificial intelligence has become deeply integrated into every aspect of human life. Autonomous robots manage infrastructure, AI-powered doctors perform complex surgeries with unmatched precision, and personalized AI assistants anticipate people'\''s needs before they even express them. Despite these advancements, ethical concerns continue to grow. One of the most pressing debates surrounding AI development in this era is whether",
+  "The rapid development of space exploration has led humanity to establish permanent settlements beyond Earth. With bases on the Moon and Mars, scientists and engineers work tirelessly to create sustainable ecosystems that can support human life in the long term. However, numerous challenges remain, from radiation exposure to psychological effects of isolation in deep space. One of the most critical issues that must be addressed before humanity can expand further into the solar system is",
+  "Throughout history, scientific discoveries have continuously reshaped our understanding of the universe. The shift from a geocentric to a heliocentric model, the theory of relativity, and the advent of quantum mechanics have all challenged previous assumptions and opened new frontiers of knowledge. As we continue to explore the cosmos, scientists are now focused on solving one of the most perplexing mysteries of all: the nature of dark matter and dark energy. If researchers were to uncover definitive proof regarding their existence, it could mean that",
+  "The emergence of advanced genetic engineering techniques has revolutionized modern medicine, allowing scientists to edit DNA with unprecedented precision. With technologies like CRISPR, researchers have already corrected genetic mutations that cause severe diseases and are even exploring the potential of enhancing human traits such as intelligence and longevity. However, this progress raises profound ethical concerns, as the ability to manipulate the human genome could lead to unforeseen consequences. One of the major dilemmas in the future of genetic engineering revolves around",
+  "Climate change has become the defining challenge of the 21st century, with rising global temperatures, extreme weather events, and melting ice caps threatening ecosystems and human populations worldwide. Scientists and policymakers are racing against time to develop sustainable solutions, from carbon capture technologies to alternative energy sources like nuclear fusion. Despite these efforts, one of the biggest obstacles to achieving global climate stability is the fact that"
+]'
+echo "$sample_prompts" > ./inference/prompt/test.json
 # Create output folder
 mkdir -p ./inference/output
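Since the tests consume ./inference/prompt/test.json as a JSON list of strings, a quick validity check after writing it catches shell-quoting mistakes (an unescaped apostrophe inside the single-quoted shell string would truncate the JSON). A minimal sketch:

    import json

    # Minimal sketch: confirm the generated prompt file parses as a JSON
    # list of non-empty strings before the inference tests consume it.
    with open("./inference/prompt/test.json") as f:
        prompts = json.load(f)
    assert isinstance(prompts, list) and all(isinstance(p, str) and p for p in prompts)
    print(f"loaded {len(prompts)} prompts")  # expect 5 with the sample above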
