
Commit

update
goliaro committed Feb 24, 2025
1 parent 91bcf2d commit 20850ad
Showing 7 changed files with 53 additions and 11 deletions.
2 changes: 1 addition & 1 deletion inference/python/incr_decoding.py
@@ -101,7 +101,7 @@ def main():
     )
     llm.compile(
         generation_config,
-        max_requests_per_batch=1,
+        max_requests_per_batch=4,
         max_seq_length=256,
         max_tokens_per_batch=64,
     )
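
Raising max_requests_per_batch from 1 to 4 lets one compiled model serve several requests in the same decoding batch. A hedged sketch of the batched call path, with names mirroring this repo's Python examples (the exact generate signature and result fields are assumptions):

    # Illustrative only, not part of the commit: with max_requests_per_batch=4,
    # up to four of these prompts can share a single decoding batch.
    prompts = [
        "The largest ocean on Earth is",
        "The inventor of the telephone was",
        "The speed of light is",
        "The tallest mountain in the world is",
    ]
    results = llm.generate(prompts)   # assumed API, as in the repo's examples
    for result in results:
        print(result.output_text)     # field name assumed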
4 changes: 2 additions & 2 deletions inference/python/spec_infer.py
@@ -130,15 +130,15 @@ def main():
     for ssm in ssms:
         ssm.compile(
             generation_config,
-            max_requests_per_batch=1,
+            max_requests_per_batch=4,
             max_seq_length=256,
             max_tokens_per_batch=64,
         )
 
     # Compile the LLM for inference and load the weights into memory
     llm.compile(
         generation_config,
-        max_requests_per_batch=1,
+        max_requests_per_batch=4,
         max_seq_length=256,
         max_tokens_per_batch=64,
         ssms=ssms,
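
The same capacity bump is applied to both the draft models (SSMs) and the target LLM, so a speculative-decoding batch can carry up to four requests through both the draft and verification stages. A minimal hedged sketch (assuming, as the examples suggest, that generation is driven through the target LLM once it is compiled with ssms=ssms):

    # Illustrative only: the compiled SSMs act as draft models internally;
    # requests are still issued against the target LLM.
    results = llm.generate(prompts)   # up to 4 requests per speculative batch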
2 changes: 1 addition & 1 deletion python/flexflow/core/flexflow_cffi.py
@@ -4718,7 +4718,7 @@ def generate(self, requests_list: List[Request]):
         ] # entry will be None for finetuning requests
         c_output_texts = [
             (
-                ffi.new("char[]", max_sequence_length * 5)
+                ffi.new("char[]", max_sequence_length * 10)
                 if request.req_type == RequestType.REQ_INFERENCE
                 else ffi.NULL
             )
5 changes: 2 additions & 3 deletions src/c/flexflow_c.cc
@@ -1780,10 +1780,9 @@ void flexflow_model_generate(flexflow_model_t handle_,
     if (max_lengths[i] >= 0) {
       assert(total_tokens <= max_lengths[i] || num_output_tokens == 0);
     }
-    // assert(results[i].output_tokens.size() <= max_seq_lengths[i] ||
-    //        results[i].output_tokens.size() ==
-    //            results[i].input_tokens.size());
     output_length_and_tokens[i][0] = results[i].output_tokens.size();
+    assert(results[i].output_tokens.size() <= max_lengths[i]+100 && "Exceeding python buffer size for token ids");
+    assert(results[i].output_text.length() <= max_lengths[i]*10 && "Exceeding python buffer size for output text");
     std::copy(results[i].output_tokens.begin(),
               results[i].output_tokens.end(),
               output_length_and_tokens[i] + 1);
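
These two asserts pin down the contract with the Python-side buffers: the cffi layer above now allocates max_sequence_length * 10 bytes per output text (hence the change from * 5), and the token check implies the ID buffer holds max_length + 100 entries. A minimal sketch of the invariant, using a hypothetical helper name:

    # Hypothetical helper mirroring the two C-side asserts; the sizes come from
    # the cffi allocation above and the +100 slack implied by the token assert.
    def fits_python_buffers(max_length, num_output_tokens, output_text):
        return (num_output_tokens <= max_length + 100       # token-ID buffer entries
                and len(output_text) <= max_length * 10)    # text buffer bytes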
3 changes: 1 addition & 2 deletions tests/inference/generate_inf_test_configs.py
@@ -19,7 +19,6 @@
"use_4bit_quantization": False,
"use_8bit_quantization": False,
"enable_peft": False,
"peft_activation_reserve_space_size": 1024, # 1GB
"profiling": False,
"benchmarking": False,
"inference_debugging": False,
@@ -34,7 +33,7 @@
"full_precision": True,
"prompt": "",
"output_file": "",
"max_length": 128,
"max_length": 255,
}
ssm_configs = {
"ssms": [
37 changes: 36 additions & 1 deletion tests/inference/test_inference_output.py
@@ -43,7 +43,41 @@ def compare_single_line(file_a, file_b):
         raise AssertionError(
             f"File contents differ at position {i}:\n {file_a} -> {list_a[i]}\n {file_b} -> {list_b[i]}"
         )
+def compare_token_ids(file1_path, file2_path):
+    prefix = "token IDs: "
+
+    # Read lines from both files.
+    with open(file1_path, 'r') as f1, open(file2_path, 'r') as f2:
+        lines1 = f1.readlines()
+        lines2 = f2.readlines()
+
+    # Filter lines that start with the specified prefix.
+    token_lines1 = [line for line in lines1 if line.startswith(prefix)]
+    token_lines2 = [line for line in lines2 if line.startswith(prefix)]
+
+    # Check if both files have the same number of token lines.
+    if len(token_lines1) != len(token_lines2):
+        print(f"Error: Number of token ID lines differ: {len(token_lines1)} vs {len(token_lines2)}")
+        return False
+
+    # Compare corresponding token lines.
+    for i, (line1, line2) in enumerate(zip(token_lines1, token_lines2)):
+        try:
+            tokens1 = [int(tok.strip()) for tok in line1[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file1, line {i}: {line1}\n{e}")
+            continue
+
+        try:
+            tokens2 = [int(tok.strip()) for tok in line2[len(prefix):].strip().split(",") if tok.strip()]
+        except ValueError as e:
+            print(f"Error parsing integers in file2, line {i}: {line2}\n{e}")
+            continue
+
+        # Determine the number of tokens to compare: the first 50, or fewer if a list is shorter.
+        n_to_compare = min(50, len(tokens1), len(tokens2))
+        if tokens1[:n_to_compare] != tokens2[:n_to_compare]:
+            raise AssertionError(f"Mismatch in line {i}:\nFile1 tokens (first {n_to_compare}): {tokens1[:n_to_compare]}\nFile2 tokens (first {n_to_compare}): {tokens2[:n_to_compare]}")
+
 def group_model_files(prefix):
     """
@@ -118,7 +152,8 @@ def test_output_alignment(file_a, file_b):
"""
Each file pair is tested and reported separately.
"""
compare_single_line(file_a, file_b)
# compare_single_line(file_a, file_b)
compare_token_ids(file_a, file_b)
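
For context: the new check reads output dumps whose lines look like "token IDs: 1, 15043, 29892, ..." and compares the first 50 IDs of each corresponding line. A hedged usage sketch (the file paths are hypothetical; real pairs come from group_model_files via the test parametrization):

    # Hypothetical invocation; in the test suite, pytest supplies the file pairs.
    compare_token_ids(
        "inference/output/model_half_precision.txt",   # hypothetical paths
        "inference/output/model_full_precision.txt",
    )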



11 changes: 10 additions & 1 deletion tests/inference_tests.sh
@@ -18,7 +18,16 @@ CPP_INFERENCE_TESTS=${CPP_INFERENCE_TESTS:-OFF}
 rm -rf inference/prompt inference/output inference/inf_test_configs || true
 # Create test prompt file
 mkdir -p ./inference/prompt
-echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json
+# echo '["Three tips for staying healthy are: "]' > ./inference/prompt/test.json
+sample_prompts='[
+    "The largest ocean on Earth is",
+    "The inventor of the telephone was",
+    "The speed of light is",
+    "The tallest mountain in the world is",
+    "The first man on the moon was"
+]'
+echo "$sample_prompts" > ./inference/prompt/test.json
+
 # Create output folder
 mkdir -p ./inference/output
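
The prompt file now carries five prompts instead of one, which exercises the max_requests_per_batch=4 bumps above with more requests than fit in a single batch. A short hedged check of the file the script writes (illustrative, not part of the script):

    # Sketch: the JSON written above should parse to five prompt strings.
    import json
    with open("./inference/prompt/test.json") as f:
        prompts = json.load(f)
    assert len(prompts) == 5   # five prompts now, vs. one before this commit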

