Skip to content

Commit

Permalink
Fix prefix-caching correctness: only reuse a cached prefix once it has actually been computed (new `Prefix.computed` flag set after model outputs are processed), instead of as soon as its blocks are allocated
Browse files Browse the repository at this point in the history
  • Loading branch information
zhuohan123 committed Jan 17, 2024
1 parent 29f4f96 commit bd56a69
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 1 deletion.
6 changes: 6 additions & 0 deletions vllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,12 @@ def _process_model_outputs(
request_output = RequestOutput.from_seq_group(seq_group)
request_outputs.append(request_output)

# Update prefix state, now all the uncomputed prefixes are computed.
for seq_group in scheduled_seq_groups:
if (seq_group.prefix is not None and seq_group.prefix.allocated
and not seq_group.prefix.computed):
seq_group.prefix.computed = True

if self.log_stats:
# Log the system stats.
self._log_system_stats(scheduler_outputs.prompt_run,
Expand Down
1 change: 1 addition & 0 deletions vllm/prefix.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def __init__(
self.hash = hash(token_ids)
assert self.length % block_size == 0
self.block_table: Optional[BlockTable] = None
self.computed = False

@property
def allocated(self) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion vllm/worker/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _prepare_prompt(
prompt_lens.append(prompt_len)
prefix_len = 0
prefix = seq_group_metadata.prefix
if prefix is not None and prefix.allocated:
if prefix is not None and prefix.computed:
prefix_len = prefix.get_length()
prompt_tokens = prompt_tokens[prefix_len:]
prefix_block_tables.append(prefix.get_block_numbers())
Expand Down

0 comments on commit bd56a69

Please sign in to comment.