add notes and small fix
zhuohan123 committed Jan 18, 2024
1 parent bd56a69 commit 6b00283
Showing 2 changed files with 8 additions and 2 deletions.
vllm/core/block_manager.py (4 changes: 2 additions & 2 deletions)
@@ -235,7 +235,7 @@ def swap_in(self, seq_group: SequenceGroup) -> Dict[int, int]:
         # CPU block -> GPU block.
         if seq_group.prefix is not None:
             # make sure to swap in the prefix first
-            assert seq_group.prefix.allocated
+            assert seq_group.prefix.allocated and seq_group.prefix.computed
 
         mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
         for seq in seq_group.get_seqs(status=SequenceStatus.SWAPPED):
@@ -278,7 +278,7 @@ def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
             for gpu_block in block_table:
                 if (seq_group.prefix is not None
                         and gpu_block in seq_group.prefix.block_table):
-                    # We do not swap out the prefix blocks.
+                    # NOTE: We do not swap out the prefix blocks for now.
                     self.gpu_allocator.free(gpu_block)
                     continue
 
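Taken together, the two hunks above tighten how shared-prefix blocks interact with swapping: a sequence group is only swapped back in once its prefix has blocks allocated and its KV cache computed, and on swap-out the prefix blocks are merely dereferenced rather than added to the GPU-to-CPU mapping. Below is a minimal, self-contained sketch of that bookkeeping; the Block, Prefix, check_swap_in, and build_swap_out_mapping names are illustrative stand-ins, not vLLM's actual classes or allocator.

from dataclasses import dataclass
from typing import Dict, List


@dataclass(eq=False)
class Block:
    """Stand-in for a physical KV-cache block with a reference count."""
    block_id: int
    ref_count: int = 1


@dataclass(eq=False)
class Prefix:
    """Stand-in for a shared prompt prefix; `computed` means its KV cache is filled."""
    block_table: List[Block]
    allocated: bool = False
    computed: bool = False


def check_swap_in(prefix: Prefix) -> None:
    # Mirrors the tightened assertion: the prefix must both hold blocks and
    # have its KV cache computed before dependent sequences are swapped in.
    assert prefix.allocated and prefix.computed


def build_swap_out_mapping(block_table: List[Block],
                           prefix: Prefix) -> Dict[int, int]:
    """Map GPU block ids to CPU block ids, skipping blocks owned by the prefix."""
    mapping: Dict[int, int] = {}
    next_cpu_block = 0
    for gpu_block in block_table:
        if gpu_block in prefix.block_table:
            # Prefix blocks are not swapped out for now: drop this sequence
            # group's reference and leave the prefix copy resident on the GPU.
            gpu_block.ref_count -= 1
            continue
        mapping[gpu_block.block_id] = next_cpu_block
        next_cpu_block += 1
    return mapping

Only non-prefix blocks end up in the returned mapping, which matches the freed-and-skipped path in the swap_out hunk above.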
vllm/prefix.py (6 changes: 6 additions & 0 deletions)
@@ -7,6 +7,9 @@ class Prefix:
     """Data and states associated with a prefix of prompt tokens for multiple
     sequence groups.
 
+    NOTE: This feature is experimental and may be replaced with automatic
+    prefix caching in the future.
+
     Args:
         prefix_id: The id of the prefix in the prefix pool.
         token_ids: The token ids of the prefix.
@@ -49,6 +52,9 @@ def set_block_table(self, block_table: BlockTable) -> None:
 class PrefixPool:
     """Manages all the prompt prefixes.
 
+    NOTE: This feature is experimental and may be replaced with automatic
+    prefix caching in the future.
+
     Args:
         block_size: The block size of the executed model.
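The docstring notes above flag the prefix feature as experimental. For orientation, here is a rough sketch of the pool idea those docstrings describe: a prefix is truncated to a whole number of blocks and keyed by a hash of its token ids, so identical prompt prefixes resolve to one shared Prefix object. The class shapes and the add_or_get method are hypothetical stand-ins, not the module's actual interface.

from typing import Dict, List, Optional, Tuple


class Prefix:
    """Toy stand-in: a fixed run of prompt tokens shared by sequence groups."""

    def __init__(self, prefix_id: int, token_ids: Tuple[int, ...],
                 block_size: int) -> None:
        self.prefix_id = prefix_id
        self.token_ids = token_ids
        self.block_size = block_size
        self.computed = False  # flipped once the KV cache for these tokens exists


class PrefixPool:
    """Toy stand-in for a pool that deduplicates prefixes across requests."""

    def __init__(self, block_size: int) -> None:
        self.block_size = block_size
        self._prefixes: Dict[int, Prefix] = {}

    def add_or_get(self, token_ids: List[int]) -> Optional[Prefix]:
        # Only whole blocks can be shared, so truncate to a block boundary.
        length = len(token_ids) // self.block_size * self.block_size
        if length == 0:
            return None
        key = hash(tuple(token_ids[:length]))
        if key not in self._prefixes:
            self._prefixes[key] = Prefix(len(self._prefixes),
                                         tuple(token_ids[:length]),
                                         self.block_size)
        return self._prefixes[key]

Under this sketch, two requests sharing the same leading blocks reuse one Prefix, which is what lets the block manager above treat prefix blocks specially during swapping.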
