
Commit

yapf formatting in vllm/core
jberkhahn committed Aug 6, 2024
1 parent 2e78fec commit c89c730
Showing 4 changed files with 33 additions and 32 deletions.
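
Every hunk below is a mechanical reflow: long comparison and boolean expressions are broken before the binary operator instead of after it, so the operator now opens the continuation line, and the dict comprehension in common.py is spread over its own lines. A minimal sketch of the two wrapping styles, using hypothetical stand-in values rather than code from the diff:

# Hypothetical stand-in values, only so the sketch runs on its own.
num_total_gpu_blocks = 1024
num_required_blocks = 16
watermark_blocks = 8

# Wrapping style before this commit: the operator ends the first line.
below_watermark_old_style = (num_total_gpu_blocks - num_required_blocks <
                             watermark_blocks)

# Wrapping style after this commit: the operator opens the continuation line.
below_watermark_new_style = (num_total_gpu_blocks - num_required_blocks
                             < watermark_blocks)

# The reflow changes layout only, never the result.
assert below_watermark_old_style == below_watermark_new_style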
7 changes: 4 additions & 3 deletions vllm/core/block/common.py
@@ -33,9 +33,10 @@ class RefCounter(RefCounterProtocol):
 
     def __init__(self, all_block_indices: Iterable[BlockId]):
         deduped = set(all_block_indices)
-        self._refcounts: Dict[BlockId,
-                              RefCount] = {index: 0
-                                           for index in deduped}
+        self._refcounts: Dict[BlockId, RefCount] = {
+            index: 0
+            for index in deduped
+        }
 
     def incr(self, block_id: BlockId) -> RefCount:
         assert block_id in self._refcounts
35 changes: 18 additions & 17 deletions vllm/core/block_manager_v1.py
@@ -284,8 +284,8 @@ def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
         num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
 
         # Use watermark to avoid frequent cache eviction.
-        if (self.num_total_gpu_blocks - num_required_blocks <
-                self.watermark_blocks):
+        if (self.num_total_gpu_blocks - num_required_blocks
+                < self.watermark_blocks):
             return AllocStatus.NEVER
         if num_free_gpu_blocks - num_required_blocks >= self.watermark_blocks:
             return AllocStatus.OK
@@ -301,21 +301,22 @@ def _allocate_sequence(self, \
 
         block_table: BlockTable = []
         if seq is not None:
-            for logical_idx in range(num_prompt_blocks):
-                if (self.block_sliding_window is not None
-                        and logical_idx >= self.block_sliding_window):
-                    block = block_table[logical_idx % self.block_sliding_window]
-                    # Set the reference counts of the token blocks.
-                    block.ref_count = ref_count
-                elif not is_encoder_decoder and self.enable_caching:
-                    block = self.gpu_allocator.allocate(
-                        seq.hash_of_block(logical_idx),
-                        seq.num_hashed_tokens_of_block(logical_idx))
-                else:
-                    block = self.gpu_allocator.allocate()
-                    # Set the reference counts of the token blocks.
-                    block.ref_count = ref_count
-                block_table.append(block)
+            for logical_idx in range(num_prompt_blocks):
+                if (self.block_sliding_window is not None
+                        and logical_idx >= self.block_sliding_window):
+                    block = block_table[logical_idx %
+                                        self.block_sliding_window]
+                    # Set the reference counts of the token blocks.
+                    block.ref_count = ref_count
+                elif not is_encoder_decoder and self.enable_caching:
+                    block = self.gpu_allocator.allocate(
+                        seq.hash_of_block(logical_idx),
+                        seq.num_hashed_tokens_of_block(logical_idx))
+                else:
+                    block = self.gpu_allocator.allocate()
+                    # Set the reference counts of the token blocks.
+                    block.ref_count = ref_count
+                block_table.append(block)
 
         return block_table
 
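For readers skimming the reflowed loop above: when a sliding window is configured, logical block indices past the window wrap around and reuse earlier physical blocks via the modulo. A toy sketch of just that index arithmetic, with hypothetical names and sizes rather than the vLLM API:

# Toy illustration of the sliding-window reuse in the loop above.
# All names and sizes here are hypothetical, not taken from vLLM.
block_sliding_window = 4
num_prompt_blocks = 10

block_table = []
for logical_idx in range(num_prompt_blocks):
    if logical_idx >= block_sliding_window:
        # Past the window: reuse an already-allocated physical block.
        block = block_table[logical_idx % block_sliding_window]
    else:
        # Inside the window: pretend-allocate a fresh block id.
        block = f"block-{logical_idx}"
    block_table.append(block)

# The table cycles through the first `block_sliding_window` blocks.
print(block_table)  # ['block-0', 'block-1', 'block-2', 'block-3', 'block-0', ...]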
4 changes: 2 additions & 2 deletions vllm/core/block_manager_v2.py
@@ -135,8 +135,8 @@ def can_allocate(self, seq_group: SequenceGroup) -> AllocStatus:
             device=Device.GPU)
 
         # Use watermark to avoid frequent cache eviction.
-        if (self.num_total_gpu_blocks - num_required_blocks <
-                self.watermark_blocks):
+        if (self.num_total_gpu_blocks - num_required_blocks
+                < self.watermark_blocks):
             return AllocStatus.NEVER
         if num_free_gpu_blocks - num_required_blocks >= self.watermark_blocks:
             return AllocStatus.OK
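Both block-manager hunks reflow the same watermark test. The decision it encodes, as far as the surrounding context lines show, is roughly the following; this is a standalone sketch with a hypothetical enum and numbers, not the vLLM implementation (the final LATER branch is assumed):

from enum import Enum


class AllocStatus(Enum):
    OK = 1     # enough free blocks above the watermark, allocate now
    LATER = 2  # assumed: not enough free blocks yet, retry later
    NEVER = 3  # the request cannot fit even with an empty cache


def can_allocate(num_total_gpu_blocks: int, num_free_gpu_blocks: int,
                 num_required_blocks: int, watermark_blocks: int) -> AllocStatus:
    # Use watermark to avoid frequent cache eviction.
    if (num_total_gpu_blocks - num_required_blocks
            < watermark_blocks):
        return AllocStatus.NEVER
    if num_free_gpu_blocks - num_required_blocks >= watermark_blocks:
        return AllocStatus.OK
    return AllocStatus.LATER


# Hypothetical numbers: 1024 blocks total, 100 free, request needs 16.
print(can_allocate(1024, 100, 16, watermark_blocks=10))  # AllocStatus.OK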
19 changes: 9 additions & 10 deletions vllm/core/scheduler.py
@@ -800,8 +800,8 @@ def _schedule_default(self) -> SchedulerOutputs:
                 running_scheduled.swapped_out) == 0:
             swapped_in = self._schedule_swapped(budget, curr_loras)
 
-        assert (budget.num_batched_tokens <=
-                self.scheduler_config.max_num_batched_tokens)
+        assert (budget.num_batched_tokens
+                <= self.scheduler_config.max_num_batched_tokens)
         assert budget.num_curr_seqs <= self.scheduler_config.max_num_seqs
 
         # Update waiting requests.
@@ -877,8 +877,8 @@ def _schedule_chunked_prefill(self):
                                           curr_loras,
                                           enable_chunking=True)
 
-        assert (budget.num_batched_tokens <=
-                self.scheduler_config.max_num_batched_tokens)
+        assert (budget.num_batched_tokens
+                <= self.scheduler_config.max_num_batched_tokens)
         assert budget.num_curr_seqs <= self.scheduler_config.max_num_seqs
 
         # Update waiting requests.
@@ -983,8 +983,8 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]:
                 # NOTE: We use get_len instead of get_prompt_len because when
                 # a sequence is preempted, prefill includes previous generated
                 # output tokens.
-                if (token_chunk_size + seqs[0].data.get_num_computed_tokens() <
-                        seqs[0].data.get_len()):
+                if (token_chunk_size + seqs[0].data.get_num_computed_tokens()
+                        < seqs[0].data.get_len()):
                     do_sample = False
 
             # It assumes the scheduled_seq_groups is ordered by
@@ -1166,10 +1166,9 @@ def _passed_delay(self, now: float) -> bool:
         if self.scheduler_config.delay_factor > 0 and self.waiting:
             earliest_arrival_time = min(
                 [e.metrics.arrival_time for e in self.waiting])
-            passed_delay = (
-                (now - earliest_arrival_time) >
-                (self.scheduler_config.delay_factor * self.last_prompt_latency)
-                or not self.running)
+            passed_delay = ((now - earliest_arrival_time)
+                            > (self.scheduler_config.delay_factor *
+                               self.last_prompt_latency) or not self.running)
         else:
             passed_delay = True
         return passed_delay
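The last hunk packs the delay test into one dense expression, so a spelled-out version may help when reading it: a new prompt is scheduled only once the oldest waiting request has waited longer than delay_factor times the last prompt latency, or when nothing is currently running. A standalone sketch with hypothetical values, not the Scheduler method itself:

import time

# Hypothetical values, only to make the sketch executable.
delay_factor = 0.5
last_prompt_latency = 2.0                    # seconds spent on the previous prompt
earliest_arrival_time = time.time() - 3.0    # oldest waiting request arrived 3 s ago
running: list = []                           # no sequence groups currently running

now = time.time()
passed_delay = ((now - earliest_arrival_time)
                > (delay_factor * last_prompt_latency) or not running)

# 3 s waited > 0.5 * 2.0 s required, so scheduling may proceed.
print(passed_delay)  # True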
