Skip to content

Commit

Permalink
[V1] Simplify GPUModelRunner._update_states check (#13265)
Browse files · Browse the repository at this point in the history
  • Loading branch information
njhill authored Feb 14, 2025
1 parent 4da1f66 commit 085b7b2
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions vllm/v1/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> bool:
self.input_batch.block_table.append_row(req_index, start_index,
req_data.new_block_ids)

batch_changed = len(removed_req_indices) > 0 or len(req_ids_to_add) > 0

# Add the new or resumed requests to the persistent batch.
# The smaller empty indices are filled first.
removed_req_indices = sorted(removed_req_indices, reverse=True)
Expand All @@ -363,8 +365,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> bool:
# Condense the batched states if there are empty indices.
if removed_req_indices:
self.input_batch.condense(removed_req_indices)
return (len(unscheduled_req_ids) > 0 or len(req_ids_to_add) > 0
or len(scheduler_output.finished_req_ids) > 0)

return batch_changed

def _prepare_inputs(self, scheduler_output: "SchedulerOutput"):
total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
Expand Down

0 comments on commit 085b7b2

Please sign in to comment.