From c455b6d7088c82d840bc15e3ea75e79ce272d58c Mon Sep 17 00:00:00 2001
From: Nick Hill
Date: Thu, 13 Feb 2025 22:57:52 -0800
Subject: [PATCH] [V1][BugFix] Simplify GPUModelRunner._update_states check

Signed-off-by: Nick Hill
---
 vllm/v1/worker/gpu_model_runner.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index b2f6c33858cbc..e90b76dcdd9ad 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -347,6 +347,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> bool:
             self.input_batch.block_table.append_row(req_index, start_index,
                                                     req_data.new_block_ids)
 
+        batch_changed = len(removed_req_indices) > 0 or len(req_ids_to_add) > 0
+
         # Add the new or resumed requests to the persistent batch.
         # The smaller empty indices are filled first.
         removed_req_indices = sorted(removed_req_indices, reverse=True)
@@ -363,8 +365,8 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> bool:
         # Condense the batched states if there are empty indices.
         if removed_req_indices:
             self.input_batch.condense(removed_req_indices)
-        return (len(unscheduled_req_ids) > 0 or len(req_ids_to_add) > 0
-                or len(scheduler_output.finished_req_ids) > 0)
+
+        return batch_changed
 
     def _prepare_inputs(self, scheduler_output: "SchedulerOutput"):
         total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens