diff --git a/llumnix/backends/vllm/llm_engine.py b/llumnix/backends/vllm/llm_engine.py index 46164fa0..0892b4a7 100644 --- a/llumnix/backends/vllm/llm_engine.py +++ b/llumnix/backends/vllm/llm_engine.py @@ -249,7 +249,7 @@ def __init__( self._thread.start() def _start_engine_loop(self) -> None: - self._stop_event.clear() + self._stop_event.clear() with self.state_lock: self.state = EngineState.RUNNING diff --git a/llumnix/llumlet/llumlet.py b/llumnix/llumlet/llumlet.py index 2da4f93e..08bda163 100644 --- a/llumnix/llumlet/llumlet.py +++ b/llumnix/llumlet/llumlet.py @@ -109,6 +109,7 @@ def check_state(self): with self.backend_engine.state_lock: if self.backend_engine.state == EngineState.CRASHED: + # pylint: disable=protected-access self.backend_engine._stop_event.set() if self.backend_engine._thread.is_alive(): self.backend_engine._thread.join() diff --git a/tests/unit_test/llumlet/test_engine_step_exception.py b/tests/unit_test/llumlet/test_engine_step_exception.py index f395ed89..f96100c1 100644 --- a/tests/unit_test/llumlet/test_engine_step_exception.py +++ b/tests/unit_test/llumlet/test_engine_step_exception.py @@ -39,14 +39,14 @@ def set_error_step(self, broken: bool): self.backend_engine._thread.join() def raise_error_step(): - self.backend_engine.engine.step() + self.origin_step() raise ValueError("Mock engine step error") if broken: self.backend_engine.engine.step = raise_error_step else: self.backend_engine.engine.step = self.origin_step - + self.backend_engine._thread = threading.Thread( target=self.backend_engine._start_engine_loop, args=(), daemon=True, name="engine_loop" ) @@ -58,7 +58,7 @@ def test_engine_step_exception(setup_ray_env): migration_config = MigrationConfig("LCFS", "rpc", 16, 1, 4, 5, 20) node_id = ray.get_runtime_context().get_node_id() scheduling_strategy = NodeAffinitySchedulingStrategy(node_id=node_id, soft=False) - + origin_free_memory, _ = torch.cuda.mem_get_info() actor_name = "instance_0"