Remove process_model_outputs request timestamps

AlibabaPAI · Feb 7, 2025 · fa2fc9c
1 parent 2a980ca
commit fa2fc9c
Show file tree

Hide file tree

Showing 3 changed files with 3 additions and 15 deletions.
diff --git a/llumnix/backends/vllm/llm_engine.py b/llumnix/backends/vllm/llm_engine.py
@@ -116,7 +116,8 @@ def from_engine_args(
             usage_context=usage_context,
         )
         return engine
-
+
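+    # TODO(s5u13b): (no description was given in the commit; state what remains to be done for _process_request_outputs)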
     def _process_request_outputs(
             self,
             outputs: List[Tuple[RequestOutput,ServerInfo]],
@@ -128,15 +129,9 @@ def _process_request_outputs(
             request_outputs, server_infos = zip(*outputs)
             request_outputs = list(request_outputs)
             server_infos = list(server_infos)
-        for request_output, server_info in zip(request_outputs, server_infos):
-            if hasattr(server_info, 'request_timestamps'):
-                request_output.request_timestamps = server_info.request_timestamps
-                request_output.request_timestamps.engine_process_model_outputs_timestamp_end = time.time()
+        for request_output in request_outputs:
             if request_output.finished:
                 logger.info("engine finished request {}".format(request_output.request_id))
-        for server_info in server_infos:
-            if hasattr(server_info, 'request_timestamps'):
-                server_info.request_timestamps.engine_process_model_outputs_timestamp_begin = time.time()
         for request_output in request_outputs:
             if hasattr(request_output, 'request_timestamps'):
                 request_output.request_timestamps.engine_step_timestamp_begin = step_begin_time

diff --git a/llumnix/entrypoints/utils.py b/llumnix/entrypoints/utils.py
@@ -78,7 +78,6 @@ async def retry_manager_method_async(ray_call, method_name, *args, **kwargs):
 def init_per_token_latency_breakdown_dict() -> Dict[str, int]:
     per_token_latency_breakdown_dict = {
         'step_latency_engine': [],
-        'process_model_outputs_latency': [],
         'step_postprocess_latency': [],
         'across_async_put_queue_thread_latency': [],
         'across_async_put_queue_actor_latency': [],

diff --git a/llumnix/server_info.py b/llumnix/server_info.py
@@ -21,8 +21,6 @@ def __init__(self):
         self.manager_generate_timestamp = -1.0
         self.llumlet_generate_timestamp = -1.0
         self.engine_add_request_timestamp = -1.0
-        self.engine_process_model_outputs_timestamp_begin = -1.0
-        self.engine_process_model_outputs_timestamp_end = -1.0
         self.engine_step_timestamp_begin = -1.0
         self.engine_step_timestamp_end = -1.0
         self.engine_step_postprocess_timestamp_end = -1.0
@@ -33,10 +31,6 @@ def __init__(self):
         self.api_server_background_process_get_queue_timestamp = -1.0
         self.api_server_generate_benchmark_timestamp_end = -1.0
 
-    @property
-    def process_model_outputs_latency(self):
-        return (self.engine_process_model_outputs_timestamp_end - self.engine_process_model_outputs_timestamp_begin)*1000
-
     @property
     def step_latency_engine(self):
         return (self.engine_step_timestamp_end - self.engine_step_timestamp_begin)*1000