Skip to content

Commit

Permalink
Remove process_model_outputs request timestamps
Browse files: browse the repository at this point in the history
  • Loading branch information
s5u13b committed Feb 7, 2025
1 parent 2a980ca commit fa2fc9c
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 15 deletions.
11 changes: 3 additions & 8 deletions llumnix/backends/vllm/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,8 @@ def from_engine_args(
usage_context=usage_context,
)
return engine


# TODO(s5u13b):
def _process_request_outputs(
self,
outputs: List[Tuple[RequestOutput,ServerInfo]],
Expand All @@ -128,15 +129,9 @@ def _process_request_outputs(
request_outputs, server_infos = zip(*outputs)
request_outputs = list(request_outputs)
server_infos = list(server_infos)
for request_output, server_info in zip(request_outputs, server_infos):
if hasattr(server_info, 'request_timestamps'):
request_output.request_timestamps = server_info.request_timestamps
request_output.request_timestamps.engine_process_model_outputs_timestamp_end = time.time()
for request_output in request_outputs:
if request_output.finished:
logger.info("engine finished request {}".format(request_output.request_id))
for server_info in server_infos:
if hasattr(server_info, 'request_timestamps'):
server_info.request_timestamps.engine_process_model_outputs_timestamp_begin = time.time()
for request_output in request_outputs:
if hasattr(request_output, 'request_timestamps'):
request_output.request_timestamps.engine_step_timestamp_begin = step_begin_time
Expand Down
1 change: 0 additions & 1 deletion llumnix/entrypoints/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ async def retry_manager_method_async(ray_call, method_name, *args, **kwargs):
def init_per_token_latency_breakdown_dict() -> Dict[str, int]:
per_token_latency_breakdown_dict = {
'step_latency_engine': [],
'process_model_outputs_latency': [],
'step_postprocess_latency': [],
'across_async_put_queue_thread_latency': [],
'across_async_put_queue_actor_latency': [],
Expand Down
6 changes: 0 additions & 6 deletions llumnix/server_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ def __init__(self):
self.manager_generate_timestamp = -1.0
self.llumlet_generate_timestamp = -1.0
self.engine_add_request_timestamp = -1.0
self.engine_process_model_outputs_timestamp_begin = -1.0
self.engine_process_model_outputs_timestamp_end = -1.0
self.engine_step_timestamp_begin = -1.0
self.engine_step_timestamp_end = -1.0
self.engine_step_postprocess_timestamp_end = -1.0
Expand All @@ -33,10 +31,6 @@ def __init__(self):
self.api_server_background_process_get_queue_timestamp = -1.0
self.api_server_generate_benchmark_timestamp_end = -1.0

@property
def process_model_outputs_latency(self):
    """Return the duration of model-output processing, scaled by 1000.

    The value is the difference between the
    ``engine_process_model_outputs_timestamp_end`` and
    ``engine_process_model_outputs_timestamp_begin`` attributes. Both are
    assigned from ``time.time()`` by the engine, so the result is in
    milliseconds.
    """
    finished_at = self.engine_process_model_outputs_timestamp_end
    started_at = self.engine_process_model_outputs_timestamp_begin
    elapsed = finished_at - started_at
    return elapsed * 1000

@property
def step_latency_engine(self):
    """Return the engine step duration, scaled by 1000.

    The value is ``engine_step_timestamp_end`` minus
    ``engine_step_timestamp_begin``, multiplied by 1000.
    NOTE(review): presumably both timestamps are in seconds, which makes
    the result milliseconds -- confirm against where they are assigned.
    """
    step_end = self.engine_step_timestamp_end
    step_begin = self.engine_step_timestamp_begin
    elapsed = step_end - step_begin
    return elapsed * 1000
Expand Down

0 comments on commit fa2fc9c

Please sign in to comment.