Skip to content

Commit

Permalink
Fix benchmark_serving
Browse files: browse the repository at this point in the history
  • Loading branch information
s5u13b committed Jan 16, 2025
1 parent 21c6fa1 commit 5501476
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions benchmark/benchmark_serving.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,9 +402,8 @@ async def measured(*args, **kwargs):
self._all_token_latencies.append(lat_arr)
self._decode_sum_latencies.append(decode_sum_latency)
self._all_decode_token_latencies.extend(lat_arr[1:,1])
self._inference_latencies.append(0.0)
if 'per_token_latency_breakdown_list' in output:
step_latency = np.mean([request_timestamps['engine_step_latency'] for request_timestamps in output['per_token_latency_breakdown_list']])
self._inference_latencies.append(step_latency)
self._per_token_latency_breakdown_list.append(output['per_token_latency_breakdown_list'])
return prompt, output
return measured
Expand Down

0 comments on commit 5501476

Please sign in to comment.