class rpd_user_marker:
    """Named user range for RPD profiling, backed by ``hipScopedMarker``.

    The marker can be driven in two equivalent ways::

        with rpd_user_marker(name="Prefill"):
            ...

        marker = rpd_user_marker(name="Decode").start()
        ...
        marker.end()

    When ``is_hipScopedMarker_available()`` reports false, no underlying
    marker is created and every method is a no-op, so call sites do not need
    to guard on profiler availability themselves.

    Attributes:
        name: Label given to the range; a placeholder label is used when it
            is ``None`` or empty.
        marker: The live ``hipScopedMarker`` while a range is open, otherwise
            ``None``.
    """

    # Label used when the caller did not supply a (non-empty) name.
    _DEFAULT_NAME = "UserMarker Undefined"

    def __init__(self, name=None):
        self.name = name
        self.marker = None

    def __enter__(self):
        # Same behaviour as the explicit API; kept as a single code path.
        return self.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None so exceptions raised inside the block still propagate.
        self._close(exc_type, exc_val, exc_tb)

    def start(self):
        """Open the range if the profiler marker is available.

        Returns:
            ``self``, so the call can be chained or used by ``__enter__``.
        """
        if is_hipScopedMarker_available():
            # Imported lazily: the module only exists on profiling builds.
            from hipScopedMarker import hipScopedMarker
            self.marker = hipScopedMarker(str(self.name or self._DEFAULT_NAME))
            self.marker.__enter__()
        return self

    def end(self, exc_type=0, exc_val=0, exc_tb=0):
        """Close the range opened by ``start()``; a no-op if none is open.

        The ``0`` defaults are kept for backward compatibility, but they are
        translated to ``None`` before being forwarded: that is what a ``with``
        statement passes on a clean exit, so both entry points hand the
        wrapped marker the same values.
        """
        self._close(*(None if arg == 0 else arg
                      for arg in (exc_type, exc_val, exc_tb)))

    def _close(self, exc_type, exc_val, exc_tb):
        """Exit the wrapped marker at most once and forget it."""
        # Detach first so the range is never closed twice, even if the
        # wrapped __exit__ raises or end() is called repeatedly.
        marker, self.marker = self.marker, None
        if marker is not None:
            marker.__exit__(exc_type, exc_val, exc_tb)
(DeviceMemoryProfiler, GiB_bytes, PyObjectCache, async_tensor_h2d, flatten_2d_lists, - is_pin_memory_available, rpd_mark, supports_dynamo, - weak_ref_tensor) + is_pin_memory_available, rpd_mark, rpd_user_marker, + supports_dynamo, weak_ref_tensor) from vllm.worker.model_runner_base import ( ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase, _add_attn_metadata_broadcastable_dict, @@ -1630,6 +1630,12 @@ def execute_model( assert model_input.attn_metadata is not None prefill_meta = model_input.attn_metadata.prefill_metadata decode_meta = model_input.attn_metadata.decode_metadata + if prefill_meta: + marker_instance = rpd_user_marker(name="Prefill") + else: + marker_instance = rpd_user_marker(name="Decode") + + marker_instance.start() # TODO(andoorve): We can remove this once all # virtual engines share the same kv cache. virtual_engine = model_input.virtual_engine @@ -1765,6 +1771,7 @@ def execute_model( output.hidden_states = hidden_states + marker_instance.end() return [output] def need_recv_kv(self, model_input, kv_caches) -> bool: