From 6fea4c05dfd6e439685cd12064111cab5071d72d Mon Sep 17 00:00:00 2001
From: Shanshan Shen <467638484@qq.com>
Date: Wed, 25 Dec 2024 17:03:10 +0800
Subject: [PATCH] add rocm get_current_memory_usage() func

Signed-off-by: Shanshan Shen <467638484@qq.com>
---
 vllm/platforms/rocm.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 7778b565372cb..00b532cd2b7bf 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -110,3 +110,16 @@ def verify_quantization(cls, quant: str) -> None:
                 "Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ"
                 " is not set, enabling VLLM_USE_TRITON_AWQ.")
             envs.VLLM_USE_TRITON_AWQ = True
+
+    @classmethod
+    def get_current_memory_usage(cls,
+                                 device: Optional[torch.types.Device] = None
+                                 ) -> float:
+        """Return the memory currently allocated on *device*, in bytes.
+
+        Resetting the peak-memory stats first makes
+        ``max_memory_allocated`` report the *current* allocation
+        rather than a historical peak.
+        """
+        torch.cuda.reset_peak_memory_stats(device)
+        return torch.cuda.max_memory_allocated(device)