From 6fea4c05dfd6e439685cd12064111cab5071d72d Mon Sep 17 00:00:00 2001
From: Shanshan Shen <467638484@qq.com>
Date: Wed, 25 Dec 2024 17:03:10 +0800
Subject: [PATCH] add rocm get_current_memory_usage() func

Signed-off-by: Shanshan Shen <467638484@qq.com>
---
 vllm/platforms/rocm.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 7778b565372cb..00b532cd2b7bf 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -110,3 +110,16 @@ def verify_quantization(cls, quant: str) -> None:
                 "Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ"
                 " is not set, enabling VLLM_USE_TRITON_AWQ.")
             envs.VLLM_USE_TRITON_AWQ = True
+
+    @classmethod
+    def get_current_memory_usage(cls,
+                                 device: Optional[torch.types.Device] = None
+                                 ) -> float:
+        """Return the memory currently allocated on *device*, in bytes.
+
+        Resetting the peak-memory stats first makes
+        ``max_memory_allocated`` report the *current* allocation
+        rather than a historical peak.
+        """
+        torch.cuda.reset_peak_memory_stats(device)
+        return torch.cuda.max_memory_allocated(device)