add rocm get_current_memory_usage() func
Signed-off-by: Shanshan Shen <467638484@qq.com>
shen-shanshan committed Dec 25, 2024
1 parent 7e96d29 commit 6fea4c0
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions vllm/platforms/rocm.py
@@ -110,3 +110,10 @@ def verify_quantization(cls, quant: str) -> None:
                 "Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ"
                 " is not set, enabling VLLM_USE_TRITON_AWQ.")
             envs.VLLM_USE_TRITON_AWQ = True
+
+    @classmethod
+    def get_current_memory_usage(cls,
+                                 device: Optional[torch.types.Device] = None
+                                 ) -> float:
+        torch.cuda.reset_peak_memory_stats(device)
+        return torch.cuda.max_memory_allocated(device)
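For context (not part of the commit): torch.cuda.reset_peak_memory_stats() clamps the recorded peak back down to the current allocation, so a subsequent torch.cuda.max_memory_allocated() call returns the bytes allocated right now rather than the historical high-water mark. Below is a minimal usage sketch, a hypothetical driver script assuming a ROCm (or CUDA) build of PyTorch where vllm.platforms.current_platform resolves to this platform class:

import torch

from vllm.platforms import current_platform

# Allocate a tensor so the caching allocator reports a non-zero figure
# (a 1024x1024 float32 tensor is 4 MiB).
x = torch.empty(1024, 1024, device="cuda")

# reset_peak_memory_stats() followed by max_memory_allocated() reports
# the memory currently allocated on the device.
used_bytes = current_platform.get_current_memory_usage()
print(f"current device memory usage: {used_bytes / 1024**2:.1f} MiB")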
