diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index 87ade377266a2..59f4f54587e25 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -1194,6 +1194,10 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False): gc.collect() if not current_platform.is_cpu(): torch.cuda.empty_cache() + try: + torch._C._host_emptyCache() + except AttributeError: + logger.warning("torch._C._host_emptyCache() only available in PyTorch >=2.5") #noqa: E501 def in_the_same_node_as(pg: ProcessGroup, source_rank: int = 0) -> List[bool]: