From df7a79d22dabfcde2e44f0265adb3543f0fbd712 Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Mon, 13 Jan 2025 16:20:52 +0800
Subject: [PATCH] [platform] add ray_device_key (#11948)

Signed-off-by: youkaichao
---
Note: in both get_node_and_gpu_ids hunks the RuntimeError message is built
with the % operator, so the platform name is interpolated into the text
instead of being passed as a second, never-formatted exception argument.

 vllm/executor/ray_utils.py    | 19 +++++++++++++------
 vllm/platforms/cuda.py        |  1 +
 vllm/platforms/hpu.py         |  1 +
 vllm/platforms/interface.py   |  4 ++++
 vllm/platforms/neuron.py      |  1 +
 vllm/platforms/rocm.py        |  2 ++
 vllm/platforms/tpu.py         |  2 ++
 vllm/platforms/xpu.py         |  3 +++
 vllm/v1/executor/ray_utils.py | 13 +++++++++++--
 9 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py
index 8d766bad1a072..9f40f6a65dcd7 100644
--- a/vllm/executor/ray_utils.py
+++ b/vllm/executor/ray_utils.py
@@ -8,6 +8,7 @@
 from vllm.config import ParallelConfig
 from vllm.executor.msgspec_utils import decode_hook, encode_hook
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 from vllm.sequence import ExecuteModelRequest, IntermediateTensors
 from vllm.utils import get_ip
 from vllm.worker.worker_base import WorkerWrapperBase
@@ -47,7 +48,12 @@ def get_node_ip(self) -> str:
 
         def get_node_and_gpu_ids(self) -> Tuple[str, List[int]]:
             node_id = ray.get_runtime_context().get_node_id()
-            gpu_ids = ray.get_gpu_ids()
+            device_key = current_platform.ray_device_key
+            if not device_key:
+                raise RuntimeError("current platform %s does not support ray." %
+                                   current_platform.device_name)
+            gpu_ids = ray.get_runtime_context().get_accelerator_ids(
+            )[device_key]
             return node_id, gpu_ids
 
         def execute_model_spmd(
@@ -249,11 +255,12 @@ def initialize_ray_cluster(
         # Placement group is already set.
         return
 
-    device_str = "GPU"
-    if current_platform.is_tpu():
-        device_str = "TPU"
-    elif current_platform.is_hpu():
-        device_str = 'HPU'
+    device_str = current_platform.ray_device_key
+    if not device_str:
+        raise ValueError(
+            f"current platform {current_platform.device_name} does not "
+            "support ray.")
+
     # Create placement group for worker processes
     current_placement_group = ray.util.get_current_placement_group()
     if current_placement_group:
diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 23ceac83e49de..3f77ec50ed31f 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -77,6 +77,7 @@ class CudaPlatformBase(Platform):
     device_name: str = "cuda"
     device_type: str = "cuda"
     dispatch_key: str = "CUDA"
+    ray_device_key: str = "GPU"
 
     @classmethod
     def get_device_capability(cls,
diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
index 8152d881fa8d9..0acb2804a5f66 100644
--- a/vllm/platforms/hpu.py
+++ b/vllm/platforms/hpu.py
@@ -19,6 +19,7 @@ class HpuPlatform(Platform):
     device_name: str = "hpu"
     device_type: str = "hpu"
     dispatch_key: str = "HPU"
+    ray_device_key: str = "HPU"
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index fe398801c5dd9..ec917f75689dd 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -82,6 +82,10 @@ class Platform:
     # check https://github.com/pytorch/pytorch/blob/313dac6c1ca0fa0cde32477509cce32089f8532a/torchgen/model.py#L134 # noqa
     # use "CPU" as a fallback for platforms not registered in PyTorch
     dispatch_key: str = "CPU"
+    # available ray device keys:
+    # https://github.com/ray-project/ray/blob/10ba5adadcc49c60af2c358a33bb943fb491a171/python/ray/_private/ray_constants.py#L438 # noqa
+    # empty string means the device does not support ray
+    ray_device_key: str = ""
     # The torch.compile backend for compiling simple and
     # standalone functions. The default value is "inductor" to keep
     # the same behavior as PyTorch.
diff --git a/vllm/platforms/neuron.py b/vllm/platforms/neuron.py
index a4bbbd27c8a89..7f4a867b32ba1 100644
--- a/vllm/platforms/neuron.py
+++ b/vllm/platforms/neuron.py
@@ -16,6 +16,7 @@ class NeuronPlatform(Platform):
     _enum = PlatformEnum.NEURON
     device_name: str = "neuron"
     device_type: str = "neuron"
+    ray_device_key: str = "neuron_cores"
     supported_quantization: list[str] = ["neuron_quant"]
 
     @classmethod
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 1c2f602efc856..f12e948113723 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -64,6 +64,8 @@ class RocmPlatform(Platform):
     device_name: str = "rocm"
     device_type: str = "cuda"
     dispatch_key: str = "CUDA"
+    ray_device_key: str = "GPU"
+
     supported_quantization: list[str] = [
         "awq", "gptq", "fp8", "compressed_tensors", "compressed-tensors",
         "fbgemm_fp8", "gguf"
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index 8a59b53ca4b15..460eb170bba34 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -19,6 +19,8 @@ class TpuPlatform(Platform):
     device_name: str = "tpu"
     device_type: str = "tpu"
     dispatch_key: str = "XLA"
+    ray_device_key: str = "TPU"
+
     supported_quantization: list[str] = [
         "tpu_int8", "compressed-tensors", "compressed_tensors"
     ]
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 00692a5d23031..cb74f79b31794 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -19,6 +19,9 @@ class XPUPlatform(Platform):
     device_name: str = "xpu"
     device_type: str = "xpu"
     dispatch_key: str = "XPU"
+    # Intel XPU's device key is "GPU" for Ray.
+    # see https://github.com/ray-project/ray/blob/6a5eb5865eeb9ccf058a79b44f107e327e360673/python/ray/_private/accelerators/intel_gpu.py#L20 # noqa: E501
+    ray_device_key: str = "GPU"
 
     @classmethod
     def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
diff --git a/vllm/v1/executor/ray_utils.py b/vllm/v1/executor/ray_utils.py
index 7733610e59c7f..fc9715b7a5909 100644
--- a/vllm/v1/executor/ray_utils.py
+++ b/vllm/v1/executor/ray_utils.py
@@ -41,7 +41,12 @@ def get_node_ip(self) -> str:
 
         def get_node_and_gpu_ids(self) -> Tuple[str, List[int]]:
             node_id = ray.get_runtime_context().get_node_id()
-            gpu_ids = ray.get_gpu_ids()
+            device_key = current_platform.ray_device_key
+            if not device_key:
+                raise RuntimeError("current platform %s does not support ray." %
+                                   current_platform.device_name)
+            gpu_ids = ray.get_runtime_context().get_accelerator_ids(
+            )[device_key]
             return node_id, gpu_ids
 
         def setup_device_if_necessary(self):
@@ -211,7 +216,11 @@ def initialize_ray_cluster(
         # Placement group is already set.
         return
 
-    device_str = "GPU" if not current_platform.is_tpu() else "TPU"
+    device_str = current_platform.ray_device_key
+    if not device_str:
+        raise ValueError(
+            f"current platform {current_platform.device_name} does not "
+            "support ray.")
     # Create placement group for worker processes
     current_placement_group = ray.util.get_current_placement_group()
     if current_placement_group: