From a9fae81c43f357e888b8bc14373247680d57a20c Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 31 Jan 2025 23:38:35 +0000
Subject: [PATCH] [Doc] Improve installation signposting (#12575)

- Make device tab names more explicit
- Add comprehensive list of devices to https://docs.vllm.ai/en/latest/getting_started/installation/index.html
- Add `attention` blocks to the intro of all devices that don't have pre-built wheels/images

---------

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .../ai_accelerator/hpu-gaudi.inc.md           |  4 ++
 .../installation/ai_accelerator/index.md      | 33 +++++++------
 .../installation/ai_accelerator/neuron.inc.md |  4 ++
 .../ai_accelerator/openvino.inc.md            |  4 ++
 .../installation/ai_accelerator/tpu.inc.md    |  4 ++
 .../installation/cpu/apple.inc.md             |  4 ++
 .../installation/cpu/arm.inc.md               |  4 ++
 .../getting_started/installation/cpu/index.md | 13 ++---
 .../installation/cpu/x86.inc.md               | 12 +++--
 .../getting_started/installation/gpu/index.md | 49 ++++++++++---------
 .../installation/gpu/rocm.inc.md              | 20 ++++----
 .../installation/gpu/xpu.inc.md               |  4 ++
 .../getting_started/installation/index.md     | 15 ++++++
 13 files changed, 111 insertions(+), 59 deletions(-)

diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
index 704a16233981f..f3b0d6dc9bdc8 100644
--- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
@@ -2,6 +2,10 @@
 
 This tab provides instructions on running vLLM with Intel Gaudi devices.
 
+:::{attention}
+There are no pre-built wheels or images for this device, so you must build vLLM from source.
+:::
+
 ## Requirements
 
 - OS: Ubuntu 22.04 LTS
diff --git a/docs/source/getting_started/installation/ai_accelerator/index.md b/docs/source/getting_started/installation/ai_accelerator/index.md
index 88352f639567b..01793572fee7c 100644
--- a/docs/source/getting_started/installation/ai_accelerator/index.md
+++ b/docs/source/getting_started/installation/ai_accelerator/index.md
@@ -5,7 +5,8 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
+:selected:
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -25,7 +26,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
@@ -52,7 +53,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -72,7 +73,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
@@ -99,7 +100,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -119,7 +120,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
@@ -146,7 +147,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -166,7 +167,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
@@ -193,7 +194,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -213,7 +214,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
@@ -242,7 +243,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -262,7 +263,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
@@ -289,7 +290,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -309,7 +310,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
@@ -336,7 +337,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} TPU
+::::{tab-item} Google TPU
 :sync: tpu
 
 :::{include} tpu.inc.md
@@ -354,7 +355,7 @@ vLLM is a Python library that supports the following AI accelerators. Select you
 
 ::::
 
-::::{tab-item} Neuron
+::::{tab-item} AWS Neuron
 :sync: neuron
 
 :::{include} neuron.inc.md
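The `:::{attention}` blocks this patch adds all point readers at a from-source build. As a minimal sketch of what that path looks like (assuming a generic Linux environment; each device tab documents its own prerequisites, requirement files, and extra steps, so treat this as illustrative rather than canonical):

```console
# Illustrative from-source flow referenced by the new attention blocks;
# per-device prerequisites are covered in each device's tab.
git clone https://github.com/vllm-project/vllm.git
cd vllm
pip install -e .
```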
diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
index 145cc9d668efd..f149818acafb8 100644
--- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
@@ -4,6 +4,10 @@ vLLM 0.3.3 onwards supports model inferencing and serving on AWS Trainium/Infere
 Paged Attention and Chunked Prefill are currently in development and will be available soon.
 Data types currently supported in Neuron SDK are FP16 and BF16.
 
+:::{attention}
+There are no pre-built wheels or images for this device, so you must build vLLM from source.
+:::
+
 ## Requirements
 
 - OS: Linux
diff --git a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
index a7867472583d6..112e8d4d9b256 100644
--- a/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/openvino.inc.md
@@ -2,6 +2,10 @@
 
 vLLM powered by OpenVINO supports all LLM models from [vLLM supported models list](#supported-models) and can perform optimal model serving on all x86-64 CPUs with, at least, AVX2 support, as well as on both integrated and discrete Intel® GPUs ([the list of supported GPUs](https://docs.openvino.ai/2024/about-openvino/release-notes-openvino/system-requirements.html#gpu)).
 
+:::{attention}
+There are no pre-built wheels or images for this device, so you must build vLLM from source.
+:::
+
 ## Requirements
 
 - OS: Linux
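The OpenVINO intro above requires an x86-64 CPU with at least AVX2. On Linux, a quick check before attempting the from-source build the new attention block mandates:

```console
# Non-zero output means the host CPU advertises AVX2.
grep -c avx2 /proc/cpuinfo
```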
diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
index 6827afc805fd8..c0d50feafce56 100644
--- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
+++ b/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
@@ -30,6 +30,10 @@ For TPU pricing information, see [Cloud TPU pricing](https://cloud.google.com/tp
 You may need additional persistent storage for your TPU VMs. For more information, see [Storage options for Cloud TPU data](https://cloud.devsite.corp.google.com/tpu/docs/storage-options).
 
+:::{attention}
+There are no pre-built wheels for this device, so you must either use the pre-built Docker image or build vLLM from source.
+:::
+
 ## Requirements
 
 - Google Cloud TPU VM
 
diff --git a/docs/source/getting_started/installation/cpu/apple.inc.md b/docs/source/getting_started/installation/cpu/apple.inc.md
index 0808b869fdb7b..3bf1d47fa0ff9 100644
--- a/docs/source/getting_started/installation/cpu/apple.inc.md
+++ b/docs/source/getting_started/installation/cpu/apple.inc.md
@@ -4,6 +4,10 @@ vLLM has experimental support for macOS with Apple silicon. For now, users shall
 
 Currently the CPU implementation for macOS supports FP32 and FP16 datatypes.
 
+:::{attention}
+There are no pre-built wheels or images for this device, so you must build vLLM from source.
+:::
+
 ## Requirements
 
 - OS: `macOS Sonoma` or later
diff --git a/docs/source/getting_started/installation/cpu/arm.inc.md b/docs/source/getting_started/installation/cpu/arm.inc.md
index 08a764e1a25f4..a661a0ca5adc7 100644
--- a/docs/source/getting_started/installation/cpu/arm.inc.md
+++ b/docs/source/getting_started/installation/cpu/arm.inc.md
@@ -4,6 +4,10 @@ vLLM has been adapted to work on ARM64 CPUs with NEON support, leveraging the CP
 
 ARM CPU backend currently supports Float32, FP16 and BFloat16 datatypes.
 
+:::{attention}
+There are no pre-built wheels or images for this device, so you must build vLLM from source.
+:::
+
 ## Requirements
 
 - OS: Linux
diff --git a/docs/source/getting_started/installation/cpu/index.md b/docs/source/getting_started/installation/cpu/index.md
index 2f549ede0cf48..d53430403583c 100644
--- a/docs/source/getting_started/installation/cpu/index.md
+++ b/docs/source/getting_started/installation/cpu/index.md
@@ -5,7 +5,8 @@ vLLM is a Python library that supports the following CPU variants. Select your C
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} x86
+::::{tab-item} Intel/AMD x86
+:selected:
 :sync: x86
 
 :::{include} x86.inc.md
@@ -15,7 +16,7 @@ vLLM is a Python library that supports the following CPU variants. Select your C
 
 ::::
 
-::::{tab-item} ARM
+::::{tab-item} ARM AArch64
 :sync: arm
 
 :::{include} arm.inc.md
@@ -44,7 +45,7 @@ vLLM is a Python library that supports the following CPU variants. Select your C
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} x86
+::::{tab-item} Intel/AMD x86
 :sync: x86
 
 :::{include} x86.inc.md
@@ -54,7 +55,7 @@ vLLM is a Python library that supports the following CPU variants. Select your C
 
 ::::
 
-::::{tab-item} ARM
+::::{tab-item} ARM AArch64
 :sync: arm
 
 :::{include} arm.inc.md
@@ -92,7 +93,7 @@ Currently, there are no pre-built CPU wheels.
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} x86
+::::{tab-item} Intel/AMD x86
 :sync: x86
 
 :::{include} x86.inc.md
@@ -102,7 +103,7 @@ Currently, there are no pre-built CPU wheels.
 
 ::::
 
-::::{tab-item} ARM
+::::{tab-item} ARM AArch64
 :sync: arm
 
 :::{include} arm.inc.md
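Of the devices touched above, the TPU is the only one whose attention block offers a pre-built Docker image as an alternative to building from source. A hedged sketch of that path, where the image name and tag are placeholders and the authoritative reference is the TPU tab itself:

```console
# Hypothetical image name/tag; substitute the one documented in the TPU tab.
docker pull vllm/vllm-tpu:nightly
```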
diff --git a/docs/source/getting_started/installation/cpu/x86.inc.md b/docs/source/getting_started/installation/cpu/x86.inc.md
index f146ae0918b44..1dafc3660060e 100644
--- a/docs/source/getting_started/installation/cpu/x86.inc.md
+++ b/docs/source/getting_started/installation/cpu/x86.inc.md
@@ -2,12 +2,20 @@
 
 vLLM initially supports basic model inferencing and serving on x86 CPU platform, with data types FP32, FP16 and BF16.
 
+:::{attention}
+There are no pre-built wheels or images for this device, so you must build vLLM from source.
+:::
+
 ## Requirements
 
 - OS: Linux
 - Compiler: `gcc/g++ >= 12.3.0` (optional, recommended)
 - Instruction Set Architecture (ISA): AVX512 (optional, recommended)
 
+:::{tip}
+[Intel Extension for PyTorch (IPEX)](https://github.com/intel/intel-extension-for-pytorch) extends PyTorch with up-to-date features optimizations for an extra performance boost on Intel hardware.
+:::
+
 ## Set up using Python
 
 ### Pre-built wheels
@@ -29,7 +37,3 @@ vLLM initially supports basic model inferencing and serving on x86 CPU platform,
 ### Build image from source
 
 ## Extra information
-
-## Intel Extension for PyTorch
-
-- [Intel Extension for PyTorch (IPEX)](https://github.com/intel/intel-extension-for-pytorch) extends PyTorch with up-to-date features optimizations for an extra performance boost on Intel hardware.
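The x86 hunks promote the IPEX note from a trailing "Extra information" bullet into a `:::{tip}` next to the requirements. For a reader acting on both the AVX512 recommendation and the tip, something along these lines should work on Linux (the pip package name is an assumption based on the linked repository; verify it before relying on it):

```console
# List any AVX512 feature flags the CPU advertises (empty output = none).
lscpu | grep -o 'avx512[a-z0-9_]*' | sort -u
# IPEX is a separate package installed on top of PyTorch.
pip install intel-extension-for-pytorch
```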
diff --git a/docs/source/getting_started/installation/gpu/index.md b/docs/source/getting_started/installation/gpu/index.md
index 0a61f889753a3..f82c4bda28620 100644
--- a/docs/source/getting_started/installation/gpu/index.md
+++ b/docs/source/getting_started/installation/gpu/index.md
@@ -5,7 +5,8 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
+:selected:
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -15,7 +16,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 :::{include} rocm.inc.md
@@ -25,7 +26,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 :::{include} xpu.inc.md
@@ -45,7 +46,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -55,7 +56,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 :::{include} rocm.inc.md
@@ -65,7 +66,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 :::{include} xpu.inc.md
@@ -87,7 +88,7 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -97,14 +98,14 @@ vLLM is a Python library that supports the following GPU variants. Select your G
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 There is no extra information on creating a new Python environment for this device.
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 There is no extra information on creating a new Python environment for this device.
@@ -118,7 +119,7 @@ There is no extra information on creating a new Python environment for this devi
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -128,7 +129,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 :::{include} rocm.inc.md
@@ -138,7 +139,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 :::{include} xpu.inc.md
@@ -157,7 +158,7 @@ There is no extra information on creating a new Python environment for this devi
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -167,7 +168,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 :::{include} rocm.inc.md
@@ -177,7 +178,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 :::{include} xpu.inc.md
@@ -196,7 +197,7 @@ There is no extra information on creating a new Python environment for this devi
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -206,7 +207,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 :::{include} rocm.inc.md
@@ -216,7 +217,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 :::{include} xpu.inc.md
@@ -233,7 +234,7 @@ There is no extra information on creating a new Python environment for this devi
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -243,7 +244,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 :::{include} rocm.inc.md
@@ -253,7 +254,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 :::{include} xpu.inc.md
@@ -270,7 +271,7 @@ There is no extra information on creating a new Python environment for this devi
 :::::{tab-set}
 :sync-group: device
 
-::::{tab-item} CUDA
+::::{tab-item} NVIDIA CUDA
 :sync: cuda
 
 :::{include} cuda.inc.md
@@ -279,7 +280,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} ROCm
+::::{tab-item} AMD ROCm
 :sync: rocm
 
 :::{include} rocm.inc.md
@@ -288,7 +289,7 @@ There is no extra information on creating a new Python environment for this devi
 
 ::::
 
-::::{tab-item} XPU
+::::{tab-item} Intel XPU
 :sync: xpu
 
 :::{include} xpu.inc.md
diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/source/getting_started/installation/gpu/rocm.inc.md
index 131ad1704ea11..c8fd11415cfda 100644
--- a/docs/source/getting_started/installation/gpu/rocm.inc.md
+++ b/docs/source/getting_started/installation/gpu/rocm.inc.md
@@ -2,6 +2,10 @@
 
 vLLM supports AMD GPUs with ROCm 6.2.
 
+:::{attention}
+There are no pre-built wheels for this device, so you must either use the pre-built Docker image or build vLLM from source.
+:::
+
 ## Requirements
 
 - GPU: MI200s (gfx90a), MI300 (gfx942), Radeon RX 7900 series (gfx1100)
@@ -13,14 +17,6 @@ vLLM supports AMD GPUs with ROCm 6.2.
 
 Currently, there are no pre-built ROCm wheels.
 
-However, the [AMD Infinity hub for vLLM](https://hub.docker.com/r/rocm/vllm/tags) offers a prebuilt, optimized
-docker image designed for validating inference performance on the AMD Instinct™ MI300X accelerator.
-
-:::{tip}
-Please check [LLM inference performance validation on AMD Instinct MI300X](https://rocm.docs.amd.com/en/latest/how-to/performance-validation/mi300x/vllm-benchmark.html)
-for instructions on how to use this prebuilt docker image.
-:::
-
 ### Build wheel from source
 
 0. Install prerequisites (skip if you are already in an environment/docker with the following installed):
@@ -112,7 +108,13 @@
 
 ### Pre-built images
 
-Currently, there are no pre-built ROCm images.
+The [AMD Infinity hub for vLLM](https://hub.docker.com/r/rocm/vllm/tags) offers a prebuilt, optimized
+docker image designed for validating inference performance on the AMD Instinct™ MI300X accelerator.
+
+:::{tip}
+Please check [LLM inference performance validation on AMD Instinct MI300X](https://rocm.docs.amd.com/en/latest/how-to/performance-validation/mi300x/vllm-benchmark.html)
+for instructions on how to use this prebuilt docker image.
+:::
 
 ### Build image from source
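The ROCm hunks relocate the AMD Infinity Hub image from "Pre-built wheels", where it did not belong, to "Pre-built images", where it does. Based on the Docker Hub repository linked in the hunk, usage would look roughly like this; the tag is a placeholder and the device flags are conventional ROCm-container options, not taken from this patch:

```console
# Pick a concrete tag from https://hub.docker.com/r/rocm/vllm/tags
docker pull rocm/vllm:latest
# Conventional device mounts for ROCm containers.
docker run -it --device=/dev/kfd --device=/dev/dri --group-add video rocm/vllm:latest
```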
diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/source/getting_started/installation/gpu/xpu.inc.md
index bc01c6000bc07..4116826789e5c 100644
--- a/docs/source/getting_started/installation/gpu/xpu.inc.md
+++ b/docs/source/getting_started/installation/gpu/xpu.inc.md
@@ -2,6 +2,10 @@
 
 vLLM initially supports basic model inferencing and serving on Intel GPU platform.
 
+:::{attention}
+There are no pre-built wheels or images for this device, so you must build vLLM from source.
+:::
+
 ## Requirements
 
 - Supported Hardware: Intel Data Center GPU, Intel ARC GPU
diff --git a/docs/source/getting_started/installation/index.md b/docs/source/getting_started/installation/index.md
index 0f5e013ce071a..c64c3a7208eeb 100644
--- a/docs/source/getting_started/installation/index.md
+++ b/docs/source/getting_started/installation/index.md
@@ -6,8 +6,23 @@ vLLM supports the following hardware platforms:
 
 :::{toctree}
 :maxdepth: 1
+:hidden:
 
 gpu/index
 cpu/index
 ai_accelerator/index
 :::
+
+- <project:gpu/index.md>
+  - NVIDIA CUDA
+  - AMD ROCm
+  - Intel XPU
+- <project:cpu/index.md>
+  - Intel/AMD x86
+  - ARM AArch64
+  - Apple silicon
+- <project:ai_accelerator/index.md>
+  - Google TPU
+  - Intel Gaudi
+  - AWS Neuron
+  - OpenVINO
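Whichever platform a reader picks from the new device list, the end state is the same installed package. A minimal smoke test after following any of these install paths:

```console
python -c "import vllm; print(vllm.__version__)"
```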