diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py index e7a41ffd0d7dc..252440c7b7e08 100644 --- a/vllm/worker/cache_engine.py +++ b/vllm/worker/cache_engine.py @@ -64,7 +64,7 @@ def __init__( # Initialize the cache. self.gpu_cache = self._allocate_kv_cache( - self.num_gpu_blocks, self.device_config.device) + self.num_gpu_blocks, self.device_config.device_type) self.cpu_cache = self._allocate_kv_cache(self.num_cpu_blocks, "cpu") def _allocate_kv_cache(