Commit
Merge pull request vllm-project#1 from bigPYJ1151/fix_ans
Fix key cache block shape.
bigPYJ1151 authored Oct 27, 2023
2 parents 3a4c79a + eff780b commit b5e7066
1 changed file: vllm/worker/cache_engine.py (1 addition, 1 deletion)
@@ -58,7 +58,7 @@ def get_key_block_shape(self) -> Tuple[int, int, int, int]:
         element_size = torch.tensor([], dtype=self.dtype).element_size()
         x = (
             16 // element_size
-        )
+        ) if not self.cache_config.cpu_only else 1  # TODO: whether need to follow the tiling method on GPU
         return (
             self.num_heads,
             self.head_size // x,
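A minimal standalone sketch of the shape logic this commit patches (a hypothetical helper, not the vLLM `CacheEngine` class itself; `num_heads`, `head_size`, `block_size`, and `element_size` are passed in directly here instead of coming from the cache config). On GPU the key cache packs `x = 16 // element_size` elements along the trailing dimension; with this fix, a `cpu_only` cache skips that tiling and uses `x = 1`:

```python
def key_block_shape(num_heads: int, head_size: int, block_size: int,
                    element_size: int, cpu_only: bool = False):
    """Return the per-block key cache shape.

    element_size is the size of one element in bytes
    (e.g. 2 for fp16, 4 for fp32), mirroring
    torch.tensor([], dtype=...).element_size() in the real code.
    """
    # GPU path packs 16 bytes of elements into the last dim;
    # the CPU path (this commit's fix) uses no such tiling.
    x = (16 // element_size) if not cpu_only else 1
    return (num_heads, head_size // x, block_size, x)

# fp16 on GPU: x = 16 // 2 = 8
print(key_block_shape(32, 128, 16, element_size=2))                  # (32, 16, 16, 8)
# fp16 with cpu_only=True after this fix: x = 1
print(key_block_shape(32, 128, 16, element_size=2, cpu_only=True))   # (32, 128, 16, 1)
```

The four returned dimensions multiply out to `num_heads * head_size * block_size` either way; only the layout of the last two axes changes between the GPU and CPU paths.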

0 comments on commit b5e7066
