From 923386630c0a3af8dae4f54c825838fe597997e0 Mon Sep 17 00:00:00 2001
From: sasha0552
Date: Wed, 30 Oct 2024 21:45:08 +0000
Subject: [PATCH] minor fix

Signed-off-by: sasha0552
---
 vllm/attention/backends/xformers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm/attention/backends/xformers.py b/vllm/attention/backends/xformers.py
index fb8a382434241..6c472bbccfa76 100644
--- a/vllm/attention/backends/xformers.py
+++ b/vllm/attention/backends/xformers.py
@@ -536,7 +536,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
                      dtype=query_start_loc.dtype,
                      out=query_start_loc[1:])
 
-        return self._metadata_cls(  # type: ignore
+        self._cached_prefill_metadata = XFormersMetadata(
             num_prefills=self.num_prefills,
             slot_mapping=slot_mapping_tensor,
             num_prefill_tokens=self.num_prefill_tokens,
@@ -552,6 +552,7 @@ def build(self, seq_lens: List[int], query_lens: List[int],
             block_tables=block_tables,
             use_cuda_graph=use_captured_graph,
         )
+        return self._cached_prefill_metadata
 
 
 class XFormersImpl(AttentionImpl[XFormersMetadata]):
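
The patch changes the builder's `build` method from returning the result of `self._metadata_cls(...)` directly to a cache-then-return pattern: the freshly built `XFormersMetadata` is first stored on the builder in `_cached_prefill_metadata`, and that same object is then returned. Below is a minimal, self-contained sketch of that pattern; only the attribute name `_cached_prefill_metadata` and the keyword-argument names shown in the diff are taken from the patch, while the stand-in classes and everything else are hypothetical and simplified, not the vLLM source.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class Metadata:
    """Hypothetical stand-in for XFormersMetadata."""
    num_prefills: int
    num_prefill_tokens: int


class MetadataBuilder:
    """Hypothetical stand-in for the xformers metadata builder."""

    def __init__(self) -> None:
        self.num_prefills = 0
        self.num_prefill_tokens = 0
        self._cached_prefill_metadata: Optional[Metadata] = None

    def build(self) -> Metadata:
        # Construct the metadata, keep a reference to it on the builder,
        # then return that same object -- the pattern the patch introduces.
        self._cached_prefill_metadata = Metadata(
            num_prefills=self.num_prefills,
            num_prefill_tokens=self.num_prefill_tokens,
        )
        return self._cached_prefill_metadata


builder = MetadataBuilder()
meta = builder.build()
# The returned metadata and the cached attribute are the same object.
assert meta is builder._cached_prefill_metadata
```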