Skip to content

Commit

Permalink
flatten qkv
Browse files (browse the repository at this point in the history)
Signed-off-by: Isotr0py <2037008807@qq.com>
  • Loading branch information
Isotr0py committed Jan 26, 2025
1 parent 0d5228d commit 159f0f2
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions vllm/attention/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,16 @@ def forward(
device=key.device)

out = flash_attn_varlen_func(
- query,
- key,
- value,
+ query.flatten(0, 1),
+ key.flatten(0, 1),
+ value.flatten(0, 1),
cu_seqlens_q=cu_seqlens_q,
cu_seqlens_k=cu_seqlens_k,
max_seqlen_q=q_len,
max_seqlen_k=kv_len,
softmax_scale=self.scale,
)
+ out = out.reshape(bsz, q_len, -1)
elif self.attn_backend == _Backend.XFORMERS:
from xformers import ops as xops

Expand Down

0 comments on commit 159f0f2

Please sign in to comment.