From 949a71bf27ed4f18591727923cb241c2c93e39db Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Fri, 24 Jan 2025 15:20:59 -0500 Subject: [PATCH] [Bugfix][Kernel] FA3 Fix - RuntimeError: This flash attention build only supports pack_gqa (for build size reasons). (#12405) Signed-off-by: Lucas Wilkinson --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2f9da6fa3e1d3..c954731bf94ef 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -576,7 +576,7 @@ else() FetchContent_Declare( vllm-flash-attn GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git - GIT_TAG 0aff05f577e8a10086066a00618609199b25231d + GIT_TAG 9732b0ce005d1e6216864788502d5570004678f5 GIT_PROGRESS TRUE # Don't share the vllm-flash-attn build between build types BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn