Commit

remove dependency on flash_attn (#410)
feifeibear authored Dec 26, 2024
1 parent 167351a commit 92187b8
Showing 12 changed files with 66 additions and 213 deletions.
6 changes: 3 additions & 3 deletions setup.py

@@ -32,19 +32,19 @@ def get_cuda_version():
         "sentencepiece>=0.1.99",
         "beautifulsoup4>=4.12.3",
         "distvae",
-        "yunchang>=0.3.0",
+        "yunchang>=0.6.0",
         "pytest",
         "flask",
         "opencv-python",
         "imageio",
         "imageio-ffmpeg",
         "optimum-quanto",
-        "flash_attn>=2.6.3",
         "ray"
     ],
     extras_require={
         "diffusers": [
-            "diffusers>=0.31.0", # NOTE: diffusers>=0.32.0.dev is necessary for CogVideoX and Flux
+            "diffusers>=0.32.0", # NOTE: diffusers>=0.32.0.dev is necessary for CogVideoX and Flux
+            "flash_attn>=2.6.3",
         ]
     },
     url="https://github.com/xdit-project/xDiT.",
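With flash_attn dropped from install_requires, code that wants the kernel has to check for it at runtime. A minimal sketch of such a probe, using only the standard library; the helper name has_flash_attn is illustrative and not part of this commit:

import importlib.util


def has_flash_attn() -> bool:
    # True if the optional flash_attn package is importable in this environment.
    return importlib.util.find_spec("flash_attn") is not None


if has_flash_attn():
    import flash_attn
else:
    flash_attn = None  # callers must route attention through another backend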
4 changes: 0 additions & 4 deletions xfuser/config/config.py

@@ -130,10 +130,6 @@ def __post_init__(self):
                 f"sp_degree is {self.sp_degree}, please set it "
                 f"to 1 or install 'yunchang' to use it"
             )
-        if not HAS_FLASH_ATTN and self.ring_degree > 1:
-            raise ValueError(
-                f"Flash attention not found. Ring attention not available. Please set ring_degree to 1"
-            )


 @dataclass
8 changes: 7 additions & 1 deletion xfuser/core/fast_attention/attn_layer.py

@@ -7,7 +7,12 @@
 from diffusers.models.attention_processor import Attention
 from typing import Optional
 import torch.nn.functional as F
-import flash_attn
+
+try:
+    import flash_attn
+except ImportError:
+    flash_attn = None
+
 from enum import Flag, auto
 from .fast_attn_state import get_fast_attn_window_size

@@ -165,6 +170,7 @@ def __call__(
                 is_causal=False,
             ).transpose(1, 2)
         elif method.has(FastAttnMethod.FULL_ATTN):
+            assert flash_attn is not None, f"FlashAttention is not available, please install flash_attn"
            all_hidden_states = flash_attn.flash_attn_func(query, key, value)
            if need_compute_residual:
                # Compute the full-window attention residual
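The assert above simply refuses to run the FULL_ATTN path without flash_attn. A hedged sketch of a softer variant, not the code in this commit, that falls back to torch.nn.functional.scaled_dot_product_attention when the package is missing:

import torch
import torch.nn.functional as F

try:
    import flash_attn
except ImportError:
    flash_attn = None


def full_attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor:
    # query/key/value are laid out as (batch, seq_len, num_heads, head_dim),
    # which is the layout flash_attn.flash_attn_func expects.
    if flash_attn is not None:
        return flash_attn.flash_attn_func(query, key, value)
    # scaled_dot_product_attention wants (batch, num_heads, seq_len, head_dim),
    # so transpose into and out of that layout.
    out = F.scaled_dot_product_attention(
        query.transpose(1, 2), key.transpose(1, 2), value.transpose(1, 2)
    )
    return out.transpose(1, 2)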
2 changes: 0 additions & 2 deletions xfuser/core/long_ctx_attention/__init__.py

@@ -1,7 +1,5 @@
 from .hybrid import xFuserLongContextAttention
-from .ulysses import xFuserUlyssesAttention

 __all__ = [
     "xFuserLongContextAttention",
-    "xFuserUlyssesAttention",
 ]
7 changes: 7 additions & 0 deletions xfuser/core/long_ctx_attention/hybrid/attn_layer.py

@@ -3,6 +3,11 @@

 import torch.distributed
 from yunchang import LongContextAttention
+try:
+    from yunchang.kernels import AttnType
+except ImportError:
+    raise ImportError("Please install yunchang 0.6.0 or later")
+
 from yunchang.comm.all_to_all import SeqAllToAll4D

 from xfuser.logger import init_logger

@@ -21,6 +26,7 @@ def __init__(
         ring_impl_type: str = "basic",
         use_pack_qkv: bool = False,
         use_kv_cache: bool = False,
+        attn_type: AttnType = AttnType.FA,
     ) -> None:
         """
         Arguments:

@@ -35,6 +41,7 @@
             gather_idx=gather_idx,
             ring_impl_type=ring_impl_type,
             use_pack_qkv=use_pack_qkv,
+            attn_type = attn_type,
         )
         self.use_kv_cache = use_kv_cache
         if (
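The new attn_type argument lets callers pick the yunchang attention backend explicitly instead of always assuming FlashAttention. A hedged usage sketch, assuming yunchang>=0.6.0 exposes AttnType.TORCH as a pure-PyTorch backend and that the usual xDiT distributed setup has already been initialized:

from yunchang.kernels import AttnType

from xfuser.core.long_ctx_attention import xFuserLongContextAttention

# Default: keep the FlashAttention kernel (requires flash_attn to be installed).
hybrid_attn = xFuserLongContextAttention(attn_type=AttnType.FA)

# Without flash_attn, request a torch-based kernel instead.
hybrid_attn_torch = xFuserLongContextAttention(attn_type=AttnType.TORCH)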
10 changes: 8 additions & 2 deletions xfuser/core/long_ctx_attention/ring/ring_flash_attn.py

@@ -1,11 +1,16 @@
 import torch
-import flash_attn
-from flash_attn.flash_attn_interface import _flash_attn_forward

 from xfuser.core.long_ctx_attention import xFuserLongContextAttention
 from xfuser.core.cache_manager.cache_manager import get_cache_manager
 from yunchang.ring.utils import RingComm, update_out_and_lse
 from yunchang.ring.ring_flash_attn import RingFlashAttnFunc

+try:
+    import flash_attn
+    from flash_attn.flash_attn_interface import _flash_attn_forward
+except ImportError:
+    flash_attn = None
+    _flash_attn_forward = None
+
 def xdit_ring_flash_attn_forward(
     process_group,

@@ -80,6 +85,7 @@ def xdit_ring_flash_attn_forward(
             key, value = k, v

         if not causal or step <= comm.rank:
+            assert flash_attn is not None, f"FlashAttention is not available, please install flash_attn"
             if flash_attn.__version__ <= "2.6.3":
                 block_out, _, _, _, _, block_lse, _, _ = _flash_attn_forward(
                     q,
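The version gate above compares flash_attn.__version__ as a plain string. A hedged alternative sketch, not part of this commit, that performs the same check with the packaging library so that releases such as 2.10.x compare numerically rather than lexicographically:

from packaging.version import Version

try:
    import flash_attn
except ImportError:
    flash_attn = None


def use_legacy_flash_attn_forward() -> bool:
    # Mirrors the branch above: True for flash_attn releases up to and including 2.6.3.
    assert flash_attn is not None, "flash_attn is not installed"
    return Version(flash_attn.__version__) <= Version("2.6.3")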
5 changes: 0 additions & 5 deletions xfuser/core/long_ctx_attention/ulysses/__init__.py

This file was deleted.

168 changes: 0 additions & 168 deletions xfuser/core/long_ctx_attention/ulysses/attn_layer.py

This file was deleted.
