polish cogvideo example #266

Merged · 2 commits · Sep 14, 2024
23 changes: 21 additions & 2 deletions examples/cogvideox_example.py
@@ -18,6 +18,14 @@ def main():
     parser = FlexibleArgumentParser(description="xFuser Arguments")
     args = xFuserArgs.add_cli_args(parser).parse_args()
     engine_args = xFuserArgs.from_cli_args(args)
+
+    # Check if ulysses_degree is valid
+    num_heads = 30
+    if engine_args.ulysses_degree > 0 and num_heads % engine_args.ulysses_degree != 0:
+        raise ValueError(
+            f"ulysses_degree ({engine_args.ulysses_degree}) must be a divisor of the number of heads ({num_heads})"
+        )
+
     engine_config, input_config = engine_args.create_config()
     local_rank = get_world_group().local_rank
 
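Note on the new check: the example hard-codes num_heads = 30 for CogVideoX, and Ulysses-style sequence parallelism redistributes whole attention heads across ranks, so ulysses_degree must divide the head count evenly. A minimal sketch of the rule (the helper below is illustrative, not part of the PR):

def valid_ulysses_degrees(num_heads: int) -> list[int]:
    # Every rank must receive the same whole number of heads,
    # so only exact divisors of num_heads are usable degrees.
    return [d for d in range(1, num_heads + 1) if num_heads % d == 0]

print(valid_ulysses_degrees(30))  # [1, 2, 3, 5, 6, 10, 15, 30]
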
@@ -30,7 +38,8 @@ def main():
         pipe.enable_model_cpu_offload(gpu_id=local_rank)
         pipe.vae.enable_tiling()
     else:
-        pipe = pipe.to(f"cuda:{local_rank}")
+        device = torch.device(f"cuda:{local_rank}")
+        pipe = pipe.to(device)
 
     torch.cuda.reset_peak_memory_stats()
     start_time = time.time()
@@ -49,8 +58,18 @@ def main():
     elapsed_time = end_time - start_time
     peak_memory = torch.cuda.max_memory_allocated(device=f"cuda:{local_rank}")
 
+    parallel_info = (
+        f"dp{engine_args.data_parallel_degree}_cfg{engine_config.parallel_config.cfg_degree}_"
+        f"ulysses{engine_args.ulysses_degree}_ring{engine_args.ring_degree}_"
+        f"tp{engine_args.tensor_parallel_degree}_"
+        f"pp{engine_args.pipefusion_parallel_degree}_patch{engine_args.num_pipeline_patch}"
+    )
     if is_dp_last_group():
-        export_to_video(output, "results/output.mp4", fps=8)
+        world_size = get_data_parallel_world_size()
+        resolution = f"{input_config.width}x{input_config.height}"
+        output_filename = f"results/cogvideox_{parallel_info}_{resolution}.mp4"
+        export_to_video(output, output_filename, fps=8)
+        print(f"output saved to {output_filename}")
 
     if get_world_group().rank == get_world_group().world_size - 1:
         print(f"epoch time: {elapsed_time:.2f} sec, memory: {peak_memory/1e9} GB")
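With the new naming scheme, the saved file encodes the parallel configuration and resolution. Plugging in assumed values (all degrees at 1, a 720x480 input; not taken from the PR) shows the resulting path:

parallel_info = "dp1_cfg1_ulysses1_ring1_tp1_pp1_patch1"  # assumed degrees
resolution = "720x480"                                    # assumed input size
print(f"results/cogvideox_{parallel_info}_{resolution}.mp4")
# results/cogvideox_dp1_cfg1_ulysses1_ring1_tp1_pp1_patch1_720x480.mp4
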
15 changes: 10 additions & 5 deletions setup.py
@@ -1,14 +1,18 @@
 from setuptools import find_packages, setup
 import subprocess
 
+
 def get_cuda_version():
     try:
         nvcc_version = subprocess.check_output(["nvcc", "--version"]).decode("utf-8")
-        version_line = [line for line in nvcc_version.split('\n') if "release" in line][0]
-        cuda_version = version_line.split(' ')[-2].replace(',', '')
-        return 'cu' + cuda_version.replace('.', '')
+        version_line = [line for line in nvcc_version.split("\n") if "release" in line][
+            0
+        ]
+        cuda_version = version_line.split(" ")[-2].replace(",", "")
+        return "cu" + cuda_version.replace(".", "")
     except Exception as e:
-        return 'no_cuda'
+        return "no_cuda"
 
+
 if __name__ == "__main__":
     with open("README.md", "r") as f:
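The reformatting leaves the parsing logic unchanged; walking it through a typical nvcc banner (sample text, not captured from this PR) shows what it returns:

sample = (
    "nvcc: NVIDIA (R) Cuda compiler driver\n"
    "Cuda compilation tools, release 12.1, V12.1.105\n"
)
version_line = [line for line in sample.split("\n") if "release" in line][0]
cuda_version = version_line.split(" ")[-2].replace(",", "")  # "12.1"
print("cu" + cuda_version.replace(".", ""))  # cu121
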
@@ -29,9 +33,10 @@ def get_cuda_version():
         "sentencepiece>=0.1.99",
         "beautifulsoup4>=4.12.3",
         "distvae",
-        "yunchang==0.3",
+        "yunchang>=0.3.0",
         "pytest",
         "flask",
+        "opencv-python",
     ],
     extras_require={
         "[flash_attn]": [
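The loosened yunchang pin now accepts any release from 0.3.0 upward instead of exactly 0.3. A quick check with the packaging library (versions illustrative):

from packaging.specifiers import SpecifierSet

old, new = SpecifierSet("==0.3"), SpecifierSet(">=0.3.0")
print("0.3.1" in old)  # False: the exact pin rejects patch releases
print("0.3.1" in new)  # True: the range accepts them
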
1 change: 1 addition & 0 deletions tests/layers/attention_processor_test.py
@@ -33,6 +33,7 @@ def init_process(rank, world_size, fn, run_attn_test):
 
     os.environ["MASTER_ADDR"] = "localhost"
     os.environ["MASTER_PORT"] = "12355"
+    os.environ["LOCAL_RANK"] = str(rank)
 
     init_distributed_environment(rank=rank, world_size=world_size)
     initialize_model_parallel(
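Setting LOCAL_RANK here matters because torchrun exports it for every worker, while a process spawned directly by the test harness never gets it. A sketch of the usual convention downstream code follows when reading it back (illustrative, not code from this PR):

import os
import torch

local_rank = int(os.environ.get("LOCAL_RANK", 0))
device = torch.device(f"cuda:{local_rank}" if torch.cuda.is_available() else "cpu")
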
2 changes: 1 addition & 1 deletion xfuser/core/long_ctx_attention/ring/ring_flash_attn.py
@@ -26,7 +26,7 @@ def ring_flash_attn_forward(
         raise ValueError(
             f"joint_strategy: {joint_strategy} not supprted. supported joint strategy: {supported_joint_strategy}"
         )
-    elif joint_strategy is not "none" and (
+    elif joint_strategy != "none" and (
         joint_tensor_key is None or joint_tensor_value is None
     ):
         raise ValueError(
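The replaced comparison was a genuine bug: `is not` tests object identity, not value equality, and whether two equal strings are the same object depends on interning (CPython 3.8+ even emits a SyntaxWarning for `is`/`is not` against a literal). A short demonstration:

a = "none"
b = "".join(["no", "ne"])  # equal value, but a distinct object
print(a == b)  # True  -> value equality, what the code intends
print(a is b)  # False -> identity, what `is` actually checks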