From 4a0b5afb0e7d755e709ec3a6b108d2766eb73477 Mon Sep 17 00:00:00 2001
From: xwhzz <wh.xie@outlook.com>
Date: Tue, 24 Sep 2024 21:26:57 +0800
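Subject: [PATCH] Fix Latte Pipeline support

Rename the `num_frames` argument of the Latte pipeline `__call__` to
`video_length` (signature and docstring) and update the example call in
examples/latte_example.py to pass `video_length=16`. Inside `__call__`,
`num_frames` is kept as a local alias of `video_length`, so existing uses
such as the `decode_chunk_size` default continue to work.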

---
 examples/latte_example.py                         | 2 +-
 xfuser/model_executor/pipelines/pipeline_latte.py | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/latte_example.py b/examples/latte_example.py
index 212879e9..06a87877 100644
--- a/examples/latte_example.py
+++ b/examples/latte_example.py
@@ -39,7 +39,7 @@ def main():
     output = pipe(
         height=input_config.height,
         width=input_config.width,
-        num_frames=16,
+        video_length=16,
         prompt=input_config.prompt,
         num_inference_steps=input_config.num_inference_steps,
         output_type="pt",
diff --git a/xfuser/model_executor/pipelines/pipeline_latte.py b/xfuser/model_executor/pipelines/pipeline_latte.py
index c773f870..311f0431 100644
--- a/xfuser/model_executor/pipelines/pipeline_latte.py
+++ b/xfuser/model_executor/pipelines/pipeline_latte.py
@@ -68,7 +68,7 @@ def __call__(
         timesteps: Optional[List[int]] = None,
         guidance_scale: float = 7.5,
         num_images_per_prompt: int = 1,
-        num_frames: int = 16,
+        video_length: int = 16,
         height: int = 512,
         width: int = 512,
         eta: float = 0.0,
@@ -116,7 +116,7 @@ def __call__(
                 Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
                 1`. Higher guidance scale encourages to generate videos that are closely linked to the text `prompt`,
                 usually at the expense of lower video quality.
-            num_frames (`int`, *optional*, defaults to 16):
+            video_length (`int`, *optional*, defaults to 16):
                 The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of videos to generate per prompt.
@@ -172,6 +172,7 @@ def __call__(
             callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs
 
         # 0. Default
+        num_frames = video_length
         decode_chunk_size = (
             decode_chunk_size if decode_chunk_size is not None else num_frames
         )