add turbo
Anonymous authored and Anonymous committed Dec 21, 2024
1 parent 5f40233 commit 94b6b59
Showing 11 changed files with 1,659 additions and 94 deletions.
23 changes: 19 additions & 4 deletions README.md
@@ -42,8 +42,8 @@ pip install -r requirements.txt
## 💫 Inference with Command
### 1. Higher-Resolution Text-to-Image

- 1) Download the pre-trained SDXL checkpoints from [Hugging Face](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0).
- 2) Modify the `run_freescale.py` and input the following commands in the terminal.
+ 1) Modify `run_freescale.py` as needed.
+ 2) Input the following commands in the terminal:
```bash
python run_freescale.py

@@ -54,14 +54,15 @@ pip install -r requirements.txt

### 2. Flexible Control for Detail Level

- 1) Download the pre-trained SDXL checkpoints.
- 2) Modify the `run_sdxl.py` and generate the base image with the original resolutions.
+ 1) Modify `run_sdxl.py` and generate the base image at the original resolution.
+ 2) Input the following commands in the terminal:
```bash
python run_sdxl.py
```
3) Put the generated image into the folder `imgen_intermediates`.
4) (Optional) Generate the mask using other segmentation models (e.g., [Segment Anything](https://huggingface.co/spaces/Xenova/segment-anything-web)) and put the mask into the folder `imgen_intermediates`.
5) Modify `run_freescale_imgen.py` and generate the final image at the higher resolution.
+ 6) Input the following commands in the terminal:
```bash
python run_freescale_imgen.py

@@ -71,6 +72,14 @@ pip install -r requirements.txt
```
<img src="assets/fig_mask.png">
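For step 4 above, any tool that writes a binary mask image into `imgen_intermediates` will do. A hedged sketch of thresholding a soft segmentation map and saving it there (the filename, threshold, and random stand-in array are assumptions for illustration, not taken from the repository):

```python
import os

import numpy as np
from PIL import Image

# Hypothetical post-processing for a segmentation model's soft output:
# threshold it to a hard 0/255 mask and save it in the folder that the
# README says run_freescale_imgen.py reads its intermediates from.
os.makedirs("imgen_intermediates", exist_ok=True)
soft_mask = np.random.rand(1024, 1024)               # stand-in for model output
hard_mask = (soft_mask > 0.5).astype(np.uint8) * 255  # binarize to {0, 255}
Image.fromarray(hard_mask).save("imgen_intermediates/mask.png")
```

A 2D `uint8` array round-trips as a grayscale ("L" mode) PNG, which is the simplest interchange format for a mask.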

+ ### 3. Faster Generation with SDXL-Turbo
+
+ 1) Modify `run_freescale_turbo.py` as needed.
+ 2) Input the following commands in the terminal:
+ ```bash
+ python run_freescale_turbo.py
+ ```


## 🧲 Tips
1. Generating 8k (8192 x 8192) images will cost around 55 GB of GPU memory and 1 hour on an NVIDIA A800.
@@ -81,6 +90,12 @@ pip install -r requirements.txt
If you have any questions about FreeScale, feel free to contact [Haonan Qiu](http://haonanqiu.com/).


+ ## 📝 Changelog
+ - __[2024.12.22]__: 🔥🔥 Release FreeScale for SDXL-Turbo, trading a slight quality loss for a significant speedup.
+ - __[2024.12.13]__: 🔥🔥 Release FreeScale (based on SDXL) for higher-resolution image generation!
+ <br>


## 😉 Citation
```bib
@article{qiu2024freescale,
80 changes: 0 additions & 80 deletions gradio_app.py
@@ -1,8 +1,6 @@
import gradio as gr

import os
import torch
from PIL import Image

from pipeline_freescale import StableDiffusionXLPipeline
from free_lunch_utils import register_free_upblock2d, register_free_crossattn_upblock2d
@@ -68,84 +66,6 @@ def infer(prompt, output_size, ddim_steps, guidance_scale, cosine_scale, seed, o

css = """
#col-container {max-width: 768px; margin-left: auto; margin-right: auto;}
- a {text-decoration-line: underline; font-weight: 600;}
- .animate-spin {
-     animation: spin 1s linear infinite;
- }
- @keyframes spin {
-     from {
-         transform: rotate(0deg);
-     }
-     to {
-         transform: rotate(360deg);
-     }
- }
- #share-btn-container {
-     display: flex;
-     padding-left: 0.5rem !important;
-     padding-right: 0.5rem !important;
-     background-color: #000000;
-     justify-content: center;
-     align-items: center;
-     border-radius: 9999px !important;
-     max-width: 15rem;
-     height: 36px;
- }
- div#share-btn-container > div {
-     flex-direction: row;
-     background: black;
-     align-items: center;
- }
- #share-btn-container:hover {
-     background-color: #060606;
- }
- #share-btn {
-     all: initial;
-     color: #ffffff;
-     font-weight: 600;
-     cursor:pointer;
-     font-family: 'IBM Plex Sans', sans-serif;
-     margin-left: 0.5rem !important;
-     padding-top: 0.5rem !important;
-     padding-bottom: 0.5rem !important;
-     right:0;
- }
- #share-btn * {
-     all: unset;
- }
- #share-btn-container div:nth-child(-n+2){
-     width: auto !important;
-     min-height: 0px !important;
- }
- #share-btn-container .wrap {
-     display: none !important;
- }
- #share-btn-container.hidden {
-     display: none!important;
- }
- img[src*='#center'] {
-     display: inline-block;
-     margin: unset;
- }
- .footer {
-     margin-bottom: 45px;
-     margin-top: 10px;
-     text-align: center;
-     border-bottom: 1px solid #e5e5e5;
- }
- .footer>p {
-     font-size: .8rem;
-     display: inline-block;
-     padding: 0 10px;
-     transform: translateY(10px);
-     background: white;
- }
- .dark .footer {
-     border-color: #303030;
- }
- .dark .footer>p {
-     background: #0b0f19;
- }
"""

with gr.Blocks(css=css) as demo:
3 changes: 2 additions & 1 deletion pipeline_freescale.py
@@ -33,6 +33,7 @@
from functools import partial
import numpy as np

+ import torch.nn.functional as F
from diffusers.models.attention import BasicTransformerBlock
from scale_attention import ori_forward, scale_forward

@@ -815,7 +816,7 @@ def __call__(
height, width = resolutions_list[0]
target_sizes = resolutions_list[1:]
if not restart_steps:
-     restart_steps = [15] * len(target_sizes)
+     restart_steps = [int(num_inference_steps*0.3)] * len(target_sizes)
else:
height = height or self.default_sample_size * self.vae_scale_factor
width = width or self.default_sample_size * self.vae_scale_factor
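The changed default ties the restart schedule to the sampler's step count instead of hardcoding step 15. A minimal standalone sketch of just that computation (the helper name and example values are illustrative, not part of the repository):

```python
def default_restart_steps(num_inference_steps, target_sizes):
    # Mirrors the new default in pipeline_freescale.py: each upscaling
    # stage restarts at 30% of the total denoising steps rather than at
    # a fixed step 15.
    return [int(num_inference_steps * 0.3)] * len(target_sizes)

# With the common 50-step SDXL schedule this reproduces the old default,
# while a 4-step turbo-style schedule gets a proportionally small value:
print(default_restart_steps(50, [(2048, 2048), (4096, 4096)]))  # [15, 15]
print(default_restart_steps(4, [(2048, 2048)]))                 # [1]
```

The proportional form matters for the few-step schedules this commit targets: a hardcoded restart at step 15 would fall outside a 4-step schedule entirely.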
3 changes: 2 additions & 1 deletion pipeline_freescale_imgen.py
@@ -33,6 +33,7 @@
from functools import partial
import numpy as np

+ import torch.nn.functional as F
from diffusers.models.attention import BasicTransformerBlock
from scale_attention import ori_forward, scale_forward
from PIL import Image
@@ -839,7 +840,7 @@ def __call__(
height, width = resolutions_list[0]
target_sizes = resolutions_list[1:]
if not restart_steps:
-     restart_steps = [15] * len(target_sizes)
+     restart_steps = [int(num_inference_steps*0.3)] * len(target_sizes)
else:
height = height or self.default_sample_size * self.vae_scale_factor
width = width or self.default_sample_size * self.vae_scale_factor
