From 102ee0558ad6a4732d8a8fd9810b24eb7e5ac4e2 Mon Sep 17 00:00:00 2001
From: Wang Yi <53533850+marigoold@users.noreply.github.com>
Date: Sat, 22 Jun 2024 09:19:21 +0800
Subject: [PATCH] webui support controlnet extension (#948)

Only UNet inference under ControlNet guidance is supported for now;
compiling the ControlNet model itself is not supported yet.

---------

Co-authored-by: Li Junliang <117806079+lijunliangTG@users.noreply.github.com>
---
 .../compile/compile_utils.py                  |    8 +-
 .../compile/sd_webui_onediff_utils.py         |   27 +-
 .../onediff_controlnet.py                     | 1079 +++++++++++++++++
 onediff_sd_webui_extensions/onediff_shared.py |    7 +-
 onediff_sd_webui_extensions/onediff_utils.py  |  148 +++
 .../scripts/onediff.py                        |   43 +-
 onediff_sd_webui_extensions/ui_utils.py       |   14 -
 tests/sd-webui/test_api.py                    |    1 -
 8 files changed, 1283 insertions(+), 44 deletions(-)
 create mode 100644 onediff_sd_webui_extensions/onediff_controlnet.py
 create mode 100644 onediff_sd_webui_extensions/onediff_utils.py

diff --git a/onediff_sd_webui_extensions/compile/compile_utils.py b/onediff_sd_webui_extensions/compile/compile_utils.py
index 9d39fbc96..d79278be2 100644
--- a/onediff_sd_webui_extensions/compile/compile_utils.py
+++ b/onediff_sd_webui_extensions/compile/compile_utils.py
@@ -61,7 +61,9 @@ def get_calibrate_info(filename: str) -> Union[None, Dict]:


 def get_compiled_graph(sd_model, quantization) -> OneDiffCompiledGraph:
-    compiled_unet = compile_unet(
-        sd_model.model.diffusion_model, quantization=quantization
-    )
+    diffusion_model = sd_model.model.diffusion_model
+    # for controlnet
+    if "forward" in diffusion_model.__dict__:
+        diffusion_model.__dict__.pop("forward")
+    compiled_unet = compile_unet(diffusion_model, quantization=quantization)
     return OneDiffCompiledGraph(sd_model, compiled_unet, quantization)

diff --git a/onediff_sd_webui_extensions/compile/sd_webui_onediff_utils.py b/onediff_sd_webui_extensions/compile/sd_webui_onediff_utils.py
index db338fbf1..c77f5c3d1 100644
--- a/onediff_sd_webui_extensions/compile/sd_webui_onediff_utils.py
+++ b/onediff_sd_webui_extensions/compile/sd_webui_onediff_utils.py
@@ -13,17 +13,22 @@ def forward(self, x):


 # https://github.com/Stability-AI/generative-models/blob/059d8e9cd9c55aea1ef2ece39abf605efb8b7cc9/sgm/modules/diffusionmodules/util.py#L207
-def timestep_embedding(timesteps, dim, max_period=10000):
-    half = dim // 2
-    freqs = flow.exp(
-        -math.log(max_period)
-        * flow.arange(start=0, end=half, dtype=flow.float32)
-        / half
-    ).to(device=timesteps.device)
-    args = timesteps[:, None].float() * freqs[None]
-    embedding = flow.cat([flow.cos(args), flow.sin(args)], dim=-1)
-    if dim % 2:
-        embedding = flow.cat([embedding, flow.zeros_like(embedding[:, :1])], dim=-1)
+def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=False):
+    if not repeat_only:
+        half = dim // 2
+        freqs = flow.exp(
+            -math.log(max_period)
+            * flow.arange(start=0, end=half, dtype=flow.float32)
+            / half
+        ).to(device=timesteps.device)
+        args = timesteps[:, None].float() * freqs[None]
+        embedding = flow.cat([flow.cos(args), flow.sin(args)], dim=-1)
+        if dim % 2:
+            embedding = flow.cat([embedding, flow.zeros_like(embedding[:, :1])], dim=-1)
+    else:
+        raise NotImplementedError(
+            "repeat_only=True is not implemented in timestep_embedding"
+        )
     return embedding
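The OneFlow port above mirrors the reference sinusoidal embedding from sgm. As a sanity check, the same computation in plain PyTorch can be compared elementwise against the `flow` version (a minimal sketch, assuming a CPU tensor is acceptable for the comparison):

    import math
    import torch

    def timestep_embedding_torch(timesteps, dim, max_period=10000):
        # half cosine / half sine of log-spaced frequencies
        half = dim // 2
        freqs = torch.exp(
            -math.log(max_period) * torch.arange(half, dtype=torch.float32) / half
        ).to(timesteps.device)
        args = timesteps[:, None].float() * freqs[None]
        emb = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
        if dim % 2:
            emb = torch.cat([emb, torch.zeros_like(emb[:, :1])], dim=-1)
        return emb

    # e.g. t = torch.arange(4); timestep_embedding_torch(t, 320) should match
    # the oneflow result converted to torch, up to float32 tolerance.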
diff --git a/onediff_sd_webui_extensions/onediff_controlnet.py b/onediff_sd_webui_extensions/onediff_controlnet.py
new file mode 100644
index 000000000..c5da15651
--- /dev/null
+++ b/onediff_sd_webui_extensions/onediff_controlnet.py
@@ -0,0 +1,1079 @@
+from functools import wraps
+
+import onediff_shared
+import oneflow as flow
+import torch
+import torch as th
+from compile import OneDiffCompiledGraph
+from compile.sd_webui_onediff_utils import (
+    CrossAttentionOflow,
+    GroupNorm32Oflow,
+    timestep_embedding,
+)
+from ldm.modules.attention import BasicTransformerBlock, CrossAttention
+from ldm.modules.diffusionmodules.openaimodel import ResBlock, UNetModel
+from ldm.modules.diffusionmodules.util import GroupNorm32
+from modules import devices
+from modules.sd_hijack_utils import CondFunc
+from onediff_utils import check_structure_change, singleton_decorator
+
+from onediff.infer_compiler import oneflow_compile
+from onediff.infer_compiler.backends.oneflow.transform import proxy_class, register
+
+
+# https://github.com/Mikubill/sd-webui-controlnet/blob/8bbbd0e55ef6e5d71b09c2de2727b36e7bc825b0/scripts/hook.py#L238
+def torch_aligned_adding(base, x, require_channel_alignment):
+    if isinstance(x, float):
+        if x == 0.0:
+            return base
+        return base + x
+
+    if require_channel_alignment:
+        zeros = torch.zeros_like(base)
+        zeros[:, : x.shape[1], ...] = x
+        x = zeros
+
+    # resize to sample resolution
+    base_h, base_w = base.shape[-2:]
+    xh, xw = x.shape[-2:]
+
+    if xh > 1 or xw > 1:
+        if base_h != xh or base_w != xw:
+            # logger.info('[Warning] ControlNet finds unexpected mis-alignment in tensor shape.')
+            x = th.nn.functional.interpolate(x, size=(base_h, base_w), mode="nearest")
+
+    return base + x
+
+
+# Due to the tracing mechanism in OneFlow, it's crucial to ensure that
+# the same conditional branches are taken during the first run as in subsequent runs.
+# Therefore, certain "optimizations" have been modified.
+def oneflow_aligned_adding(base, x, require_channel_alignment):
+    if isinstance(x, float):
+        # the early return `if x == 0.0: return base` is removed here
+        return base + x
+
+    if require_channel_alignment:
+        zeros = flow.zeros_like(base)
+        zeros[:, : x.shape[1], ...] = x
+        x = zeros
+
+    # resize to sample resolution
+    base_h, base_w = base.shape[-2:]
+    xh, xw = x.shape[-2:]
+
+    if (xh > 1 or xw > 1) and (base_h != xh or base_w != xw):
+        # logger.info('[Warning] ControlNet finds unexpected mis-alignment in tensor shape.')
+        x = flow.nn.functional.interpolate(x, size=(base_h, base_w), mode="nearest")
+    return base + x
+
+
+cond_cast_unet = getattr(devices, "cond_cast_unet", lambda x: x)
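What `aligned_adding` does to a mismatched residual, condensed into a single standalone PyTorch function (an illustrative sketch of the same logic as above; shapes chosen arbitrarily):

    import torch
    import torch.nn.functional as F

    def aligned_adding(base, x, require_channel_alignment):
        if isinstance(x, float):
            return base + x                      # scalar residual, e.g. the initial 0.0 slots
        if require_channel_alignment:
            zeros = torch.zeros_like(base)       # zero-pad missing channels
            zeros[:, : x.shape[1], ...] = x
            x = zeros
        if x.shape[-2:] != base.shape[-2:]:      # resize to the sample resolution
            x = F.interpolate(x, size=base.shape[-2:], mode="nearest")
        return base + x

    base = torch.zeros(1, 320, 64, 64)
    res = torch.ones(1, 320, 32, 32)             # same channels, half resolution
    out = aligned_adding(base, res, require_channel_alignment=False)
    assert out.shape == base.shape and out.max() == 1.0

The OneFlow variant above additionally drops the `x == 0.0` early return, so a traced first run and later runs always take the same branch.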
+class TorchOnediffControlNetModel(torch.nn.Module):
+    def __init__(self, unet):
+        super().__init__()
+        self.time_embed = unet.time_embed
+        self.input_blocks = unet.input_blocks
+        self.label_emb = getattr(unet, "label_emb", None)
+        self.middle_block = unet.middle_block
+        self.output_blocks = unet.output_blocks
+        self.out = unet.out
+        self.model_channels = unet.model_channels
+
+    def forward(
+        self,
+        x,
+        timesteps,
+        context,
+        y,
+        total_t2i_adapter_embedding,
+        total_controlnet_embedding,
+        is_sdxl,
+        require_inpaint_hijack,
+    ):
+        from ldm.modules.diffusionmodules.util import timestep_embedding
+
+        hs = []
+        with th.no_grad():
+            t_emb = cond_cast_unet(
+                timestep_embedding(timesteps, self.model_channels, repeat_only=False)
+            )
+            emb = self.time_embed(t_emb)
+
+            if is_sdxl:
+                assert y.shape[0] == x.shape[0]
+                emb = emb + self.label_emb(y)
+
+            h = x
+            for i, module in enumerate(self.input_blocks):
+                self.current_h_shape = (h.shape[0], h.shape[1], h.shape[2], h.shape[3])
+                h = module(h, emb, context)
+
+                t2i_injection = [3, 5, 8] if is_sdxl else [2, 5, 8, 11]
+
+                if i in t2i_injection:
+                    h = torch_aligned_adding(
+                        h, total_t2i_adapter_embedding.pop(0), require_inpaint_hijack
+                    )
+
+                hs.append(h)
+
+            self.current_h_shape = (h.shape[0], h.shape[1], h.shape[2], h.shape[3])
+            h = self.middle_block(h, emb, context)
+
+        # U-Net Middle Block
+        h = torch_aligned_adding(
+            h, total_controlnet_embedding.pop(), require_inpaint_hijack
+        )
+
+        if len(total_t2i_adapter_embedding) > 0 and is_sdxl:
+            h = torch_aligned_adding(
+                h, total_t2i_adapter_embedding.pop(0), require_inpaint_hijack
+            )
+
+        # U-Net Decoder
+        for i, module in enumerate(self.output_blocks):
+            self.current_h_shape = (h.shape[0], h.shape[1], h.shape[2], h.shape[3])
+            h = th.cat(
+                [
+                    h,
+                    torch_aligned_adding(
+                        hs.pop(),
+                        total_controlnet_embedding.pop(),
+                        require_inpaint_hijack,
+                    ),
+                ],
+                dim=1,
+            )
+            h = module(h, emb, context)
+
+        # U-Net Output
+        h = h.type(x.dtype)
+        h = self.out(h)
+
+        return h
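How the two residual lists are consumed, schematically and without tensors (block counts are for SD1.5: 12 input blocks plus 1 middle block yield 13 ControlNet residuals; T2I-Adapter residuals are injected after input blocks 2, 5, 8, and 11):

    # Schematic of the pop() bookkeeping in the forward pass above.
    controlnet = [f"cn{i}" for i in range(13)]
    t2i = [f"t2i{i}" for i in range(4)]

    consumed = []
    for i in range(12):                      # encoder (input_blocks)
        if i in (2, 5, 8, 11):
            consumed.append(t2i.pop(0))      # adapters: consumed front-to-back
    h_mid = controlnet.pop()                 # middle block takes the deepest residual
    for _ in range(12):                      # decoder (output_blocks)
        consumed.append(controlnet.pop())    # skip connections: deepest first

    assert not controlnet and not t2i        # every residual used exactly once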
+class OneFlowOnediffControlNetModel(proxy_class(UNetModel)):
+    def forward(
+        self,
+        x,
+        timesteps,
+        context,
+        y,
+        total_t2i_adapter_embedding,
+        total_controlnet_embedding,
+        is_sdxl,
+        require_inpaint_hijack,
+    ):
+        x = x.half()
+        if y is not None:
+            y = y.half()
+        context = context.half()
+        hs = []
+        with flow.no_grad():
+            t_emb = cond_cast_unet(
+                timestep_embedding(timesteps, self.model_channels, repeat_only=False)
+            )
+            emb = self.time_embed(t_emb.half())
+
+            if is_sdxl:
+                assert y.shape[0] == x.shape[0]
+                emb = emb + self.label_emb(y)
+
+            h = x
+            for i, module in enumerate(self.input_blocks):
+                self.current_h_shape = (h.shape[0], h.shape[1], h.shape[2], h.shape[3])
+                h = module(h, emb, context)
+
+                t2i_injection = [3, 5, 8] if is_sdxl else [2, 5, 8, 11]
+
+                if i in t2i_injection:
+                    h = oneflow_aligned_adding(
+                        h, total_t2i_adapter_embedding.pop(0), require_inpaint_hijack
+                    )
+
+                hs.append(h)
+
+            self.current_h_shape = (h.shape[0], h.shape[1], h.shape[2], h.shape[3])
+            h = self.middle_block(h, emb, context)
+
+        # U-Net Middle Block
+        h = oneflow_aligned_adding(
+            h, total_controlnet_embedding.pop(), require_inpaint_hijack
+        )
+
+        if len(total_t2i_adapter_embedding) > 0 and is_sdxl:
+            h = oneflow_aligned_adding(
+                h, total_t2i_adapter_embedding.pop(0), require_inpaint_hijack
+            )
+
+        # U-Net Decoder
+        for i, module in enumerate(self.output_blocks):
+            self.current_h_shape = (h.shape[0], h.shape[1], h.shape[2], h.shape[3])
+            h = flow.cat(
+                [
+                    h,
+                    oneflow_aligned_adding(
+                        hs.pop(),
+                        total_controlnet_embedding.pop(),
+                        require_inpaint_hijack,
+                    ),
+                ],
+                dim=1,
+            )
+            h = h.half()
+            h = module(h, emb, context)
+
+        # U-Net Output
+        h = h.type(x.dtype)
+        h = self.out(h)
+
+        return h
+
+
+def onediff_controlnet_decorator(func):
+    @wraps(func)
+    def wrapper(self, p, *arg, **kwargs):
+        try:
+            onediff_shared.controlnet_enabled = check_if_controlnet_enabled(p)
+            if onediff_shared.controlnet_enabled:
+                hijack_controlnet_extension(p)
+            return func(self, p, *arg, **kwargs)
+        finally:
+            if onediff_shared.controlnet_enabled:
+                onediff_shared.previous_is_controlnet = True
+            else:
+                onediff_shared.controlnet_compiled = False
+                onediff_shared.previous_is_controlnet = False
+
+    return wrapper
+
+
+def compile_controlnet_ldm_unet(sd_model, unet_model, *, options=None):
+    from ldm.modules.attention import BasicTransformerBlock as BasicTransformerBlockLDM
+    from ldm.modules.diffusionmodules.openaimodel import ResBlock as ResBlockLDM
+    from sgm.modules.attention import BasicTransformerBlock as BasicTransformerBlockSGM
+    from sgm.modules.diffusionmodules.openaimodel import ResBlock as ResBlockSGM
+
+    for module in unet_model.modules():
+        if isinstance(module, (BasicTransformerBlockLDM, BasicTransformerBlockSGM)):
+            module.checkpoint = False
+        if isinstance(module, (ResBlockLDM, ResBlockSGM)):
+            module.use_checkpoint = False
+
+    # TODO: refine here
+    compiled_model = oneflow_compile(unet_model, options=options)
+    compiled_graph = OneDiffCompiledGraph(sd_model, compiled_model)
+    compiled_graph.eager_module = unet_model
+    compiled_graph.name += "_controlnet"
+    return compiled_graph
+
+
+torch2oflow_class_map = {
+    CrossAttention: CrossAttentionOflow,
+    GroupNorm32: GroupNorm32Oflow,
+    TorchOnediffControlNetModel: OneFlowOnediffControlNetModel,
+}
+register(package_names=["scripts.hook"], torch2oflow_class_map=torch2oflow_class_map)
+
+
+def check_if_controlnet_ext_loaded() -> bool:
+    from modules import extensions
+
+    return "sd-webui-controlnet" in extensions.loaded_extensions
+
+
+def hijacked_main_entry(self, p):
+    self._original_controlnet_main_entry(p)
+    sd_ldm = p.sd_model
+    unet = sd_ldm.model.diffusion_model
+
+    structure_changed = check_structure_change(
+        onediff_shared.previous_unet_type, sd_ldm
+    )
+    if onediff_shared.controlnet_compiled is False or structure_changed:
+        onediff_model = TorchOnediffControlNetModel(unet)
+        onediff_shared.current_unet_graph = compile_controlnet_ldm_unet(
+            sd_ldm, onediff_model
+        )
+        onediff_shared.controlnet_compiled = True
+
+
+def get_controlnet_script(p):
+    for script in p.scripts.scripts:
+        if script.__module__ == "controlnet.py":
+            return script
+    return None
+
+
+def check_if_controlnet_enabled(p):
+    controlnet_script_class = get_controlnet_script(p)
+    return (
+        controlnet_script_class is not None
+        and len(controlnet_script_class.get_enabled_units(p)) != 0
+    )
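The hijack installed below is lazy and one-shot. `CondFunc` (webui's modules.sd_hijack_utils) swaps the callable at a dotted path for a wrapper that receives the original function as its first argument whenever the condition function returns true, and falls through to the original otherwise. `singleton_decorator` only needs to guarantee one-time execution; its real definition lives in onediff_utils.py and is not shown in this hunk, but a minimal sketch could look like:

    from functools import wraps

    def singleton_decorator(func):
        # run `func` at most once; later calls become no-ops
        called = False

        @wraps(func)
        def wrapper(*args, **kwargs):
            nonlocal called
            if not called:
                called = True
                return func(*args, **kwargs)

        return wrapper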
+# When OneDiff is initializing, the controlnet extension has not yet been loaded.
+# Therefore, this function should be called during image generation
+# rather than during the initialization of OneDiff.
+@singleton_decorator
+def hijack_controlnet_extension(p):
+    CondFunc(
+        "scripts.hook.UnetHook.hook",
+        hijacked_controlnet_hook,
+        lambda _, *arg, **kwargs: onediff_shared.onediff_enabled,
+    )
+    # get controlnet script
+    controlnet_script = get_controlnet_script(p)
+    if controlnet_script is None:
+        return
+
+    controlnet_script._original_controlnet_main_entry = (
+        controlnet_script.controlnet_main_entry
+    )
+    controlnet_script.controlnet_main_entry = hijacked_main_entry.__get__(
+        controlnet_script
+    )
+
+
+# We intended to hijack only the closure function `forward`
+# inside the member function `hook` of the `UnetHook` class in the ControlNet extension.
+# But due to certain limitations, we were unable to hack only that closure,
+# so we have to hijack the entire `hook` member function of the `UnetHook` class.
+
+# The function largely retains its original content,
+# with modifications made specifically within the `forward` function.
+# To identify the altered parts, search for the tag "modified by OneDiff".
+
+# https://github.com/Mikubill/sd-webui-controlnet/blob/8bbbd0e55ef6e5d71b09c2de2727b36e7bc825b0/scripts/hook.py#L442
+def hijacked_controlnet_hook(
+    orig_func,
+    self,
+    model,
+    sd_ldm,
+    control_params,
+    process,
+    batch_option_uint_separate=False,
+    batch_option_style_align=False,
+):
+    from modules import devices, lowvram, scripts, shared
+    from scripts.controlnet_sparsectrl import SparseCtrl
+    from scripts.enums import AutoMachine, ControlModelType, HiResFixOption
+    from scripts.hook import (
+        AbstractLowScaleModel,
+        blur,
+        mark_prompt_context,
+        predict_noise_from_start,
+        predict_q_sample,
+        predict_start_from_noise,
+        register_schedule,
+        torch_dfs,
+        unmark_prompt_context,
+    )
+    from scripts.ipadapter.ipadapter_model import ImageEmbed
+    from scripts.logging import logger
+    from sgm.modules.attention import BasicTransformerBlock as BasicTransformerBlockSGM
+
+    self.model = model
+    self.sd_ldm = sd_ldm
+    self.control_params = control_params
+
+    model_is_sdxl = getattr(self.sd_ldm, "is_sdxl", False)
+
+    outer = self
+
+    def process_sample(*args, **kwargs):
+        # ControlNet must know whether a prompt is the conditional prompt (positive prompt)
+        # or the unconditional conditioning prompt (negative prompt).
+        # Use hook.py's `mark_prompt_context` to mark the prompts that will be seen by ControlNet.
+        # If XXX is a MulticondLearnedConditioning, a ComposableScheduledPromptConditioning,
+        # a ScheduledPromptConditioning, or a list of these components:
+        #   - if XXX is a positive prompt, call mark_prompt_context(XXX, positive=True)
+        #   - if XXX is a negative prompt, call mark_prompt_context(XXX, positive=False)
+        # Once the prompts are marked, ControlNet knows which prompt is cond/uncond,
+        # works as expected, and the mismatch errors disappear.
+        mark_prompt_context(kwargs.get("conditioning", []), positive=True)
+        mark_prompt_context(
+            kwargs.get("unconditional_conditioning", []), positive=False
+        )
+        mark_prompt_context(getattr(process, "hr_c", []), positive=True)
+        mark_prompt_context(getattr(process, "hr_uc", []), positive=False)
+        return process.sample_before_CN_hack(*args, **kwargs)
+
+    def forward(self, x, timesteps=None, context=None, y=None, **kwargs):
+        is_sdxl = y is not None and model_is_sdxl
+        total_t2i_adapter_embedding = [0.0] * 4
+        if is_sdxl:
+            total_controlnet_embedding = [0.0] * 10
+        else:
+            total_controlnet_embedding = [0.0] * 13
+        require_inpaint_hijack = False
+        is_in_high_res_fix = False
+        batch_size = int(x.shape[0])
+
+        # Handle cond-uncond marker
+        (
+            cond_mark,
+            outer.current_uc_indices,
+            outer.current_c_indices,
+            context,
+        ) = unmark_prompt_context(context)
+        outer.model.cond_mark = cond_mark
+        # logger.info(str(cond_mark[:, 0, 0, 0].detach().cpu().numpy().tolist()) + ' - ' + str(outer.current_uc_indices))
+
+        # Revision
+        if is_sdxl:
+            revision_y1280 = 0
+
+            for param in outer.control_params:
+                if param.guidance_stopped:
+                    continue
+                if param.control_model_type == ControlModelType.ReVision:
+                    if param.vision_hint_count is None:
+                        k = (
+                            torch.Tensor(
+                                [int(param.preprocessor["threshold_a"] * 1000)]
+                            )
+                            .to(param.hint_cond)
+                            .long()
+                            .clip(0, 999)
+                        )
+                        param.vision_hint_count = outer.revision_q_sampler.q_sample(
+                            param.hint_cond, k
+                        )
+                    revision_emb = param.vision_hint_count
+                    if isinstance(revision_emb, torch.Tensor):
+                        revision_y1280 += revision_emb * param.weight
+
+            if isinstance(revision_y1280, torch.Tensor):
+                y[:, :1280] = revision_y1280 * cond_mark[:, :, 0, 0]
+                if any(
+                    "ignore_prompt" in param.preprocessor["name"]
+                    for param in outer.control_params
+                ) or (
+                    getattr(process, "prompt", "") == ""
+                    and getattr(process, "negative_prompt", "") == ""
+                ):
+                    context = torch.zeros_like(context)
+
+        # High-res fix
+        for param in outer.control_params:
+            # select which hint_cond to use
+            if param.used_hint_cond is None:
+                param.used_hint_cond = param.hint_cond
+                param.used_hint_cond_latent = None
+                param.used_hint_inpaint_hijack = None
+
+            # has high-res fix
+            if (
+                isinstance(param.hr_hint_cond, torch.Tensor)
+                and x.ndim == 4
+                and param.hint_cond.ndim == 4
+                and param.hr_hint_cond.ndim == 4
+            ):
+                _, _, h_lr, w_lr = param.hint_cond.shape
+                _, _, h_hr, w_hr = param.hr_hint_cond.shape
+                _, _, h, w = x.shape
+                h, w = h * 8, w * 8
+                if abs(h - h_lr) < abs(h - h_hr):
+                    is_in_high_res_fix = False
+                    if param.used_hint_cond is not param.hint_cond:
+                        param.used_hint_cond = param.hint_cond
+                        param.used_hint_cond_latent = None
+                        param.used_hint_inpaint_hijack = None
+                else:
+                    is_in_high_res_fix = True
+                    if param.used_hint_cond is not param.hr_hint_cond:
+                        param.used_hint_cond = param.hr_hint_cond
+                        param.used_hint_cond_latent = None
+                        param.used_hint_inpaint_hijack = None
+
+        self.is_in_high_res_fix = is_in_high_res_fix
+        outer.is_in_high_res_fix = is_in_high_res_fix
+
+        # Convert control image to latent
+        for param in outer.control_params:
+            if param.used_hint_cond_latent is not None:
+                continue
+            if (
+                param.control_model_type not in [ControlModelType.AttentionInjection]
+                and "colorfix" not in param.preprocessor["name"]
+                and "inpaint_only" not in param.preprocessor["name"]
+            ):
+                continue
+            param.used_hint_cond_latent = outer.call_vae_using_process(
+                process, param.used_hint_cond, batch_size=batch_size
+            )
+
+        # vram
+        for param in outer.control_params:
"disable_memory_management", False): + continue + + if param.control_model is not None: + if ( + outer.lowvram + and is_sdxl + and hasattr(param.control_model, "aggressive_lowvram") + ): + param.control_model.aggressive_lowvram() + elif hasattr(param.control_model, "fullvram"): + param.control_model.fullvram() + elif hasattr(param.control_model, "to"): + param.control_model.to(devices.get_device_for("controlnet")) + + # handle prompt token control + for param in outer.control_params: + if param.guidance_stopped or param.disabled_by_hr_option( + self.is_in_high_res_fix + ): + continue + + if param.control_model_type not in [ControlModelType.T2I_StyleAdapter]: + continue + + control = param.control_model( + x=x, hint=param.used_hint_cond, timesteps=timesteps, context=context + ) + control = torch.cat([control.clone() for _ in range(batch_size)], dim=0) + control *= param.weight + control *= cond_mark[:, :, :, 0] + context = torch.cat([context, control.clone()], dim=1) + + # handle ControlNet / T2I_Adapter + for param_index, param in enumerate(outer.control_params): + if param.guidance_stopped or param.disabled_by_hr_option( + self.is_in_high_res_fix + ): + continue + + if not ( + param.control_model_type.is_controlnet + or param.control_model_type == ControlModelType.T2I_Adapter + ): + continue + + # inpaint model workaround + x_in = x + control_model = param.control_model.control_model + + if param.control_model_type.is_controlnet: + if ( + x.shape[1] != control_model.input_blocks[0][0].in_channels + and x.shape[1] == 9 + ): + # inpaint_model: 4 data + 4 downscaled image + 1 mask + x_in = x[:, :4, ...] + require_inpaint_hijack = True + + assert ( + param.used_hint_cond is not None + ), "Controlnet is enabled but no input image is given" + + hint = param.used_hint_cond + if param.control_model_type == ControlModelType.InstantID: + assert isinstance(param.control_context_override, ImageEmbed) + controlnet_context = param.control_context_override.eval(cond_mark).to( + x.device, dtype=x.dtype + ) + else: + controlnet_context = context + + # ControlNet inpaint protocol + if hint.shape[1] == 4 and not isinstance(control_model, SparseCtrl): + c = hint[:, 0:3, :, :] + m = hint[:, 3:4, :, :] + m = (m > 0.5).float() + hint = c * (1 - m) - m + + control = param.control_model( + x=x_in, hint=hint, timesteps=timesteps, context=controlnet_context, y=y + ) + + if is_sdxl: + control_scales = [param.weight] * 10 + else: + control_scales = [param.weight] * 13 + + if param.cfg_injection or param.global_average_pooling: + if param.control_model_type == ControlModelType.T2I_Adapter: + control = [ + torch.cat([c.clone() for _ in range(batch_size)], dim=0) + for c in control + ] + control = [c * cond_mark for c in control] + + high_res_fix_forced_soft_injection = False + + if is_in_high_res_fix: + if "canny" in param.preprocessor["name"]: + high_res_fix_forced_soft_injection = True + if "mlsd" in param.preprocessor["name"]: + high_res_fix_forced_soft_injection = True + + if param.soft_injection or high_res_fix_forced_soft_injection: + # important! use the soft weights with high-res fix can significantly reduce artifacts. 
+                if param.control_model_type == ControlModelType.T2I_Adapter:
+                    control_scales = [
+                        param.weight * x for x in (0.25, 0.62, 0.825, 1.0)
+                    ]
+                elif param.control_model_type.is_controlnet:
+                    control_scales = [
+                        param.weight * (0.825 ** float(12 - i)) for i in range(13)
+                    ]
+
+            if is_sdxl and param.control_model_type.is_controlnet:
+                control_scales = control_scales[:10]
+
+            if param.advanced_weighting is not None:
+                logger.info(f"Advanced weighting enabled. {param.advanced_weighting}")
+                if param.soft_injection or high_res_fix_forced_soft_injection:
+                    logger.warn("Advanced weighting overwrites soft_injection effect.")
+                control_scales = param.advanced_weighting
+
+            control = [
+                param.apply_effective_region_mask(c * scale)
+                for c, scale in zip(control, control_scales)
+            ]
+            if param.global_average_pooling:
+                control = [torch.mean(c, dim=(2, 3), keepdim=True) for c in control]
+
+            for idx, item in enumerate(control):
+                target = None
+                if param.control_model_type.is_controlnet:
+                    target = total_controlnet_embedding
+                if param.control_model_type == ControlModelType.T2I_Adapter:
+                    target = total_t2i_adapter_embedding
+                if target is not None:
+                    if batch_option_uint_separate:
+                        for pi, ci in enumerate(outer.current_c_indices):
+                            if pi % len(outer.control_params) != param_index:
+                                item[ci] = 0
+                        for pi, ci in enumerate(outer.current_uc_indices):
+                            if pi % len(outer.control_params) != param_index:
+                                item[ci] = 0
+                        target[idx] = item + target[idx]
+                    else:
+                        target[idx] = item + target[idx]
+
+        # Replace x_t to support inpaint models
+        for param in outer.control_params:
+            if not isinstance(param.used_hint_cond, torch.Tensor):
+                continue
+            if param.used_hint_cond.ndim < 2 or param.used_hint_cond.shape[1] != 4:
+                continue
+            if x.shape[1] != 9:
+                continue
+            if param.used_hint_inpaint_hijack is None:
+                mask_pixel = param.used_hint_cond[:, 3:4, :, :]
+                image_pixel = param.used_hint_cond[:, 0:3, :, :]
+                mask_pixel = (mask_pixel > 0.5).to(mask_pixel.dtype)
+                masked_latent = outer.call_vae_using_process(
+                    process, image_pixel, batch_size, mask=mask_pixel
+                )
+                mask_latent = torch.nn.functional.max_pool2d(mask_pixel, (8, 8))
+                if mask_latent.shape[0] != batch_size:
+                    mask_latent = torch.cat(
+                        [mask_latent.clone() for _ in range(batch_size)], dim=0
+                    )
+                param.used_hint_inpaint_hijack = torch.cat(
+                    [mask_latent, masked_latent], dim=1
+                )
+                param.used_hint_inpaint_hijack.to(x.dtype).to(x.device)
+            x = torch.cat([x[:, :4, :, :], param.used_hint_inpaint_hijack], dim=1)
+
+        # vram
+        for param in outer.control_params:
+            if param.control_model is not None:
+                if outer.lowvram:
+                    param.control_model.to("cpu")
+
+        # A1111 fix for medvram.
+        if shared.cmd_opts.medvram or (
+            getattr(shared.cmd_opts, "medvram_sdxl", False) and is_sdxl
+        ):
+            try:
+                # Trigger the register_forward_pre_hook
+                outer.sd_ldm.model()
+            except Exception as e:
+                logger.debug("register_forward_pre_hook")
+                logger.debug(e)
+
+        # Clear attention and AdaIn cache
+        for module in outer.attn_module_list:
+            module.bank = []
+            module.style_cfgs = []
+        for module in outer.gn_module_list:
+            module.mean_bank = []
+            module.var_bank = []
+            module.style_cfgs = []
+
+        # Handle attention and AdaIn control
+        for param in outer.control_params:
+            if param.guidance_stopped or param.disabled_by_hr_option(
+                self.is_in_high_res_fix
+            ):
+                continue
+
+            if param.used_hint_cond_latent is None:
+                continue
+
+            if param.control_model_type not in [ControlModelType.AttentionInjection]:
+                continue
+
+            ref_xt = predict_q_sample(
+                outer.sd_ldm,
+                param.used_hint_cond_latent,
+                torch.round(timesteps.float()).long(),
+            )
+
+            # Inpaint Hijack
+            if x.shape[1] == 9:
+                ref_xt = torch.cat(
+                    [
+                        ref_xt,
+                        torch.zeros_like(ref_xt)[:, 0:1, :, :],
+                        param.used_hint_cond_latent,
+                    ],
+                    dim=1,
+                )
+
+            outer.current_style_fidelity = float(param.preprocessor["threshold_a"])
+            outer.current_style_fidelity = max(
+                0.0, min(1.0, outer.current_style_fidelity)
+            )
+
+            if is_sdxl:
+                # sdxl's attention hacking is highly unstable.
+                # We have no choice but to reduce the style_fidelity a bit.
+                # By default, 0.5 ** 3.0 = 0.125
+                outer.current_style_fidelity = outer.current_style_fidelity ** 3.0
+
+            if param.cfg_injection:
+                outer.current_style_fidelity = 1.0
+            elif param.soft_injection or is_in_high_res_fix:
+                outer.current_style_fidelity = 0.0
+
+            control_name = param.preprocessor["name"]
+
+            if control_name in ["reference_only", "reference_adain+attn"]:
+                outer.attention_auto_machine = AutoMachine.Write
+                outer.attention_auto_machine_weight = param.weight
+
+            if control_name in ["reference_adain", "reference_adain+attn"]:
+                outer.gn_auto_machine = AutoMachine.Write
+                outer.gn_auto_machine_weight = param.weight
+
+            if is_sdxl:
+                outer.original_forward(
+                    x=ref_xt.to(devices.dtype_unet),
+                    timesteps=timesteps.to(devices.dtype_unet),
+                    context=context.to(devices.dtype_unet),
+                    y=y,
+                )
+            else:
+                outer.original_forward(
+                    x=ref_xt.to(devices.dtype_unet),
+                    timesteps=timesteps.to(devices.dtype_unet),
+                    context=context.to(devices.dtype_unet),
+                )
+
+            outer.attention_auto_machine = AutoMachine.Read
+            outer.gn_auto_machine = AutoMachine.Read
+
+        # modified by OneDiff
+        h = onediff_shared.current_unet_graph.graph_module(
+            x,
+            timesteps,
+            context,
+            y,
+            total_t2i_adapter_embedding,
+            total_controlnet_embedding,
+            is_sdxl,
+            require_inpaint_hijack,
+        )
+
+        # Post-processing for color fix
+        for param in outer.control_params:
+            if param.used_hint_cond_latent is None:
+                continue
+            if "colorfix" not in param.preprocessor["name"]:
+                continue
+
+            k = int(param.preprocessor["threshold_a"])
+            if is_in_high_res_fix and not param.disabled_by_hr_option(
+                self.is_in_high_res_fix
+            ):
+                k *= 2
+
+            # Inpaint hijack
+            xt = x[:, :4, :, :]
+
+            x0_origin = param.used_hint_cond_latent
+            t = torch.round(timesteps.float()).long()
+            x0_prd = predict_start_from_noise(outer.sd_ldm, xt, t, h)
+            x0 = x0_prd - blur(x0_prd, k) + blur(x0_origin, k)
+
+            if "+sharp" in param.preprocessor["name"]:
+                detail_weight = float(param.preprocessor["threshold_b"]) * 0.01
+                neg = detail_weight * blur(x0, k) + (1 - detail_weight) * x0
+                x0 = cond_mark * x0 + (1 - cond_mark) * neg
+
+            eps_prd = predict_noise_from_start(outer.sd_ldm, xt, t, x0)
+
+            w = max(0.0, min(1.0, float(param.weight)))
+            h = eps_prd * w + h * (1 - w)
+
+        # Post-processing for restore
+        for param in outer.control_params:
+            if param.used_hint_cond_latent is None:
+                continue
+            if "inpaint_only" not in param.preprocessor["name"]:
+                continue
+            if param.used_hint_cond.shape[1] != 4:
+                continue
+
+            # Inpaint hijack
+            xt = x[:, :4, :, :]
+
+            mask = param.used_hint_cond[:, 3:4, :, :]
+            mask = torch.nn.functional.max_pool2d(
+                mask, (10, 10), stride=(8, 8), padding=1
+            )
+
+            x0_origin = param.used_hint_cond_latent
+            t = torch.round(timesteps.float()).long()
+            x0_prd = predict_start_from_noise(outer.sd_ldm, xt, t, h)
+            x0 = x0_prd * mask + x0_origin * (1 - mask)
+            eps_prd = predict_noise_from_start(outer.sd_ldm, xt, t, x0)
+
+            w = max(0.0, min(1.0, float(param.weight)))
+            h = eps_prd * w + h * (1 - w)
+
+        return h
+
+    def move_all_control_model_to_cpu():
+        for param in getattr(outer, "control_params", []) or []:
+            if isinstance(param.control_model, torch.nn.Module):
+                param.control_model.to("cpu")
+
+    def forward_webui(*args, **kwargs):
+        # ------ modified by OneDiff below ------
+        forward_func = None
+        if (
+            "forward"
+            in onediff_shared.current_unet_graph.graph_module._torch_module.__dict__
+        ):
+            forward_func = onediff_shared.current_unet_graph.graph_module._torch_module.__dict__.pop(
+                "forward"
+            )
+            _original_forward_func = onediff_shared.current_unet_graph.graph_module._torch_module.__dict__.pop(
+                "_original_forward"
+            )
+        # ------ modified by OneDiff above ------
+
+        # webui will handle other components
+        try:
+            if shared.cmd_opts.lowvram:
+                lowvram.send_everything_to_cpu()
+            return forward(*args, **kwargs)
+        except Exception as e:
+            move_all_control_model_to_cpu()
+            raise e
+        finally:
+            if outer.lowvram:
+                move_all_control_model_to_cpu()
+
+            # ------ modified by OneDiff below ------
+            if forward_func is not None:
+                onediff_shared.current_unet_graph.graph_module._torch_module.forward = (
+                    forward_func
+                )
+                onediff_shared.current_unet_graph.graph_module._torch_module._original_forward = (
+                    _original_forward_func
+                )
+            # ------ modified by OneDiff above ------
+
+    def hacked_basic_transformer_inner_forward(self, x, context=None):
+        x_norm1 = self.norm1(x)
+        self_attn1 = None
+        if self.disable_self_attn:
+            # Do not use self-attention
+            self_attn1 = self.attn1(x_norm1, context=context)
+        else:
+            # Use self-attention
+            self_attention_context = x_norm1
+            if outer.attention_auto_machine == AutoMachine.Write:
+                if outer.attention_auto_machine_weight > self.attn_weight:
+                    self.bank.append(self_attention_context.detach().clone())
+                    self.style_cfgs.append(outer.current_style_fidelity)
+            if outer.attention_auto_machine == AutoMachine.Read:
+                if len(self.bank) > 0:
+                    style_cfg = sum(self.style_cfgs) / float(len(self.style_cfgs))
+                    self_attn1_uc = self.attn1(
+                        x_norm1,
+                        context=torch.cat([self_attention_context] + self.bank, dim=1),
+                    )
+                    self_attn1_c = self_attn1_uc.clone()
+                    if len(outer.current_uc_indices) > 0 and style_cfg > 1e-5:
+                        self_attn1_c[outer.current_uc_indices] = self.attn1(
+                            x_norm1[outer.current_uc_indices],
+                            context=self_attention_context[outer.current_uc_indices],
+                        )
+                    self_attn1 = (
+                        style_cfg * self_attn1_c + (1.0 - style_cfg) * self_attn1_uc
+                    )
+                self.bank = []
+                self.style_cfgs = []
+            if (
+                outer.attention_auto_machine == AutoMachine.StyleAlign
+                and not outer.is_in_high_res_fix
+            ):
+                # very VRAM hungry - disable at high_res_fix
+
+                def shared_attn1(inner_x):
+                    BB, FF, CC = inner_x.shape
+                    return self.attn1(inner_x.reshape(1, BB * FF, CC)).reshape(
+                        BB, FF, CC
+                    )
+                uc_layer = shared_attn1(x_norm1[outer.current_uc_indices])
+                c_layer = shared_attn1(x_norm1[outer.current_c_indices])
+                self_attn1 = torch.zeros_like(x_norm1).to(uc_layer)
+                self_attn1[outer.current_uc_indices] = uc_layer
+                self_attn1[outer.current_c_indices] = c_layer
+                del uc_layer, c_layer
+            if self_attn1 is None:
+                self_attn1 = self.attn1(x_norm1, context=self_attention_context)
+
+        x = self_attn1.to(x.dtype) + x
+        x = self.attn2(self.norm2(x), context=context) + x
+        x = self.ff(self.norm3(x)) + x
+        return x
+
+    def hacked_group_norm_forward(self, *args, **kwargs):
+        eps = 1e-6
+        x = self.original_forward_cn_hijack(*args, **kwargs)
+        y = None
+        if outer.gn_auto_machine == AutoMachine.Write:
+            if outer.gn_auto_machine_weight > self.gn_weight:
+                var, mean = torch.var_mean(x, dim=(2, 3), keepdim=True, correction=0)
+                self.mean_bank.append(mean)
+                self.var_bank.append(var)
+                self.style_cfgs.append(outer.current_style_fidelity)
+        if outer.gn_auto_machine == AutoMachine.Read:
+            if len(self.mean_bank) > 0 and len(self.var_bank) > 0:
+                style_cfg = sum(self.style_cfgs) / float(len(self.style_cfgs))
+                var, mean = torch.var_mean(x, dim=(2, 3), keepdim=True, correction=0)
+                std = torch.maximum(var, torch.zeros_like(var) + eps) ** 0.5
+                mean_acc = sum(self.mean_bank) / float(len(self.mean_bank))
+                var_acc = sum(self.var_bank) / float(len(self.var_bank))
+                std_acc = torch.maximum(var_acc, torch.zeros_like(var_acc) + eps) ** 0.5
+                y_uc = (((x - mean) / std) * std_acc) + mean_acc
+                y_c = y_uc.clone()
+                if len(outer.current_uc_indices) > 0 and style_cfg > 1e-5:
+                    y_c[outer.current_uc_indices] = x.to(y_c.dtype)[
+                        outer.current_uc_indices
+                    ]
+                y = style_cfg * y_c + (1.0 - style_cfg) * y_uc
+            self.mean_bank = []
+            self.var_bank = []
+            self.style_cfgs = []
+        if y is None:
+            y = x
+        return y.to(x.dtype)
+
+    if getattr(process, "sample_before_CN_hack", None) is None:
+        process.sample_before_CN_hack = process.sample
+    process.sample = process_sample
+
+    model._original_forward = model.forward
+    outer.original_forward = model.forward
+    model.forward = forward_webui.__get__(model, UNetModel)
+
+    if model_is_sdxl:
+        register_schedule(sd_ldm)
+        outer.revision_q_sampler = AbstractLowScaleModel()
+
+    need_attention_hijack = False
+
+    for param in outer.control_params:
+        if param.control_model_type in [ControlModelType.AttentionInjection]:
+            need_attention_hijack = True
+
+    if batch_option_style_align:
+        need_attention_hijack = True
+        outer.attention_auto_machine = AutoMachine.StyleAlign
+        outer.gn_auto_machine = AutoMachine.StyleAlign
+
+    all_modules = torch_dfs(model)
+
+    if need_attention_hijack:
+        attn_modules = [
+            module
+            for module in all_modules
+            if isinstance(module, BasicTransformerBlock)
+            or isinstance(module, BasicTransformerBlockSGM)
+        ]
+        attn_modules = sorted(attn_modules, key=lambda x: -x.norm1.normalized_shape[0])
+
+        for i, module in enumerate(attn_modules):
+            if getattr(module, "_original_inner_forward_cn_hijack", None) is None:
+                module._original_inner_forward_cn_hijack = module._forward
+            module._forward = hacked_basic_transformer_inner_forward.__get__(
+                module, BasicTransformerBlock
+            )
+            module.bank = []
+            module.style_cfgs = []
+            module.attn_weight = float(i) / float(len(attn_modules))
+
+        gn_modules = [model.middle_block]
+        model.middle_block.gn_weight = 0
+
+        if model_is_sdxl:
+            input_block_indices = [4, 5, 7, 8]
+            output_block_indices = [0, 1, 2, 3, 4, 5]
+        else:
+            input_block_indices = [4, 5, 7, 8, 10, 11]
+            output_block_indices = [0, 1, 2, 3, 4, 5, 6, 7]
+        for w, i in enumerate(input_block_indices):
+            module = model.input_blocks[i]
+            module.gn_weight = 1.0 - float(w) / float(len(input_block_indices))
+            gn_modules.append(module)
+
+        for w, i in enumerate(output_block_indices):
+            module = model.output_blocks[i]
+            module.gn_weight = float(w) / float(len(output_block_indices))
+            gn_modules.append(module)
+
+        for i, module in enumerate(gn_modules):
+            if getattr(module, "original_forward_cn_hijack", None) is None:
+                module.original_forward_cn_hijack = module.forward
+            module.forward = hacked_group_norm_forward.__get__(module, torch.nn.Module)
+            module.mean_bank = []
+            module.var_bank = []
+            module.style_cfgs = []
+            module.gn_weight *= 2
+
+        outer.attn_module_list = attn_modules
+        outer.gn_module_list = gn_modules
+    else:
+        for module in all_modules:
+            _original_inner_forward_cn_hijack = getattr(
+                module, "_original_inner_forward_cn_hijack", None
+            )
+            original_forward_cn_hijack = getattr(
+                module, "original_forward_cn_hijack", None
+            )
+            if _original_inner_forward_cn_hijack is not None:
+                module._forward = _original_inner_forward_cn_hijack
+            if original_forward_cn_hijack is not None:
+                module.forward = original_forward_cn_hijack
+        outer.attn_module_list = []
+        outer.gn_module_list = []
+
+    scripts.script_callbacks.on_cfg_denoiser(self.guidance_schedule_handler)
diff --git a/onediff_sd_webui_extensions/onediff_shared.py b/onediff_sd_webui_extensions/onediff_shared.py
index 8d9e4cf15..38f6ccbbf 100644
--- a/onediff_sd_webui_extensions/onediff_shared.py
+++ b/onediff_sd_webui_extensions/onediff_shared.py
@@ -2,10 +2,15 @@

 current_unet_graph = OneDiffCompiledGraph()
 current_quantization = False
-current_unet_type = {
+previous_unet_type = {
     "is_sdxl": False,
     "is_sd2": False,
     "is_sd1": False,
     "is_ssd": False,
 }
 onediff_enabled = False
+
+# controlnet
+controlnet_enabled = False
+controlnet_compiled = False
+previous_is_controlnet = False
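`previous_unet_type` above is the state that `check_structure_change` (imported by onediff_controlnet.py) compares against, so that switching e.g. from an SD1 to an SDXL checkpoint forces a recompile instead of reusing a graph with the wrong structure. A plausible sketch of that helper (hypothetical; the actual definition lives in onediff_utils.py):

    def check_structure_change(current_type: dict, model) -> bool:
        # recompile when the checkpoint family changes (different graph structure)
        new_type = {
            "is_sdxl": getattr(model, "is_sdxl", False),
            "is_sd2": getattr(model, "is_sd2", False),
            "is_sd1": getattr(model, "is_sd1", False),
            "is_ssd": getattr(model, "is_ssd", False),
        }
        changed = current_type != new_type
        current_type.update(new_type)  # remember the latest type for the next check
        return changed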
diff --git a/onediff_sd_webui_extensions/onediff_utils.py b/onediff_sd_webui_extensions/onediff_utils.py
new file mode 100644
index 000000000..dc3a42927
--- /dev/null
+++ b/onediff_sd_webui_extensions/onediff_utils.py
@@ -0,0 +1,148 @@
+import os
+from contextlib import contextmanager
+from functools import wraps
+from pathlib import Path
+from textwrap import dedent
+from zipfile import BadZipFile
+
+import onediff_shared
+import oneflow as flow
+from modules import shared
+from modules.devices import torch_gc
+
+from onediff.infer_compiler import DeployableModule
+
+hints_message = dedent(
+    """\
+If you need Enterprise Level Support for your system or business, please send an email to
+business@siliconflow.com.
+
+Tell us about your use case, deployment scale, and requirements.
+
+GitHub Issue:
+https://github.com/siliconflow/onediff/issues
+
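The imports above (`contextmanager`, `torch_gc`, `DeployableModule`) hint at the helpers defined in the remainder of this file, which the excerpt does not include. For orientation, a hypothetical sketch of the kind of context manager this module plausibly provides for toggling the flag the hijacks check:

    from contextlib import contextmanager

    import onediff_shared
    from modules.devices import torch_gc

    @contextmanager
    def onediff_enabled():
        # flip the flag consulted by the hijacked hooks while a OneDiff run is active
        onediff_shared.onediff_enabled = True
        try:
            yield
        finally:
            onediff_shared.onediff_enabled = False
            torch_gc()  # release cached VRAM after the run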