From e87f8239550cf5eee1a942f63e8b36c63f996ad0 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Thu, 12 Dec 2024 07:37:43 +0000 Subject: [PATCH 01/32] add cogvideox support for gaudi. --- examples/text-to-video/cogvideox_generate.py | 75 +++ .../pipelines/cogvideox/cogvideoX_gaudi.py | 273 +++++++++ .../cogvideox/pipeline_cogvideox_gaudi.py | 536 ++++++++++++++++++ 3 files changed, 884 insertions(+) create mode 100644 examples/text-to-video/cogvideox_generate.py create mode 100644 optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py create mode 100644 optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py diff --git a/examples/text-to-video/cogvideox_generate.py b/examples/text-to-video/cogvideox_generate.py new file mode 100644 index 0000000000..8848f92654 --- /dev/null +++ b/examples/text-to-video/cogvideox_generate.py @@ -0,0 +1,75 @@ +import argparse +import logging +import sys +from pathlib import Path + +import torch +from pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline +#from diffusers import CogVideoXPipeline +from diffusers.utils import export_to_video + +from optimum.habana.transformers.gaudi_configuration import GaudiConfig +from optimum.habana.utils import set_seed +logger = logging.getLogger(__name__) + +prompt = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance." + +#prompt = "A 360-degree panoramic view of a lush mountain valley with a flowing river, birds flying across the sky, and a soft orange-pink sunrise." +#prompt = "Spiderman is surfing, Darth Vader is also surfing and following Spiderman" +#prompt = "An astronaut riding a horse" +#prompt = "A drone shot flying above vibrant red and orange foliage with occasional sunlight beams piercing through the canopy." +#prompt = "Skyscrapers with glowing neon signs, flying cars zipping between buildings, and a massive digital billboard displaying a news broadcast." +#prompt = "Bright, surreal waves of color blending and transforming into abstract shapes in rhythm with gentle ambient music." +#prompt = "A first-person view of a runner jumping between rooftops, flipping over obstacles, and climbing walls." 
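+
+# Gaudi-specific setup (descriptive note, based on the pipeline code added later in this patch):
+# GaudiConfig enables the fused Adam / fused clip-norm flags and torch autocast (bf16), and the
+# pipeline kwargs request Habana execution with HPU graphs so the transformer step can be captured
+# once and replayed during the denoising loop.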
+ +gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} +gaudi_config_kwargs["use_torch_autocast"] = True + +gaudi_config = GaudiConfig(**gaudi_config_kwargs) +logger.info(f"Gaudi Config: {gaudi_config}") + + +kwargs = { + "use_habana": True, + "use_hpu_graphs": True, + "gaudi_config": gaudi_config, +} +kwargs["torch_dtype"] = torch.bfloat16 + + +print('now to load model.....') +model_path = "/mnt/disk2/libo/hf_models/CogVideoX-2b/" +#model_path = "/mnt/disk2/libo/hf_models/CogVideoX-5b/" +pipe = GaudiCogVideoXPipeline.from_pretrained( + model_path, + **kwargs +) +print('pipe line load done!') + +pipe.vae.enable_tiling() +pipe.vae.enable_slicing() + +print('now to generate video.') +video = pipe( + prompt=prompt, + num_videos_per_prompt=1, + num_inference_steps=50, + num_frames=49, + guidance_scale=6, + generator=torch.Generator(device="cpu").manual_seed(42), +).frames[0] + +print('generate video done!') + +export_to_video(video, "panda_gaudi.mp4", fps=8) +#export_to_video(video, "output_gaudi.mp4", fps=8) +#export_to_video(video, "Spiderman_gaudi.mp4", fps=8) +#export_to_video(video, "astronaut_gaudi.mp4", fps=8) +#export_to_video(video, "drone_gaudi.mp4", fps=8) +#export_to_video(video, "Skyscrapers_gaudi.mp4", fps=8) +#export_to_video(video, "waves_gaudi.mp4", fps=8) + + + + + diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py new file mode 100644 index 0000000000..89e10248e1 --- /dev/null +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -0,0 +1,273 @@ +from typing import Any, Callable, Dict, List, Optional, Union, Tuple +import torch +import torch.nn as nn + +try: + from habana_frameworks.torch.hpex.kernels import FusedSDPA +except ImportError: + print("Not using HPU fused scaled dot-product attention kernel.") + FusedSDPA = None + +# FusedScaledDotProductAttention +class ModuleFusedSDPA(torch.nn.Module): + def __init__(self, fusedSDPA): + super().__init__() + self._hpu_kernel_fsdpa = fusedSDPA + + def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode): + return self._hpu_kernel_fsdpa.apply(query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode) + + +from diffusers.models.attention import Attention +class CogVideoXAttnProcessorGaudi: + r""" + Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on + query and key vectors, but does not include spatial normalization. 
+ """ + + def __init__(self): + self.fused_scaled_dot_product_attention = ModuleFusedSDPA(FusedSDPA) if FusedSDPA else None + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + image_rotary_emb: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + text_seq_length = encoder_hidden_states.size(1) + + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + from .embeddings import apply_rotary_emb + + query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) + if not attn.is_cross_attention: + key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) + + hidden_states = self.fused_scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_casual=False, scale=None, softmax_mode='fast' + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + encoder_hidden_states, hidden_states = hidden_states.split( + [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 + ) + return hidden_states, encoder_hidden_states + +import torch.nn.functional as F +from diffusers.models import attention_processor +attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi + +from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXSafeConv3d +from diffusers.models.autoencoders.vae import DecoderOutput + +class CogVideoXCausalConv3dGaudi(nn.Module): + r"""A 3D causal convolution layer that pads the input tensor to ensure causality in CogVideoX Model. + + Args: + in_channels (`int`): Number of channels in the input tensor. + out_channels (`int`): Number of output channels produced by the convolution. + kernel_size (`int` or `Tuple[int, int, int]`): Kernel size of the convolutional kernel. + stride (`int`, defaults to `1`): Stride of the convolution. + dilation (`int`, defaults to `1`): Dilation rate of the convolution. + pad_mode (`str`, defaults to `"constant"`): Padding mode. 
+ """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: Union[int, Tuple[int, int, int]], + stride: int = 1, + dilation: int = 1, + pad_mode: str = "constant", + ): + super().__init__() + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) * 3 + + time_kernel_size, height_kernel_size, width_kernel_size = kernel_size + + self.pad_mode = pad_mode + time_pad = dilation * (time_kernel_size - 1) + (1 - stride) + height_pad = height_kernel_size // 2 + width_pad = width_kernel_size // 2 + + self.height_pad = height_pad + self.width_pad = width_pad + self.time_pad = time_pad + self.time_causal_padding = (width_pad, width_pad, height_pad, height_pad, time_pad, 0) + + self.temporal_dim = 2 + self.time_kernel_size = time_kernel_size + + stride = (stride, 1, 1) + dilation = (dilation, 1, 1) + self.conv = CogVideoXSafeConv3d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + ) + + self.conv_cache = None + + def fake_context_parallel_forward(self, inputs: torch.Tensor) -> torch.Tensor: + kernel_size = self.time_kernel_size + if kernel_size > 1: + cached_inputs = ( + [self.conv_cache] if self.conv_cache is not None else [inputs[:, :, :1]] * (kernel_size - 1) + ) + inputs = torch.cat(cached_inputs + [inputs], dim=2) + return inputs + + def _clear_fake_context_parallel_cache(self): + del self.conv_cache + self.conv_cache = None + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + inputs = self.fake_context_parallel_forward(inputs) + + #self._clear_fake_context_parallel_cache() + # Note: we could move these to the cpu for a lower maximum memory usage but its only a few + # hundred megabytes and so let's not do it for now + #self.conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() + + padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) + inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) + + output = self.conv(inputs_pad) + if self.time_kernel_size>1: + if self.conv_cache is not None and self.conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: + self.conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) + else: + self.conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() + return output + +from diffusers.models.autoencoders import autoencoder_kl_cogvideox +autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi + +from diffusers.models.autoencoders.autoencoder_kl_cogvideox import AutoencoderKLCogVideoX +class AutoencoderKLCogVideoXGaudi(AutoencoderKLCogVideoX): + def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: + r""" + Decode a batch of images using a tiled decoder. + + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. + + Returns: + [`~models.vae.DecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is + returned. + """ + # Rough memory assessment: + # - In CogVideoX-2B, there are a total of 24 CausalConv3d layers. + # - The biggest intermediate dimensions are: [1, 128, 9, 480, 720]. + # - Assume fp16 (2 bytes per value). 
+ # Memory required: 1 * 128 * 9 * 480 * 720 * 24 * 2 / 1024**3 = 17.8 GB + # + # Memory assessment when using tiling: + # - Assume everything as above but now HxW is 240x360 by tiling in half + # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB + + print('run gaudi tiled decode!') + batch_size, num_channels, num_frames, height, width = z.shape + + overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) + overlap_width = int(self.tile_latent_min_width * (1 - self.tile_overlap_factor_width)) + blend_extent_height = int(self.tile_sample_min_height * self.tile_overlap_factor_height) + blend_extent_width = int(self.tile_sample_min_width * self.tile_overlap_factor_width) + row_limit_height = self.tile_sample_min_height - blend_extent_height + row_limit_width = self.tile_sample_min_width - blend_extent_width + frame_batch_size = self.num_latent_frames_batch_size + + # Split z into overlapping tiles and decode them separately. + # The tiles have an overlap to avoid seams between tiles. + rows = [] + for i in range(0, height, overlap_height): + row = [] + for j in range(0, width, overlap_width): + num_batches = num_frames // frame_batch_size + time = [] + for k in range(num_batches): + remaining_frames = num_frames % frame_batch_size + start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) + end_frame = frame_batch_size * (k + 1) + remaining_frames + tile = z[ + :, + :, + start_frame:end_frame, + i : i + self.tile_latent_min_height, + j : j + self.tile_latent_min_width, + ].clone() + if self.post_quant_conv is not None: + tile = self.post_quant_conv(tile) + tile = self.decoder(tile) + time.append(tile.clone()) + self._clear_fake_context_parallel_cache() + row.append(torch.cat(time, dim=2)) + rows.append(row) + + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_extent_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_extent_width) + result_row.append(tile[:, :, :, :row_limit_height, :row_limit_width]) + result_rows.append(torch.cat(result_row, dim=4)) + + dec = torch.cat(result_rows, dim=3) + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) + +from diffusers.models.autoencoders import autoencoder_kl_cogvideox +autoencoder_kl_cogvideox.AutoencoderKLCogVideoX=AutoencoderKLCogVideoXGaudi + + diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py new file mode 100644 index 0000000000..f5b7d7b9ca --- /dev/null +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -0,0 +1,536 @@ +# Copyright 2023 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
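+
+# Gaudi (HPU) port of the Diffusers CogVideoXPipeline: the VAE decoder (and, when HPU graphs are
+# enabled, the transformer forward pass) is wrapped in HPU graphs, the denoising loop runs under
+# bf16 autocast, and the initial noise latents are sampled on CPU before being moved to the device.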
+ +import inspect +from dataclasses import dataclass +from math import ceil +from typing import Any, Callable, Dict, List, Optional, Union + +import cogvideoX_gaudi + +import numpy as np +import PIL.Image +import torch +from diffusers import CogVideoXPipeline +from transformers import T5EncoderModel, T5Tokenizer +from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel +from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler +from diffusers.utils.torch_utils import randn_tensor +from diffusers.utils import BaseOutput +from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback + +from diffusers.utils import logging + +from optimum.habana.transformers.gaudi_configuration import GaudiConfig +from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline +import time as tm_perf + + +logger = logging.get_logger(__name__) # pylint: disable=invalid-name + + +class time_box_t(): + def __init__(self): + self.t0=None + + def start(self): + self.t0 = tm_perf.perf_counter() + + def show_time(self, desc): + torch.hpu.synchronize() + t1 = tm_perf.perf_counter() + duration = t1-self.t0 + self.t0 = t1 + print(f'{desc} duration:{duration:.3f}s') + +@dataclass +class GaudiTextToVideoSDPipelineOutput(BaseOutput): + r""" + Output class for CogVideo pipelines. + + Args: + frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]): + List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing + denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape + `(batch_size, num_frames, channels, height, width)`. + """ + + frames: torch.Tensor + +# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps +def retrieve_timesteps( + scheduler, + num_inference_steps: Optional[int] = None, + device: Optional[Union[str, torch.device]] = None, + timesteps: Optional[List[int]] = None, + sigmas: Optional[List[float]] = None, + **kwargs, +): + """ + Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles + custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`. + + Args: + scheduler (`SchedulerMixin`): + The scheduler to get timesteps from. + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps` + must be `None`. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`List[int]`, *optional*): + Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed, + `num_inference_steps` and `sigmas` must be `None`. + sigmas (`List[float]`, *optional*): + Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed, + `num_inference_steps` and `timesteps` must be `None`. + + Returns: + `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the + second element is the number of inference steps. + """ + if timesteps is not None and sigmas is not None: + raise ValueError("Only one of `timesteps` or `sigmas` can be passed. 
Please choose one to set custom values") + if timesteps is not None: + accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys()) + if not accepts_timesteps: + raise ValueError( + f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom" + f" timestep schedules. Please check whether you are using the correct scheduler." + ) + scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs) + timesteps = scheduler.timesteps + num_inference_steps = len(timesteps) + elif sigmas is not None: + accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys()) + if not accept_sigmas: + raise ValueError( + f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom" + f" sigmas schedules. Please check whether you are using the correct scheduler." + ) + scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs) + timesteps = scheduler.timesteps + num_inference_steps = len(timesteps) + else: + scheduler.set_timesteps(num_inference_steps, device=device, **kwargs) + timesteps = scheduler.timesteps + return timesteps, num_inference_steps + + +class GaudiCogVideoXPipeline(GaudiDiffusionPipeline, CogVideoXPipeline): + r""" + Adapted from: https://github.com/huggingface/diffusers/blob/v0.26.3/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L84 + """ + + def __init__( + self, + tokenizer: T5Tokenizer, + text_encoder: T5EncoderModel, + vae: AutoencoderKLCogVideoX, + transformer: CogVideoXTransformer3DModel, + scheduler: Union[CogVideoXDDIMScheduler, CogVideoXDPMScheduler], + use_habana: bool = False, + use_hpu_graphs: bool = False, + gaudi_config: Union[str, GaudiConfig] = None, + bf16_full_eval: bool = False, + ): + print(f'GaudiCogVideoXPipeline init use_habana:{use_habana} use_hpu_graphs:{use_hpu_graphs}') + GaudiDiffusionPipeline.__init__( + self, + use_habana, + use_hpu_graphs, + gaudi_config, + bf16_full_eval, + ) + CogVideoXPipeline.__init__( + self, + tokenizer, + text_encoder, + vae, + transformer, + scheduler, + ) + self.to(self._device) + + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) + + @property + def guidance_scale(self): + return self._guidance_scale + + @property + def num_timesteps(self): + return self._num_timesteps + + @property + def interrupt(self): + return self._interrupt + + def enable_model_cpu_offload(self, *args, **kwargs): + if self.use_habana: + raise NotImplementedError("enable_model_cpu_offload() is not implemented for HPU") + else: + return super().enable_model_cpu_offload(*args, **kwargs) + + def prepare_latents( + self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None + ): + shape = ( + batch_size, + (num_frames - 1) // self.vae_scale_factor_temporal + 1, + num_channels_latents, + height // self.vae_scale_factor_spatial, + width // self.vae_scale_factor_spatial, + ) + if isinstance(generator, list) and len(generator) != batch_size: + raise ValueError( + f"You have passed a list of generators of length {len(generator)}, but requested an effective batch" + f" size of {batch_size}. Make sure the batch size matches the length of the generators." 
+ ) + + if latents is None: + # torch.randn is broken on HPU so running it on CPU + rand_device = "cpu" if device.type == "hpu" else device + rand_device = torch.device(rand_device) + latents = randn_tensor(shape, generator=generator, device=rand_device, dtype=dtype).to(device) + else: + latents = latents.to(device) + + # scale the initial noise by the standard deviation required by the scheduler + latents = latents * self.scheduler.init_noise_sigma + return latents + + + @torch.no_grad() + def __call__( + self, + prompt: Optional[Union[str, List[str]]] = None, + negative_prompt: Optional[Union[str, List[str]]] = None, + height: int = 480, + width: int = 720, + num_frames: int = 49, + num_inference_steps: int = 50, + timesteps: Optional[List[int]] = None, + guidance_scale: float = 6, + use_dynamic_cfg: bool = False, + num_videos_per_prompt: int = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + prompt_embeds: Optional[torch.FloatTensor] = None, + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + output_type: str = "pil", + return_dict: bool = True, + callback_on_step_end: Optional[ + Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks] + ] = None, + callback_on_step_end_tensor_inputs: List[str] = ["latents"], + max_sequence_length: int = 226, + ): + """ + Function invoked when calling the pipeline for generation. + + Args: + prompt (`str` or `List[str]`, *optional*): + The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. + instead. + negative_prompt (`str` or `List[str]`, *optional*): + The prompt or prompts not to guide the image generation. If not defined, one has to pass + `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is + less than `1`). + height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): + The height in pixels of the generated image. This is set to 1024 by default for the best results. + width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): + The width in pixels of the generated image. This is set to 1024 by default for the best results. + num_frames (`int`, defaults to `48`): + Number of frames to generate. Must be divisible by self.vae_scale_factor_temporal. Generated video will + contain 1 extra frame because CogVideoX is conditioned with (num_seconds * fps + 1) frames where + num_seconds is 6 and fps is 4. However, since videos can be saved at any fps, the only condition that + needs to be satisfied is that of divisibility mentioned above. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + timesteps (`List[int]`, *optional*): + Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument + in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is + passed will be used. Must be in descending order. + guidance_scale (`float`, *optional*, defaults to 7.0): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. 
Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + num_videos_per_prompt (`int`, *optional*, defaults to 1): + The number of videos to generate per prompt. + generator (`torch.Generator` or `List[torch.Generator]`, *optional*): + One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) + to make generation deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not + provided, text embeddings will be generated from `prompt` input argument. + negative_prompt_embeds (`torch.FloatTensor`, *optional*): + Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt + weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input + argument. + output_type (`str`, *optional*, defaults to `"pil"`): + The output format of the generate image. Choose between + [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead + of a plain tuple. + callback_on_step_end (`Callable`, *optional*): + A function that calls at the end of each denoising steps during the inference. The function is called + with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int, + callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by + `callback_on_step_end_tensor_inputs`. + callback_on_step_end_tensor_inputs (`List`, *optional*): + The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list + will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the + `._callback_tensor_inputs` attribute of your pipeline class. + max_sequence_length (`int`, defaults to `226`): + Maximum sequence length in encoded prompt. Must be consistent with + `self.transformer.config.max_text_seq_length` otherwise may lead to poor results. + + Examples: + + Returns: + [`~pipelines.cogvideo.pipeline_cogvideox.CogVideoXPipelineOutput`] or `tuple`: + [`~pipelines.cogvideo.pipeline_cogvideox.CogVideoXPipelineOutput`] if `return_dict` is True, otherwise a + `tuple`. When returning a tuple, the first element is a list with the generated images. + """ + with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.gaudi_config.use_torch_autocast): + if num_frames > 49: + raise ValueError( + "The number of frames must be less than 49 for now due to static positional embeddings. This will be updated in the future to remove this limitation." + ) + + if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)): + callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs + + time_box = time_box_t() + time_box.start() + # 0. 
Default height and width to unet + height = height or self.transformer.config.sample_size * self.vae_scale_factor_spatial + width = width or self.transformer.config.sample_size * self.vae_scale_factor_spatial + num_videos_per_prompt = 1 + + # 1. Check inputs. Raise error if not correct + self.check_inputs( + prompt, + height, + width, + negative_prompt, + callback_on_step_end_tensor_inputs, + prompt_embeds, + negative_prompt_embeds, + ) + self._guidance_scale = guidance_scale + self._interrupt = False + + # 2. Define call parameters + if prompt is not None and isinstance(prompt, str): + batch_size = 1 + elif prompt is not None and isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + + device = self._execution_device + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. + do_classifier_free_guidance = guidance_scale > 1.0 + + # 3. Encode input prompt + prompt_embeds, negative_prompt_embeds = self.encode_prompt( + prompt, + negative_prompt, + do_classifier_free_guidance, + num_videos_per_prompt=num_videos_per_prompt, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + max_sequence_length=max_sequence_length, + device=device, + ) + if do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) + + # 4. Prepare timesteps + timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) + self._num_timesteps = len(timesteps) + + # 5. Prepare latent variables + latent_channels = self.transformer.config.in_channels + latents = self.prepare_latents( + batch_size * num_videos_per_prompt, + latent_channels, + num_frames, + height, + width, + prompt_embeds.dtype, + device, + generator, + latents, + ) + + # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + image_rotary_emb = ( + self._prepare_rotary_positional_embeddings(height, width, latents.size(1), device) + if self.transformer.config.use_rotary_positional_embeddings + else None + ) + time_box.show_time('prepare latents') + + # 7. 
Denoising loop + num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) + outputs = [] + with self.progress_bar(total=num_inference_steps) as progress_bar: + # for DPM-solver++ + old_pred_original_sample = None + for i, t in enumerate(timesteps): + if self.interrupt: + continue + + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timestep = t.expand(latent_model_input.shape[0]) + + noise_pred = self.transformer_hpu( + latent_model_input=latent_model_input, + prompt_embeds=prompt_embeds, + timestep=timestep, + image_rotary_emb=image_rotary_emb, + ) + + noise_pred = noise_pred.float() + + # perform guidance + if use_dynamic_cfg: + self._guidance_scale = 1 + guidance_scale * ( + (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2 + ) + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond) + + + # compute the previous noisy sample x_t -> x_t-1 + if not isinstance(self.scheduler, CogVideoXDPMScheduler): + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] + else: + latents, old_pred_original_sample = self.scheduler.step( + noise_pred, + old_pred_original_sample, + t, + timesteps[i - 1] if i > 0 else None, + latents, + **extra_step_kwargs, + return_dict=False, + ) + latents = latents.to(prompt_embeds.dtype) + + if not self.use_hpu_graphs: + self.htcore.mark_step() + + # call the callback, if provided + if callback_on_step_end is not None: + callback_kwargs = {} + for k in callback_on_step_end_tensor_inputs: + callback_kwargs[k] = locals()[k] + callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) + + latents = callback_outputs.pop("latents", latents) + prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) + negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) + + + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + if not self.use_hpu_graphs: + self.htcore.mark_step() + time_box.show_time('transformer_hpu') + + #HabanaProfile.stop() + if not output_type == "latent": + #print('baymax now to decode latents') + #latents = latents.to('cpu') + video = self.decode_latents(latents) + time_box.show_time('decode latents') + #print('baymax decode latents done!') + video = self.video_processor.postprocess_video(video=video, output_type=output_type) + time_box.show_time('postprocess_video') + else: + video = latents + + # Offload all models + self.maybe_free_model_hooks() + + if not return_dict: + return (video,) + + return GaudiTextToVideoSDPipelineOutput(frames=video) + + @torch.no_grad() + def transformer_hpu(self, latent_model_input, prompt_embeds, timestep, image_rotary_emb): + if self.use_hpu_graphs: + return self.capture_replay(latent_model_input, prompt_embeds, timestep, image_rotary_emb) + else: + return self.transformer( + hidden_states=latent_model_input, + encoder_hidden_states=prompt_embeds, + timestep=timestep, + image_rotary_emb=image_rotary_emb, + return_dict=False, + )[0] + + @torch.no_grad() + def capture_replay(self, latent_model_input, prompt_embeds, timestep, image_rotary_emb): + inputs = 
[latent_model_input.clone(), prompt_embeds.clone(), timestep.clone(), image_rotary_emb, False] + h = self.ht.hpu.graphs.input_hash(inputs) + cached = self.cache.get(h) + + if cached is None: + # Capture the graph and cache it + with self.ht.hpu.stream(self.hpu_stream): + graph = self.ht.hpu.HPUGraph() + graph.capture_begin() + outputs = self.transformer( + hidden_states = inputs[0], + encoder_hidden_states = inputs[1], + timestep=inputs[2], + image_rotary_emb=inputs[3], + return_dict=inputs[4] + )[0] + graph.capture_end() + graph_inputs = inputs + graph_outputs = outputs + self.cache[h] = self.ht.hpu.graphs.CachedParams(graph_inputs, graph_outputs, graph) + return outputs + + # Replay the cached graph with updated inputs + self.ht.hpu.graphs.copy_to(cached.graph_inputs, inputs) + cached.graph.replay() + self.ht.core.hpu.default_stream().synchronize() + + return cached.graph_outputs From fd556caf8206d01d5333c00cb80d0cf103e6dbdd Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Thu, 12 Dec 2024 09:16:37 +0000 Subject: [PATCH 02/32] update README for cogvideX --- examples/text-to-video/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/examples/text-to-video/README.md b/examples/text-to-video/README.md index 1df4e44e59..061c5c7928 100644 --- a/examples/text-to-video/README.md +++ b/examples/text-to-video/README.md @@ -39,3 +39,12 @@ python3 text_to_video_generation.py \ Models that have been validated: - [ali-vilab/text-to-video-ms-1.7b](https://huggingface.co/ali-vilab/text-to-video-ms-1.7b) + +CogvideoX test: +```bash +python3 cogvideo_generate.py \ + --model_name_or_path CogVideoX-2b \ + --output_name gaudi_output.mp4 +``` + + From 6a8e73d71b27f724d4e61bdc95cf5cd2fa1b19f3 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Thu, 12 Dec 2024 09:26:59 +0000 Subject: [PATCH 03/32] import cogvideo module from optimumu lib --- examples/text-to-video/cogvideox_generate.py | 131 ++++++++++--------- 1 file changed, 71 insertions(+), 60 deletions(-) diff --git a/examples/text-to-video/cogvideox_generate.py b/examples/text-to-video/cogvideox_generate.py index 8848f92654..39811c2267 100644 --- a/examples/text-to-video/cogvideox_generate.py +++ b/examples/text-to-video/cogvideox_generate.py @@ -4,72 +4,83 @@ from pathlib import Path import torch -from pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline -#from diffusers import CogVideoXPipeline +from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline from diffusers.utils import export_to_video from optimum.habana.transformers.gaudi_configuration import GaudiConfig from optimum.habana.utils import set_seed logger = logging.getLogger(__name__) -prompt = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance." - -#prompt = "A 360-degree panoramic view of a lush mountain valley with a flowing river, birds flying across the sky, and a soft orange-pink sunrise." 
-#prompt = "Spiderman is surfing, Darth Vader is also surfing and following Spiderman" -#prompt = "An astronaut riding a horse" -#prompt = "A drone shot flying above vibrant red and orange foliage with occasional sunlight beams piercing through the canopy." -#prompt = "Skyscrapers with glowing neon signs, flying cars zipping between buildings, and a massive digital billboard displaying a news broadcast." -#prompt = "Bright, surreal waves of color blending and transforming into abstract shapes in rhythm with gentle ambient music." -#prompt = "A first-person view of a runner jumping between rooftops, flipping over obstacles, and climbing walls." - -gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} -gaudi_config_kwargs["use_torch_autocast"] = True - -gaudi_config = GaudiConfig(**gaudi_config_kwargs) -logger.info(f"Gaudi Config: {gaudi_config}") - - -kwargs = { - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": gaudi_config, -} -kwargs["torch_dtype"] = torch.bfloat16 - - -print('now to load model.....') -model_path = "/mnt/disk2/libo/hf_models/CogVideoX-2b/" -#model_path = "/mnt/disk2/libo/hf_models/CogVideoX-5b/" -pipe = GaudiCogVideoXPipeline.from_pretrained( - model_path, - **kwargs -) -print('pipe line load done!') - -pipe.vae.enable_tiling() -pipe.vae.enable_slicing() - -print('now to generate video.') -video = pipe( - prompt=prompt, - num_videos_per_prompt=1, - num_inference_steps=50, - num_frames=49, - guidance_scale=6, - generator=torch.Generator(device="cpu").manual_seed(42), -).frames[0] - -print('generate video done!') - -export_to_video(video, "panda_gaudi.mp4", fps=8) -#export_to_video(video, "output_gaudi.mp4", fps=8) -#export_to_video(video, "Spiderman_gaudi.mp4", fps=8) -#export_to_video(video, "astronaut_gaudi.mp4", fps=8) -#export_to_video(video, "drone_gaudi.mp4", fps=8) -#export_to_video(video, "Skyscrapers_gaudi.mp4", fps=8) -#export_to_video(video, "waves_gaudi.mp4", fps=8) - - +def main(): + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + + parser.add_argument( + "--model_name_or_path", + default="/mnt/disk2/libo/hf_models/CogVideoX-2b/", + type=str, + help="Path to pre-trained model", + ) + # Pipeline arguments + parser.add_argument( + "--prompts", + type=str, + nargs="*", + default="A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. 
The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance.", + help="The prompt or prompts to guide the video generation.", + ) + parser.add_argument( + "--output_name", + default="panda_gaudi.mp4", + type=str, + help="Path to pre-trained model", + ) + + args = parser.parse_args() + + gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} + gaudi_config_kwargs["use_torch_autocast"] = True + + gaudi_config = GaudiConfig(**gaudi_config_kwargs) + logger.info(f"Gaudi Config: {gaudi_config}") + + + kwargs = { + "use_habana": True, + "use_hpu_graphs": True, + "gaudi_config": gaudi_config, + } + kwargs["torch_dtype"] = torch.bfloat16 + + + print('now to load model.....') + pipe = GaudiCogVideoXPipeline.from_pretrained( + args.model_name_or_path, + **kwargs + ) + print('pipe line load done!') + + pipe.vae.enable_tiling() + pipe.vae.enable_slicing() + + print('now to generate video.') + video = pipe( + prompt=args.prompts, + num_videos_per_prompt=1, + num_inference_steps=50, + num_frames=49, + guidance_scale=6, + generator=torch.Generator(device="cpu").manual_seed(42), + ).frames[0] + + print('generate video done!') + + export_to_video(video, args.output_name, fps=8) + + + +if __name__ == "__main__": + main() From 7092a515aa41f642a2621022a5527e4ad97f8c95 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Thu, 12 Dec 2024 09:54:54 +0000 Subject: [PATCH 04/32] refine test examples --- examples/text-to-video/cogvideox_generate.py | 33 +++++++++---------- .../pipelines/cogvideox/cogvideoX_gaudi.py | 15 ++++++++- .../cogvideox/pipeline_cogvideox_gaudi.py | 31 +++++++---------- 3 files changed, 42 insertions(+), 37 deletions(-) diff --git a/examples/text-to-video/cogvideox_generate.py b/examples/text-to-video/cogvideox_generate.py index 39811c2267..26e4b74c4f 100644 --- a/examples/text-to-video/cogvideox_generate.py +++ b/examples/text-to-video/cogvideox_generate.py @@ -1,24 +1,23 @@ import argparse import logging -import sys -from pathlib import Path import torch -from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline from diffusers.utils import export_to_video +from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig -from optimum.habana.utils import set_seed + + logger = logging.getLogger(__name__) def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - + parser.add_argument( "--model_name_or_path", - default="/mnt/disk2/libo/hf_models/CogVideoX-2b/", + default="CogVideoX-2b", type=str, help="Path to pre-trained model", ) @@ -38,32 +37,32 @@ def main(): ) args = parser.parse_args() - + gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} gaudi_config_kwargs["use_torch_autocast"] = True - + gaudi_config = GaudiConfig(**gaudi_config_kwargs) logger.info(f"Gaudi Config: {gaudi_config}") - - + + kwargs = { "use_habana": True, "use_hpu_graphs": True, "gaudi_config": gaudi_config, } kwargs["torch_dtype"] = torch.bfloat16 - - + + print('now to load model.....') pipe = GaudiCogVideoXPipeline.from_pretrained( args.model_name_or_path, **kwargs ) print('pipe line load done!') - + pipe.vae.enable_tiling() pipe.vae.enable_slicing() - + print('now to generate video.') video = pipe( prompt=args.prompts, @@ -73,9 +72,9 @@ def main(): guidance_scale=6, 
generator=torch.Generator(device="cpu").manual_seed(42), ).frames[0] - + print('generate video done!') - + export_to_video(video, args.output_name, fps=8) @@ -83,4 +82,4 @@ def main(): if __name__ == "__main__": main() - + diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 89e10248e1..e9b598a629 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -1,7 +1,9 @@ -from typing import Any, Callable, Dict, List, Optional, Union, Tuple +from typing import Optional, Tuple, Union + import torch import torch.nn as nn + try: from habana_frameworks.torch.hpex.kernels import FusedSDPA except ImportError: @@ -19,6 +21,8 @@ def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, sof from diffusers.models.attention import Attention + + class CogVideoXAttnProcessorGaudi: r""" Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on @@ -90,11 +94,14 @@ def __call__( import torch.nn.functional as F from diffusers.models import attention_processor + + attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXSafeConv3d from diffusers.models.autoencoders.vae import DecoderOutput + class CogVideoXCausalConv3dGaudi(nn.Module): r"""A 3D causal convolution layer that pads the input tensor to ensure causality in CogVideoX Model. @@ -181,9 +188,13 @@ def forward(self, inputs: torch.Tensor) -> torch.Tensor: return output from diffusers.models.autoencoders import autoencoder_kl_cogvideox + + autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi from diffusers.models.autoencoders.autoencoder_kl_cogvideox import AutoencoderKLCogVideoX + + class AutoencoderKLCogVideoXGaudi(AutoencoderKLCogVideoX): def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: r""" @@ -268,6 +279,8 @@ def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[Decod return DecoderOutput(sample=dec) from diffusers.models.autoencoders import autoencoder_kl_cogvideox + + autoencoder_kl_cogvideox.AutoencoderKLCogVideoX=AutoencoderKLCogVideoXGaudi diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index f5b7d7b9ca..7812543a7c 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -13,28 +13,21 @@ # limitations under the License. 
import inspect +import time as tm_perf from dataclasses import dataclass -from math import ceil -from typing import Any, Callable, Dict, List, Optional, Union - -import cogvideoX_gaudi +from typing import Callable, Dict, List, Optional, Union -import numpy as np -import PIL.Image import torch from diffusers import CogVideoXPipeline -from transformers import T5EncoderModel, T5Tokenizer +from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler +from diffusers.utils import BaseOutput, logging from diffusers.utils.torch_utils import randn_tensor -from diffusers.utils import BaseOutput -from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback - -from diffusers.utils import logging +from transformers import T5EncoderModel, T5Tokenizer -from optimum.habana.transformers.gaudi_configuration import GaudiConfig from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline -import time as tm_perf +from optimum.habana.transformers.gaudi_configuration import GaudiConfig logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -323,7 +316,7 @@ def __call__( if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)): callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs - time_box = time_box_t() + time_box = time_box_t() time_box.start() # 0. Default height and width to unet height = height or self.transformer.config.sample_size * self.vae_scale_factor_spatial @@ -358,7 +351,7 @@ def __call__( # corresponds to doing no classifier free guidance. do_classifier_free_guidance = guidance_scale > 1.0 - # 3. Encode input prompt + # 3. 
Encode input prompt prompt_embeds, negative_prompt_embeds = self.encode_prompt( prompt, negative_prompt, @@ -516,10 +509,10 @@ def capture_replay(self, latent_model_input, prompt_embeds, timestep, image_rota graph = self.ht.hpu.HPUGraph() graph.capture_begin() outputs = self.transformer( - hidden_states = inputs[0], - encoder_hidden_states = inputs[1], - timestep=inputs[2], - image_rotary_emb=inputs[3], + hidden_states = inputs[0], + encoder_hidden_states = inputs[1], + timestep=inputs[2], + image_rotary_emb=inputs[3], return_dict=inputs[4] )[0] graph.capture_end() From 13b86c8cf1bae2ea8f08119fc07581ef0d0b4f72 Mon Sep 17 00:00:00 2001 From: Zhiwei35 Date: Tue, 17 Dec 2024 11:43:51 +0800 Subject: [PATCH 05/32] fix module import defect --- examples/text-to-video/cogvideox_generate.py | 1 + .../habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/examples/text-to-video/cogvideox_generate.py b/examples/text-to-video/cogvideox_generate.py index 26e4b74c4f..4d77c01174 100644 --- a/examples/text-to-video/cogvideox_generate.py +++ b/examples/text-to-video/cogvideox_generate.py @@ -4,6 +4,7 @@ import torch from diffusers.utils import export_to_video +from optimum.habana.diffusers.pipelines.cogvideox.cogvideoX_gaudi import adapt_cogvideo_to_gaudi from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index e9b598a629..37c1df5d44 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -283,4 +283,10 @@ def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[Decod autoencoder_kl_cogvideox.AutoencoderKLCogVideoX=AutoencoderKLCogVideoXGaudi +import diffusers +def adapt_cogvideo_to_gaudi(): + diffusers.models.autoencoders.autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi + diffusers.models.autoencoders.autoencoder_kl_cogvideox.AutoencoderKLCogVideoX = AutoencoderKLCogVideoXGaudi + diffusers.models.attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi + From d125fe699d106d433ebe23dd017943a7c1286fb3 Mon Sep 17 00:00:00 2001 From: Zhiwei35 Date: Tue, 17 Dec 2024 11:45:15 +0800 Subject: [PATCH 06/32] update module import method --- optimum/habana/diffusers/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/habana/diffusers/__init__.py b/optimum/habana/diffusers/__init__.py index 86b6477c0b..234233065f 100644 --- a/optimum/habana/diffusers/__init__.py +++ b/optimum/habana/diffusers/__init__.py @@ -1,3 +1,4 @@ +from .pipelines.cogvideox.cogvideoX_gaudi import adapt_cogvideo_to_gaudi from .pipelines.auto_pipeline import AutoPipelineForInpainting, AutoPipelineForText2Image from .pipelines.controlnet.pipeline_controlnet import GaudiStableDiffusionControlNetPipeline from .pipelines.controlnet.pipeline_stable_video_diffusion_controlnet import ( From 4698ded7961c08cb5e1fd3e8a4fc1fcd430ba742 Mon Sep 17 00:00:00 2001 From: Zhiwei35 Date: Tue, 17 Dec 2024 16:33:27 +0800 Subject: [PATCH 07/32] upgrade for diffusers version 0.31.0 --- .../pipelines/cogvideox/cogvideoX_gaudi.py | 39 ++++++++----------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py 
b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 37c1df5d44..8aa487b65b 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -153,39 +153,30 @@ def __init__( dilation=dilation, ) - self.conv_cache = None - def fake_context_parallel_forward(self, inputs: torch.Tensor) -> torch.Tensor: + def fake_context_parallel_forward( + self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None + ) -> torch.Tensor: kernel_size = self.time_kernel_size if kernel_size > 1: - cached_inputs = ( - [self.conv_cache] if self.conv_cache is not None else [inputs[:, :, :1]] * (kernel_size - 1) - ) + cached_inputs = [conv_cache] if conv_cache is not None else [inputs[:, :, :1]] * (kernel_size - 1) inputs = torch.cat(cached_inputs + [inputs], dim=2) return inputs - def _clear_fake_context_parallel_cache(self): - del self.conv_cache - self.conv_cache = None - - def forward(self, inputs: torch.Tensor) -> torch.Tensor: - inputs = self.fake_context_parallel_forward(inputs) - - #self._clear_fake_context_parallel_cache() - # Note: we could move these to the cpu for a lower maximum memory usage but its only a few - # hundred megabytes and so let's not do it for now - #self.conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() + def forward(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: + inputs = self.fake_context_parallel_forward(inputs, conv_cache) + #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) output = self.conv(inputs_pad) if self.time_kernel_size>1: - if self.conv_cache is not None and self.conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: - self.conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) + if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: + conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) else: - self.conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() - return output + conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() + return output, conv_cache from diffusers.models.autoencoders import autoencoder_kl_cogvideox @@ -237,8 +228,10 @@ def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[Decod for i in range(0, height, overlap_height): row = [] for j in range(0, width, overlap_width): - num_batches = num_frames // frame_batch_size + num_batches = max(num_frames // frame_batch_size, 1) + conv_cache = None time = [] + for k in range(num_batches): remaining_frames = num_frames % frame_batch_size start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) @@ -252,9 +245,9 @@ def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[Decod ].clone() if self.post_quant_conv is not None: tile = self.post_quant_conv(tile) - tile = self.decoder(tile) + tile, conv_cache = self.decoder(tile, conv_cache=conv_cache) time.append(tile.clone()) - self._clear_fake_context_parallel_cache() + row.append(torch.cat(time, dim=2)) rows.append(row) From 21caddcd392a46996809e2ab5bea7ae0735c4940 Mon Sep 17 00:00:00 2001 From: ranzhejiang Date: Wed, 18 Dec 2024 18:12:36 +0800 Subject: [PATCH 08/32] add cogVideo test case. 
--- .../cogvideox/pipeline_cogvideox_gaudi.py | 3 - tests/test_diffusers.py | 136 ++++++++++++++++++ 2 files changed, 136 insertions(+), 3 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index 7812543a7c..afb8165340 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -466,11 +466,8 @@ def __call__( #HabanaProfile.stop() if not output_type == "latent": - #print('baymax now to decode latents') - #latents = latents.to('cpu') video = self.decode_latents(latents) time_box.show_time('decode latents') - #print('baymax decode latents done!') video = self.video_processor.postprocess_video(video=video, output_type=output_type) time_box.show_time('postprocess_video') else: diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py index 97bbb7632d..fd580bbcbe 100755 --- a/tests/test_diffusers.py +++ b/tests/test_diffusers.py @@ -42,6 +42,7 @@ AutoencoderKL, AutoencoderKLTemporalDecoder, AutoencoderTiny, + AutoencoderKLCogVideoX, ControlNetModel, DiffusionPipeline, DPMSolverMultistepScheduler, @@ -59,6 +60,8 @@ UNet3DConditionModel, UNetSpatioTemporalConditionModel, UniPCMultistepScheduler, + CogVideoXTransformer3DModel, + CogVideoXDDIMScheduler, ) from diffusers.image_processor import VaeImageProcessor from diffusers.pipelines.controlnet.pipeline_controlnet import MultiControlNetModel @@ -89,6 +92,8 @@ DPTFeatureExtractor, DPTForDepthEstimation, T5EncoderModel, + T5Tokenizer, + T5Config, ) from transformers.testing_utils import parse_flag_from_env, slow @@ -117,6 +122,7 @@ GaudiStableVideoDiffusionControlNetPipeline, GaudiStableVideoDiffusionPipeline, GaudiTextToVideoSDPipeline, + GaudiCogVideoXPipeline, ) from optimum.habana.diffusers.models import ( ControlNetSDVModel, @@ -3767,6 +3773,136 @@ def test_deterministic_image_generation_no_throughput_regression_bf16(self): self.assertGreaterEqual(outputs.throughput, 0.95 * DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT) +class GaudiCogVideoXPipelineTester(TestCase): + """ + Tests the TextToVideoSDPipeline for Gaudi. 
+ Adapted from https://github.com/huggingface/diffusers/blob/v0.24.0-release/tests/pipelines/text_to_video_synthesis/test_text_to_video.py + """ + + def get_dummy_components(self): + tokenizer = T5Tokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") + set_seed(0) + text_encoder_cfg = T5Config(vocab_size = 32128, + d_kv = 64, + d_ff = 10240, + num_layers = 8, + num_decoder_layers=8, + relative_attention_num_buckets=32, + relative_attention_max_distance=128, + initializer_factor=1.0, + feed_forward_proj='gated-gelu', + is_encoder_decoder=True, + pad_token_id=0, + eos_token_id=1, + torch_dtype = torch.bfloat16, + d_model = 4096) + text_encoder = T5EncoderModel(text_encoder_cfg).bfloat16() + + set_seed(0) + transformer = CogVideoXTransformer3DModel( + num_attention_heads=30, + attention_head_dim=64, + in_channels=16, + out_channels=16, + flip_sin_to_cos=True, + freq_shift=0, + time_embed_dim=512, + text_embed_dim=4096, + num_layers=8, + dropout=0.0, + attention_bias=True, + sample_width=90, + sample_height=60, + sample_frames=49, + patch_size=2, + temporal_compression_ratio=4, + max_text_seq_length=226, + activation_fn="gelu-approximate", + timestep_activation_fn="silu", + norm_elementwise_affine=True, + norm_eps=1e-5, + spatial_interpolation_scale=1.875, + temporal_interpolation_scale=1.0, + ).bfloat16() + + scheduler = CogVideoXDDIMScheduler( + num_train_timesteps=1000, + beta_start = 0.00085, + beta_end = 0.0120, + beta_schedule = "scaled_linear", + clip_sample=False, + set_alpha_to_one = True, + steps_offset=0, + prediction_type = "v_prediction", + clip_sample_range = 1.0, + sample_max_value = 1.0, + timestep_spacing = "trailing", + rescale_betas_zero_snr = True, + snr_shift_scale=1.0, + ) + + + set_seed(0) + vae = AutoencoderKLCogVideoX(in_channels=3, + out_channels = 3, + down_block_types = [ + "CogVideoXDownBlock3D", + "CogVideoXDownBlock3D", + "CogVideoXDownBlock3D", + "CogVideoXDownBlock3D" + ], + block_out_channels = [128,256,256,512], + latent_channels=16, + layers_per_block=1, + act_fn="silu", + norm_eps=1e-6, + norm_num_groups=32, + temporal_compression_ratio=4, + sample_height=480, + sample_width=720, + scaling_factor=1.15258426, + ).bfloat16() + + + vae.enable_slicing() + vae.enable_tiling() + + components = { + "tokenizer": tokenizer, + "text_encoder": text_encoder, + "transformer": transformer, + "scheduler": scheduler, + "vae": vae, + } + + return components + + def get_dummy_inputs(self, device, seed=0): + prompts = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance." 
+ return prompts + + def test_cogvideoX_default_case(self): + gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} + gaudi_config_kwargs["use_torch_autocast"] = True + gaudi_config = GaudiConfig(**gaudi_config_kwargs) + + components = self.get_dummy_components() + components["use_habana"] = True + components["use_hpu_graphs"] = True + components["gaudi_config"] = gaudi_config + + cogVideoX_pipe = GaudiCogVideoXPipeline(**components) + video = cogVideoX_pipe( + prompt=self.get_dummy_inputs(device="cpu"), + num_videos_per_prompt=1, + num_inference_steps=5, + num_frames=49, + guidance_scale=6, + generator=torch.Generator(device="cpu").manual_seed(42), + ).frames[0] + + assert video is not None + assert 49 == len(video) class GaudiTextToVideoSDPipelineTester(TestCase): """ From feff2a366af064304fa1a2f226957766a1fce1b9 Mon Sep 17 00:00:00 2001 From: libo7x Date: Wed, 18 Dec 2024 18:42:29 +0800 Subject: [PATCH 09/32] refine model default path --- examples/text-to-video/cogvideox_generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/text-to-video/cogvideox_generate.py b/examples/text-to-video/cogvideox_generate.py index 4d77c01174..4b95c0a8ee 100644 --- a/examples/text-to-video/cogvideox_generate.py +++ b/examples/text-to-video/cogvideox_generate.py @@ -18,7 +18,7 @@ def main(): parser.add_argument( "--model_name_or_path", - default="CogVideoX-2b", + default="THUDM/CogVideoX-2b", type=str, help="Path to pre-trained model", ) From ae05af94907e80f5c83fc769d5d8c6eb23cae146 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Thu, 19 Dec 2024 08:49:48 +0000 Subject: [PATCH 10/32] add required python lib for cogvideo --- examples/text-to-video/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/examples/text-to-video/requirements.txt b/examples/text-to-video/requirements.txt index 6ab6d0d570..f3e192bbdc 100644 --- a/examples/text-to-video/requirements.txt +++ b/examples/text-to-video/requirements.txt @@ -1 +1,5 @@ opencv-python-headless +sentencepiece +imageio +imageio-ffmpeg + From 12badb89586148947065a214b6cd8860d88392d6 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Mon, 13 Jan 2025 10:53:22 +0800 Subject: [PATCH 11/32] refine README.MD --- examples/text-to-video/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/text-to-video/README.md b/examples/text-to-video/README.md index 061c5c7928..a7ab947b24 100644 --- a/examples/text-to-video/README.md +++ b/examples/text-to-video/README.md @@ -42,8 +42,8 @@ Models that have been validated: CogvideoX test: ```bash -python3 cogvideox_generate.py \ - --model_name_or_path THUDM/CogVideoX-2b \ +python3 cogvideox_generate.py \ + --model_name_or_path THUDM/CogVideoX-2b \ --output_name gaudi_output.mp4 ``` From 7df1a6c0c1c31c730cda7115dfecc925b6f4f802 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Wed, 15 Jan 2025 06:46:04 +0000 Subject: [PATCH 12/32] use gaudi implementation of apply rotary embedding.
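The previous revision imported `apply_rotary_emb` from a relative `.embeddings` module that does not exist next to this file; this change inlines a Gaudi-friendly variant built on the fused `torch.ops.hpu.rotary_pos_embedding` kernel. For reference, a minimal eager sketch of the rotate-half formulation that diffusers applies to CogVideoX query/key tensors is shown below, assuming `freqs_cis` is a `(cos, sin)` pair of shape `[seq, head_dim]` and `x` has shape `[batch, heads, seq, head_dim]`:

```python
import torch

def apply_rotary_emb_eager(x: torch.Tensor, freqs_cis) -> torch.Tensor:
    # Eager reference of the rotary embedding that the fused HPU op stands in for.
    cos, sin = freqs_cis                # each [seq, head_dim]
    cos = cos[None, None].to(x.device)  # -> [1, 1, seq, head_dim]
    sin = sin[None, None].to(x.device)
    # rotate-half: view channels as (even, odd) pairs and rotate each pair
    x_real, x_imag = x.reshape(*x.shape[:-1], -1, 2).unbind(-1)
    x_rotated = torch.stack([-x_imag, x_real], dim=-1).flatten(3)
    return (x.float() * cos + x_rotated.float() * sin).to(x.dtype)
```

The fused kernel is intended to compute the same rotation in a single op, which avoids materializing the intermediate reshape/stack tensors on the device.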
--- .../pipelines/cogvideox/cogvideoX_gaudi.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 8aa487b65b..5d73bfbe9b 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -22,6 +22,23 @@ def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, sof from diffusers.models.attention import Attention +def apply_rotary_emb( + x: torch.Tensor, + freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Adapted from: https://github.com/huggingface/diffusers/blob/v0.31.0/src/diffusers/models/embeddings.py#L697 + """ + cos_, sin_ = freqs_cis # [S, D] + + cos = cos_[None, None] + sin = sin_[None, None] + cos, sin = cos.to(x.device), sin.to(x.device) + + x = torch.ops.hpu.rotary_pos_embedding(x, sin, cos, None, 0, 1) + + return x + class CogVideoXAttnProcessorGaudi: r""" @@ -70,8 +87,6 @@ def __call__( # Apply RoPE if needed if image_rotary_emb is not None: - from .embeddings import apply_rotary_emb - query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) if not attn.is_cross_attention: key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) From 6919313fa76009d9638c1550aa790a868978980c Mon Sep 17 00:00:00 2001 From: "tony.lin@intel.com" Date: Thu, 23 Jan 2025 15:23:58 +0800 Subject: [PATCH 13/32] fix htcore defect --- .../pipelines/cogvideox/pipeline_cogvideox_gaudi.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index afb8165340..8e4c3c7e40 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -28,6 +28,7 @@ from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig +import habana_frameworks.torch.core as htcore logger = logging.get_logger(__name__) # pylint: disable=invalid-name @@ -444,7 +445,7 @@ def __call__( latents = latents.to(prompt_embeds.dtype) if not self.use_hpu_graphs: - self.htcore.mark_step() + htcore.mark_step() # call the callback, if provided if callback_on_step_end is not None: @@ -461,7 +462,7 @@ def __call__( if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() if not self.use_hpu_graphs: - self.htcore.mark_step() + htcore.mark_step() time_box.show_time('transformer_hpu') #HabanaProfile.stop() From c15aa511ea32e1758769c5a654c33687dd2ac361 Mon Sep 17 00:00:00 2001 From: "tony.lin@intel.com" Date: Thu, 23 Jan 2025 15:25:01 +0800 Subject: [PATCH 14/32] fix can't find htcore defect. 
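The previous commit replaced `self.htcore.mark_step()` with a module-level `import habana_frameworks.torch.core as htcore`, since the pipeline object carries no `htcore` attribute; this one flips the guard so the explicit step marker is emitted on the HPU-graph path. A rough sketch of the resulting loop shape is below (illustrative only; `denoise_loop`, the argument names, and the scheduler call are simplified stand-ins for the code in `GaudiCogVideoXPipeline.__call__`):

```python
import habana_frameworks.torch.core as htcore  # requires the Habana/HPU software stack

def denoise_loop(pipe, latents, prompt_embeds, timesteps, image_rotary_emb):
    # Simplified shape of the denoising loop after this fix.
    for t in timesteps:
        timestep = t.expand(latents.shape[0])
        noise_pred = pipe.transformer_hpu(latents, prompt_embeds, timestep, image_rotary_emb)
        latents = pipe.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
        if pipe.use_hpu_graphs:
            htcore.mark_step()  # guard was previously `not pipe.use_hpu_graphs`
    return latents
```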
--- .../diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index 8e4c3c7e40..400a16f66f 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -444,7 +444,7 @@ def __call__( ) latents = latents.to(prompt_embeds.dtype) - if not self.use_hpu_graphs: + if self.use_hpu_graphs: htcore.mark_step() # call the callback, if provided @@ -461,7 +461,7 @@ def __call__( if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() - if not self.use_hpu_graphs: + if self.use_hpu_graphs: htcore.mark_step() time_box.show_time('transformer_hpu') From 687caf9ff135b6d74a548e7f8014a142e452714e Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Thu, 23 Jan 2025 09:51:07 +0000 Subject: [PATCH 15/32] support for G3 on graph optimization --- .../pipelines/cogvideox/cogvideoX_gaudi.py | 174 +++++++++++++++++- 1 file changed, 172 insertions(+), 2 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 5d73bfbe9b..210ac631b4 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -1,4 +1,4 @@ -from typing import Optional, Tuple, Union +from typing import Any, Dict, Optional, Tuple, Union import torch import torch.nn as nn @@ -291,10 +291,180 @@ def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[Decod autoencoder_kl_cogvideox.AutoencoderKLCogVideoX=AutoencoderKLCogVideoXGaudi -import diffusers +from diffusers.utils import USE_PEFT_BACKEND +from diffusers.models.transformers.cogvideox_transformer_3d import CogVideoXTransformer3DModel +import habana_frameworks.torch.core as htcore + +class CogVideoXTransformer3DModelGaudi(CogVideoXTransformer3DModel): + def __init__( + self, + num_attention_heads: int = 30, + attention_head_dim: int = 64, + in_channels: int = 16, + out_channels: Optional[int] = 16, + flip_sin_to_cos: bool = True, + freq_shift: int = 0, + time_embed_dim: int = 512, + text_embed_dim: int = 4096, + num_layers: int = 30, + dropout: float = 0.0, + attention_bias: bool = True, + sample_width: int = 90, + sample_height: int = 60, + sample_frames: int = 49, + patch_size: int = 2, + temporal_compression_ratio: int = 4, + max_text_seq_length: int = 226, + activation_fn: str = "gelu-approximate", + timestep_activation_fn: str = "silu", + norm_elementwise_affine: bool = True, + norm_eps: float = 1e-5, + spatial_interpolation_scale: float = 1.875, + temporal_interpolation_scale: float = 1.0, + use_rotary_positional_embeddings: bool = False, + use_learned_positional_embeddings: bool = False, + ): + super().__init__( + num_attention_heads, + attention_head_dim, + in_channels, + out_channels, + flip_sin_to_cos, + freq_shift, + time_embed_dim, + text_embed_dim, + num_layers, + dropout, + attention_bias, + sample_width, + sample_height, + sample_frames, + patch_size, + temporal_compression_ratio, + max_text_seq_length, + activation_fn, + timestep_activation_fn, + norm_elementwise_affine, + norm_eps, + spatial_interpolation_scale, + temporal_interpolation_scale, + use_rotary_positional_embeddings, + 
use_learned_positional_embeddings, + ) + + def forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: Union[int, float, torch.LongTensor], + timestep_cond: Optional[torch.Tensor] = None, + image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + attention_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = True, + ): + if attention_kwargs is not None: + attention_kwargs = attention_kwargs.copy() + lora_scale = attention_kwargs.pop("scale", 1.0) + else: + lora_scale = 1.0 + + if USE_PEFT_BACKEND: + # weight the lora layers by setting `lora_scale` for each PEFT layer + scale_lora_layers(self, lora_scale) + else: + if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: + logger.warning( + "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective." + ) + + batch_size, num_frames, channels, height, width = hidden_states.shape + + # 1. Time embedding + timesteps = timestep + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=hidden_states.dtype) + emb = self.time_embedding(t_emb, timestep_cond) + + # 2. Patch embedding + hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) + hidden_states = self.embedding_dropout(hidden_states) + + text_seq_length = encoder_hidden_states.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + print(f'baymax run gaudi CogVideoXTransformer3DModel forward!') + + # 3. Transformer blocks + for i, block in enumerate(self.transformer_blocks): + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module): + def custom_forward(*inputs): + return module(*inputs) + + return custom_forward + + ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} + hidden_states, encoder_hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(block), + hidden_states, + encoder_hidden_states, + emb, + image_rotary_emb, + **ckpt_kwargs, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + ) + htcore.mark_step() + + if not self.config.use_rotary_positional_embeddings: + # CogVideoX-2B + hidden_states = self.norm_final(hidden_states) + else: + # CogVideoX-5B + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + hidden_states = self.norm_final(hidden_states) + hidden_states = hidden_states[:, text_seq_length:] + + # 4. Final block + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # 5. 
Unpatchify + # Note: we use `-1` instead of `channels`: + # - It is okay to `channels` use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels) + # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels) + p = self.config.patch_size + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + + if USE_PEFT_BACKEND: + # remove `lora_scale` from each PEFT layer + unscale_lora_layers(self, lora_scale) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) + +from diffusers.models.transformers import cogvideox_transformer_3d +cogvideox_transformer_3d.CogVideoXTransformer3DModel = CogVideoXTransformer3DModelGaudi + + def adapt_cogvideo_to_gaudi(): + import diffusers diffusers.models.autoencoders.autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi diffusers.models.autoencoders.autoencoder_kl_cogvideox.AutoencoderKLCogVideoX = AutoencoderKLCogVideoXGaudi diffusers.models.attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi + diffusers.models.transformers.cogvideox_transformer_3d.CogVideoXTransformer3DModel = CogVideoXTransformer3DModelGaudi From 339e31f9cf649c302f61db63e21ec69f16b749df Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Thu, 23 Jan 2025 09:52:44 +0000 Subject: [PATCH 16/32] clear debug code, --- optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 210ac631b4..4c0e64423a 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -397,8 +397,6 @@ def forward( encoder_hidden_states = hidden_states[:, :text_seq_length] hidden_states = hidden_states[:, text_seq_length:] - print(f'baymax run gaudi CogVideoXTransformer3DModel forward!') - # 3. Transformer blocks for i, block in enumerate(self.transformer_blocks): if self.training and self.gradient_checkpointing: From 4ab7ebeba95bac476da943a28a0a9517225be1d7 Mon Sep 17 00:00:00 2001 From: ranzhejiang Date: Sun, 26 Jan 2025 14:38:52 +0800 Subject: [PATCH 17/32] set transformer gaudi fowrad in pipelines. --- .../pipelines/cogvideox/cogvideoX_gaudi.py | 171 +----------------- .../cogvideox/pipeline_cogvideox_gaudi.py | 108 ++++++++++- 2 files changed, 110 insertions(+), 169 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 4c0e64423a..64395ea473 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -117,6 +117,7 @@ def __call__( from diffusers.models.autoencoders.vae import DecoderOutput +import habana_frameworks.torch.core as htcore class CogVideoXCausalConv3dGaudi(nn.Module): r"""A 3D causal convolution layer that pads the input tensor to ensure causality in CogVideoX Model. 
@@ -191,6 +192,7 @@ def forward(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = Non conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) else: conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() + htcore.mark_step() return output, conv_cache from diffusers.models.autoencoders import autoencoder_kl_cogvideox @@ -291,178 +293,11 @@ def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[Decod autoencoder_kl_cogvideox.AutoencoderKLCogVideoX=AutoencoderKLCogVideoXGaudi -from diffusers.utils import USE_PEFT_BACKEND -from diffusers.models.transformers.cogvideox_transformer_3d import CogVideoXTransformer3DModel -import habana_frameworks.torch.core as htcore - -class CogVideoXTransformer3DModelGaudi(CogVideoXTransformer3DModel): - def __init__( - self, - num_attention_heads: int = 30, - attention_head_dim: int = 64, - in_channels: int = 16, - out_channels: Optional[int] = 16, - flip_sin_to_cos: bool = True, - freq_shift: int = 0, - time_embed_dim: int = 512, - text_embed_dim: int = 4096, - num_layers: int = 30, - dropout: float = 0.0, - attention_bias: bool = True, - sample_width: int = 90, - sample_height: int = 60, - sample_frames: int = 49, - patch_size: int = 2, - temporal_compression_ratio: int = 4, - max_text_seq_length: int = 226, - activation_fn: str = "gelu-approximate", - timestep_activation_fn: str = "silu", - norm_elementwise_affine: bool = True, - norm_eps: float = 1e-5, - spatial_interpolation_scale: float = 1.875, - temporal_interpolation_scale: float = 1.0, - use_rotary_positional_embeddings: bool = False, - use_learned_positional_embeddings: bool = False, - ): - super().__init__( - num_attention_heads, - attention_head_dim, - in_channels, - out_channels, - flip_sin_to_cos, - freq_shift, - time_embed_dim, - text_embed_dim, - num_layers, - dropout, - attention_bias, - sample_width, - sample_height, - sample_frames, - patch_size, - temporal_compression_ratio, - max_text_seq_length, - activation_fn, - timestep_activation_fn, - norm_elementwise_affine, - norm_eps, - spatial_interpolation_scale, - temporal_interpolation_scale, - use_rotary_positional_embeddings, - use_learned_positional_embeddings, - ) - - def forward( - self, - hidden_states: torch.Tensor, - encoder_hidden_states: torch.Tensor, - timestep: Union[int, float, torch.LongTensor], - timestep_cond: Optional[torch.Tensor] = None, - image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - attention_kwargs: Optional[Dict[str, Any]] = None, - return_dict: bool = True, - ): - if attention_kwargs is not None: - attention_kwargs = attention_kwargs.copy() - lora_scale = attention_kwargs.pop("scale", 1.0) - else: - lora_scale = 1.0 - - if USE_PEFT_BACKEND: - # weight the lora layers by setting `lora_scale` for each PEFT layer - scale_lora_layers(self, lora_scale) - else: - if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: - logger.warning( - "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective." - ) - - batch_size, num_frames, channels, height, width = hidden_states.shape - - # 1. Time embedding - timesteps = timestep - t_emb = self.time_proj(timesteps) - - # timesteps does not contain any weights and will always return f32 tensors - # but time_embedding might actually be running in fp16. so we need to cast here. - # there might be better ways to encapsulate this. - t_emb = t_emb.to(dtype=hidden_states.dtype) - emb = self.time_embedding(t_emb, timestep_cond) - - # 2. 
Patch embedding - hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) - hidden_states = self.embedding_dropout(hidden_states) - - text_seq_length = encoder_hidden_states.shape[1] - encoder_hidden_states = hidden_states[:, :text_seq_length] - hidden_states = hidden_states[:, text_seq_length:] - - # 3. Transformer blocks - for i, block in enumerate(self.transformer_blocks): - if self.training and self.gradient_checkpointing: - - def create_custom_forward(module): - def custom_forward(*inputs): - return module(*inputs) - - return custom_forward - - ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} - hidden_states, encoder_hidden_states = torch.utils.checkpoint.checkpoint( - create_custom_forward(block), - hidden_states, - encoder_hidden_states, - emb, - image_rotary_emb, - **ckpt_kwargs, - ) - else: - hidden_states, encoder_hidden_states = block( - hidden_states=hidden_states, - encoder_hidden_states=encoder_hidden_states, - temb=emb, - image_rotary_emb=image_rotary_emb, - ) - htcore.mark_step() - - if not self.config.use_rotary_positional_embeddings: - # CogVideoX-2B - hidden_states = self.norm_final(hidden_states) - else: - # CogVideoX-5B - hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) - hidden_states = self.norm_final(hidden_states) - hidden_states = hidden_states[:, text_seq_length:] - - # 4. Final block - hidden_states = self.norm_out(hidden_states, temb=emb) - hidden_states = self.proj_out(hidden_states) - - # 5. Unpatchify - # Note: we use `-1` instead of `channels`: - # - It is okay to `channels` use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels) - # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels) - p = self.config.patch_size - output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) - output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) - - if USE_PEFT_BACKEND: - # remove `lora_scale` from each PEFT layer - unscale_lora_layers(self, lora_scale) - - if not return_dict: - return (output,) - return Transformer2DModelOutput(sample=output) - -from diffusers.models.transformers import cogvideox_transformer_3d -cogvideox_transformer_3d.CogVideoXTransformer3DModel = CogVideoXTransformer3DModelGaudi - - def adapt_cogvideo_to_gaudi(): import diffusers diffusers.models.autoencoders.autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi diffusers.models.autoencoders.autoencoder_kl_cogvideox.AutoencoderKLCogVideoX = AutoencoderKLCogVideoXGaudi diffusers.models.attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi - diffusers.models.transformers.cogvideox_transformer_3d.CogVideoXTransformer3DModel = CogVideoXTransformer3DModelGaudi + #diffusers.models.transformers.cogvideox_transformer_3d.CogVideoXTransformer3DModel = CogVideoXTransformer3DModelGaudi diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index 400a16f66f..cb6fa07a30 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -15,7 +15,7 @@ import inspect import time as tm_perf from dataclasses import dataclass -from typing import Callable, Dict, List, Optional, Union +from typing import Any, Dict, Optional, 
Tuple, Union, Callable, List, import torch from diffusers import CogVideoXPipeline @@ -121,6 +121,109 @@ def retrieve_timesteps( timesteps = scheduler.timesteps return timesteps, num_inference_steps +from diffusers.utils import USE_PEFT_BACKEND +def gaudi_forward( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: Union[int, float, torch.LongTensor], + timestep_cond: Optional[torch.Tensor] = None, + image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + attention_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = True, +): + if attention_kwargs is not None: + attention_kwargs = attention_kwargs.copy() + lora_scale = attention_kwargs.pop("scale", 1.0) + else: + lora_scale = 1.0 + + if USE_PEFT_BACKEND: + # weight the lora layers by setting `lora_scale` for each PEFT layer + scale_lora_layers(self, lora_scale) + else: + if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: + logger.warning( + "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective." + ) + + batch_size, num_frames, channels, height, width = hidden_states.shape + + # 1. Time embedding + timesteps = timestep + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=hidden_states.dtype) + emb = self.time_embedding(t_emb, timestep_cond) + + # 2. Patch embedding + hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) + hidden_states = self.embedding_dropout(hidden_states) + + text_seq_length = encoder_hidden_states.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + print(f'baymax debug run gaudi transformer forward!') + # 3. Transformer blocks + for i, block in enumerate(self.transformer_blocks): + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module): + def custom_forward(*inputs): + return module(*inputs) + + return custom_forward + + ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} + hidden_states, encoder_hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(block), + hidden_states, + encoder_hidden_states, + emb, + image_rotary_emb, + **ckpt_kwargs, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + ) + htcore.mark_step() + + if not self.config.use_rotary_positional_embeddings: + # CogVideoX-2B + hidden_states = self.norm_final(hidden_states) + else: + # CogVideoX-5B + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + hidden_states = self.norm_final(hidden_states) + hidden_states = hidden_states[:, text_seq_length:] + + # 4. Final block + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # 5. 
Unpatchify + # Note: we use `-1` instead of `channels`: + # - It is okay to `channels` use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels) + # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels) + p = self.config.patch_size + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + + if USE_PEFT_BACKEND: + # remove `lora_scale` from each PEFT layer + unscale_lora_layers(self, lora_scale) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) class GaudiCogVideoXPipeline(GaudiDiffusionPipeline, CogVideoXPipeline): r""" @@ -156,6 +259,7 @@ def __init__( scheduler, ) self.to(self._device) + self.transformer.forward = gaudi_forward from habana_frameworks.torch.hpu import wrap_in_hpu_graph self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) @@ -488,6 +592,7 @@ def transformer_hpu(self, latent_model_input, prompt_embeds, timestep, image_rot return self.capture_replay(latent_model_input, prompt_embeds, timestep, image_rotary_emb) else: return self.transformer( + self.transformer, hidden_states=latent_model_input, encoder_hidden_states=prompt_embeds, timestep=timestep, @@ -507,6 +612,7 @@ def capture_replay(self, latent_model_input, prompt_embeds, timestep, image_rota graph = self.ht.hpu.HPUGraph() graph.capture_begin() outputs = self.transformer( + self.transformer, hidden_states = inputs[0], encoder_hidden_states = inputs[1], timestep=inputs[2], From c3e253ee91f3241b646803936f967b8a753793ea Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Sun, 26 Jan 2025 10:16:31 +0000 Subject: [PATCH 18/32] set autoencoder tiled decode gaudi wit setattr. --- .../pipelines/cogvideox/cogvideoX_gaudi.py | 95 +----------------- .../cogvideox/pipeline_cogvideox_gaudi.py | 96 ++++++++++++++++++- 2 files changed, 95 insertions(+), 96 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 64395ea473..faab010bcd 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -117,7 +117,6 @@ def __call__( from diffusers.models.autoencoders.vae import DecoderOutput -import habana_frameworks.torch.core as htcore class CogVideoXCausalConv3dGaudi(nn.Module): r"""A 3D causal convolution layer that pads the input tensor to ensure causality in CogVideoX Model. @@ -192,7 +191,6 @@ def forward(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = Non conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) else: conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() - htcore.mark_step() return output, conv_cache from diffusers.models.autoencoders import autoencoder_kl_cogvideox @@ -202,101 +200,10 @@ def forward(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = Non from diffusers.models.autoencoders.autoencoder_kl_cogvideox import AutoencoderKLCogVideoX - -class AutoencoderKLCogVideoXGaudi(AutoencoderKLCogVideoX): - def tiled_decode(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: - r""" - Decode a batch of images using a tiled decoder. - - Args: - z (`torch.Tensor`): Input batch of latent vectors. 
- return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. - - Returns: - [`~models.vae.DecoderOutput`] or `tuple`: - If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is - returned. - """ - # Rough memory assessment: - # - In CogVideoX-2B, there are a total of 24 CausalConv3d layers. - # - The biggest intermediate dimensions are: [1, 128, 9, 480, 720]. - # - Assume fp16 (2 bytes per value). - # Memory required: 1 * 128 * 9 * 480 * 720 * 24 * 2 / 1024**3 = 17.8 GB - # - # Memory assessment when using tiling: - # - Assume everything as above but now HxW is 240x360 by tiling in half - # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB - - print('run gaudi tiled decode!') - batch_size, num_channels, num_frames, height, width = z.shape - - overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) - overlap_width = int(self.tile_latent_min_width * (1 - self.tile_overlap_factor_width)) - blend_extent_height = int(self.tile_sample_min_height * self.tile_overlap_factor_height) - blend_extent_width = int(self.tile_sample_min_width * self.tile_overlap_factor_width) - row_limit_height = self.tile_sample_min_height - blend_extent_height - row_limit_width = self.tile_sample_min_width - blend_extent_width - frame_batch_size = self.num_latent_frames_batch_size - - # Split z into overlapping tiles and decode them separately. - # The tiles have an overlap to avoid seams between tiles. - rows = [] - for i in range(0, height, overlap_height): - row = [] - for j in range(0, width, overlap_width): - num_batches = max(num_frames // frame_batch_size, 1) - conv_cache = None - time = [] - - for k in range(num_batches): - remaining_frames = num_frames % frame_batch_size - start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) - end_frame = frame_batch_size * (k + 1) + remaining_frames - tile = z[ - :, - :, - start_frame:end_frame, - i : i + self.tile_latent_min_height, - j : j + self.tile_latent_min_width, - ].clone() - if self.post_quant_conv is not None: - tile = self.post_quant_conv(tile) - tile, conv_cache = self.decoder(tile, conv_cache=conv_cache) - time.append(tile.clone()) - - row.append(torch.cat(time, dim=2)) - rows.append(row) - - result_rows = [] - for i, row in enumerate(rows): - result_row = [] - for j, tile in enumerate(row): - # blend the above tile and the left tile - # to the current tile and add the current tile to the result row - if i > 0: - tile = self.blend_v(rows[i - 1][j], tile, blend_extent_height) - if j > 0: - tile = self.blend_h(row[j - 1], tile, blend_extent_width) - result_row.append(tile[:, :, :, :row_limit_height, :row_limit_width]) - result_rows.append(torch.cat(result_row, dim=4)) - - dec = torch.cat(result_rows, dim=3) - - if not return_dict: - return (dec,) - - return DecoderOutput(sample=dec) - -from diffusers.models.autoencoders import autoencoder_kl_cogvideox - - -autoencoder_kl_cogvideox.AutoencoderKLCogVideoX=AutoencoderKLCogVideoXGaudi - def adapt_cogvideo_to_gaudi(): import diffusers diffusers.models.autoencoders.autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi - diffusers.models.autoencoders.autoencoder_kl_cogvideox.AutoencoderKLCogVideoX = AutoencoderKLCogVideoXGaudi + #diffusers.models.autoencoders.autoencoder_kl_cogvideox.AutoencoderKLCogVideoX = AutoencoderKLCogVideoXGaudi diffusers.models.attention_processor.CogVideoXAttnProcessor2_0 = 
CogVideoXAttnProcessorGaudi #diffusers.models.transformers.cogvideox_transformer_3d.CogVideoXTransformer3DModel = CogVideoXTransformer3DModelGaudi diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index cb6fa07a30..fc8b897fea 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -15,7 +15,8 @@ import inspect import time as tm_perf from dataclasses import dataclass -from typing import Any, Dict, Optional, Tuple, Union, Callable, List, +from typing import Callable, Dict, List, Optional, Union +from typing import Any, Dict, Optional, Tuple, Union import torch from diffusers import CogVideoXPipeline @@ -24,6 +25,7 @@ from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler from diffusers.utils import BaseOutput, logging from diffusers.utils.torch_utils import randn_tensor +from diffusers.models.autoencoders.vae import DecoderOutput from transformers import T5EncoderModel, T5Tokenizer from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline @@ -225,6 +227,93 @@ def custom_forward(*inputs): return (output,) return Transformer2DModelOutput(sample=output) +def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: + r""" + Decode a batch of images using a tiled decoder. + + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. + + Returns: + [`~models.vae.DecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is + returned. + """ + # Rough memory assessment: + # - In CogVideoX-2B, there are a total of 24 CausalConv3d layers. + # - The biggest intermediate dimensions are: [1, 128, 9, 480, 720]. + # - Assume fp16 (2 bytes per value). + # Memory required: 1 * 128 * 9 * 480 * 720 * 24 * 2 / 1024**3 = 17.8 GB + # + # Memory assessment when using tiling: + # - Assume everything as above but now HxW is 240x360 by tiling in half + # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB + + print('run gaudi pipelined tiled decode!') + batch_size, num_channels, num_frames, height, width = z.shape + + overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) + overlap_width = int(self.tile_latent_min_width * (1 - self.tile_overlap_factor_width)) + blend_extent_height = int(self.tile_sample_min_height * self.tile_overlap_factor_height) + blend_extent_width = int(self.tile_sample_min_width * self.tile_overlap_factor_width) + row_limit_height = self.tile_sample_min_height - blend_extent_height + row_limit_width = self.tile_sample_min_width - blend_extent_width + frame_batch_size = self.num_latent_frames_batch_size + + # Split z into overlapping tiles and decode them separately. + # The tiles have an overlap to avoid seams between tiles. 
+ rows = [] + for i in range(0, height, overlap_height): + row = [] + for j in range(0, width, overlap_width): + num_batches = max(num_frames // frame_batch_size, 1) + conv_cache = None + time = [] + + for k in range(num_batches): + remaining_frames = num_frames % frame_batch_size + start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) + end_frame = frame_batch_size * (k + 1) + remaining_frames + tile = z[ + :, + :, + start_frame:end_frame, + i : i + self.tile_latent_min_height, + j : j + self.tile_latent_min_width, + ].clone() + if self.post_quant_conv is not None: + tile = self.post_quant_conv(tile) + tile, conv_cache = self.decoder(tile, conv_cache=conv_cache) + time.append(tile.clone()) + htcore.mark_step() + + row.append(torch.cat(time, dim=2)) + rows.append(row) + + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_extent_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_extent_width) + result_row.append(tile[:, :, :, :row_limit_height, :row_limit_width]) + result_rows.append(torch.cat(result_row, dim=4)) + + dec = torch.cat(result_rows, dim=3) + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) + +setattr(AutoencoderKLCogVideoX, 'tiled_decode', tiled_decode_gaudi) + class GaudiCogVideoXPipeline(GaudiDiffusionPipeline, CogVideoXPipeline): r""" Adapted from: https://github.com/huggingface/diffusers/blob/v0.26.3/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L84 @@ -260,9 +349,12 @@ def __init__( ) self.to(self._device) self.transformer.forward = gaudi_forward + print(f'vae decode name:{self.vae.tiled_decode.__name__}') from habana_frameworks.torch.hpu import wrap_in_hpu_graph - self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) + #self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) + self.vae.tiled_decode = tiled_decode_gaudi + print(f' vae decode name:{self.vae.tiled_decode.__name__} tiled_decode_gaudi:{tiled_decode_gaudi.__name__}') @property def guidance_scale(self): From bb036d6299c2f0e8cccde7e9a34ccab33c07ce4b Mon Sep 17 00:00:00 2001 From: root Date: Fri, 7 Feb 2025 16:46:45 +0800 Subject: [PATCH 19/32] move cogvideox conv3d to gaudi pipeline. --- .../pipelines/cogvideox/cogvideoX_gaudi.py | 92 ------------ .../cogvideox/pipeline_cogvideox_gaudi.py | 131 +++++++++++++++++- 2 files changed, 128 insertions(+), 95 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index faab010bcd..08c8cdd874 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -107,104 +107,12 @@ def __call__( ) return hidden_states, encoder_hidden_states -import torch.nn.functional as F from diffusers.models import attention_processor - attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi -from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXSafeConv3d -from diffusers.models.autoencoders.vae import DecoderOutput - - -class CogVideoXCausalConv3dGaudi(nn.Module): - r"""A 3D causal convolution layer that pads the input tensor to ensure causality in CogVideoX Model. - - Args: - in_channels (`int`): Number of channels in the input tensor. 
- out_channels (`int`): Number of output channels produced by the convolution. - kernel_size (`int` or `Tuple[int, int, int]`): Kernel size of the convolutional kernel. - stride (`int`, defaults to `1`): Stride of the convolution. - dilation (`int`, defaults to `1`): Dilation rate of the convolution. - pad_mode (`str`, defaults to `"constant"`): Padding mode. - """ - - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: Union[int, Tuple[int, int, int]], - stride: int = 1, - dilation: int = 1, - pad_mode: str = "constant", - ): - super().__init__() - - if isinstance(kernel_size, int): - kernel_size = (kernel_size,) * 3 - - time_kernel_size, height_kernel_size, width_kernel_size = kernel_size - - self.pad_mode = pad_mode - time_pad = dilation * (time_kernel_size - 1) + (1 - stride) - height_pad = height_kernel_size // 2 - width_pad = width_kernel_size // 2 - - self.height_pad = height_pad - self.width_pad = width_pad - self.time_pad = time_pad - self.time_causal_padding = (width_pad, width_pad, height_pad, height_pad, time_pad, 0) - - self.temporal_dim = 2 - self.time_kernel_size = time_kernel_size - - stride = (stride, 1, 1) - dilation = (dilation, 1, 1) - self.conv = CogVideoXSafeConv3d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - dilation=dilation, - ) - - - def fake_context_parallel_forward( - self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None - ) -> torch.Tensor: - kernel_size = self.time_kernel_size - if kernel_size > 1: - cached_inputs = [conv_cache] if conv_cache is not None else [inputs[:, :, :1]] * (kernel_size - 1) - inputs = torch.cat(cached_inputs + [inputs], dim=2) - return inputs - - def forward(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: - inputs = self.fake_context_parallel_forward(inputs, conv_cache) - #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() - - padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) - inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) - - output = self.conv(inputs_pad) - if self.time_kernel_size>1: - if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: - conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) - else: - conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() - return output, conv_cache - -from diffusers.models.autoencoders import autoencoder_kl_cogvideox - - -autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi - -from diffusers.models.autoencoders.autoencoder_kl_cogvideox import AutoencoderKLCogVideoX - def adapt_cogvideo_to_gaudi(): import diffusers - diffusers.models.autoencoders.autoencoder_kl_cogvideox.CogVideoXCausalConv3d = CogVideoXCausalConv3dGaudi - #diffusers.models.autoencoders.autoencoder_kl_cogvideox.AutoencoderKLCogVideoX = AutoencoderKLCogVideoXGaudi diffusers.models.attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi - #diffusers.models.transformers.cogvideox_transformer_3d.CogVideoXTransformer3DModel = CogVideoXTransformer3DModelGaudi diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index fc8b897fea..ca679aa146 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -50,6 +50,112 @@ def 
show_time(self, desc): self.t0 = t1 print(f'{desc} duration:{duration:.3f}s') +#try: +# from habana_frameworks.torch.hpex.kernels import FusedSDPA +#except ImportError: +# print("Not using HPU fused scaled dot-product attention kernel.") +# FusedSDPA = None +# +## FusedScaledDotProductAttention +#class ModuleFusedSDPA(torch.nn.Module): +# def __init__(self, fusedSDPA): +# super().__init__() +# self._hpu_kernel_fsdpa = fusedSDPA +# +# def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode): +# return self._hpu_kernel_fsdpa.apply(query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode) +# +# +#from diffusers.models.attention import Attention +# +#def apply_rotary_emb( +# x: torch.Tensor, +# freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], +#) -> Tuple[torch.Tensor, torch.Tensor]: +# """ +# Adapted from: https://github.com/huggingface/diffusers/blob/v0.31.0/src/diffusers/models/embeddings.py#L697 +# """ +# cos_, sin_ = freqs_cis # [S, D] +# +# cos = cos_[None, None] +# sin = sin_[None, None] +# cos, sin = cos.to(x.device), sin.to(x.device) +# +# x = torch.ops.hpu.rotary_pos_embedding(x, sin, cos, None, 0, 1) +# +# return x +# +#class CogVideoXAttnProcessorGaudi: +# r""" +# Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on +# query and key vectors, but does not include spatial normalization. +# """ +# +# def __init__(self): +# self.fused_scaled_dot_product_attention = ModuleFusedSDPA(FusedSDPA) if FusedSDPA else None +# +# def __call__( +# self, +# attn: Attention, +# hidden_states: torch.Tensor, +# encoder_hidden_states: torch.Tensor, +# attention_mask: Optional[torch.Tensor] = None, +# image_rotary_emb: Optional[torch.Tensor] = None, +# ) -> torch.Tensor: +# print(f'run gaudi transformer attention_processor with fused SDPA!') +# text_seq_length = encoder_hidden_states.size(1) +# +# hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) +# +# batch_size, sequence_length, _ = ( +# hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape +# ) +# +# if attention_mask is not None: +# attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) +# attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) +# +# query = attn.to_q(hidden_states) +# key = attn.to_k(hidden_states) +# value = attn.to_v(hidden_states) +# +# inner_dim = key.shape[-1] +# head_dim = inner_dim // attn.heads +# +# query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) +# key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) +# value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) +# +# if attn.norm_q is not None: +# query = attn.norm_q(query) +# if attn.norm_k is not None: +# key = attn.norm_k(key) +# +# # Apply RoPE if needed +# if image_rotary_emb is not None: +# query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) +# if not attn.is_cross_attention: +# key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) +# +# hidden_states = self.fused_scaled_dot_product_attention( +# query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_casual=False, scale=None, softmax_mode='fast' +# ) +# +# hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) +# +# # linear proj +# hidden_states = attn.to_out[0](hidden_states) +# # dropout +# 
hidden_states = attn.to_out[1](hidden_states) +# +# encoder_hidden_states, hidden_states = hidden_states.split( +# [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 +# ) +# return hidden_states, encoder_hidden_states +# +#from diffusers.models import attention_processor +#setattr(attention_processor, 'CogVideoXAttnProcessor2_0', CogVideoXAttnProcessorGaudi) + @dataclass class GaudiTextToVideoSDPipelineOutput(BaseOutput): r""" @@ -314,6 +420,26 @@ def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union setattr(AutoencoderKLCogVideoX, 'tiled_decode', tiled_decode_gaudi) +import torch.nn.functional as F +def CogVideoXCausalConv3dforwardGaudi(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: + #print('run gaudi CogVideoXCausalConv3d forward!') + inputs = self.fake_context_parallel_forward(inputs, conv_cache) + #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() + + padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) + inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) + + output = self.conv(inputs_pad) + if self.time_kernel_size>1: + if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: + conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) + else: + conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() + return output, conv_cache + +from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXCausalConv3d +setattr(CogVideoXCausalConv3d, 'forward', CogVideoXCausalConv3dforwardGaudi) + class GaudiCogVideoXPipeline(GaudiDiffusionPipeline, CogVideoXPipeline): r""" Adapted from: https://github.com/huggingface/diffusers/blob/v0.26.3/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L84 @@ -352,9 +478,8 @@ def __init__( print(f'vae decode name:{self.vae.tiled_decode.__name__}') from habana_frameworks.torch.hpu import wrap_in_hpu_graph - #self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) - self.vae.tiled_decode = tiled_decode_gaudi - print(f' vae decode name:{self.vae.tiled_decode.__name__} tiled_decode_gaudi:{tiled_decode_gaudi.__name__}') + self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) + #print(f' vae decode name:{self.vae.tiled_decode.__name__} tiled_decode_gaudi:{tiled_decode_gaudi.__name__}') @property def guidance_scale(self): From 65fb0ed60b89f1211c6a4e3e9923f3ad81827a43 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Sat, 8 Feb 2025 10:14:15 +0000 Subject: [PATCH 20/32] remove import gaudi function in __init__.py --- optimum/habana/diffusers/__init__.py | 1 - .../pipelines/cogvideox/cogvideoX_gaudi.py | 216 +++++++++++- .../cogvideox/pipeline_cogvideox_gaudi.py | 326 +----------------- 3 files changed, 217 insertions(+), 326 deletions(-) diff --git a/optimum/habana/diffusers/__init__.py b/optimum/habana/diffusers/__init__.py index 234233065f..86b6477c0b 100644 --- a/optimum/habana/diffusers/__init__.py +++ b/optimum/habana/diffusers/__init__.py @@ -1,4 +1,3 @@ -from .pipelines.cogvideox.cogvideoX_gaudi import adapt_cogvideo_to_gaudi from .pipelines.auto_pipeline import AutoPipelineForInpainting, AutoPipelineForText2Image from .pipelines.controlnet.pipeline_controlnet import GaudiStableDiffusionControlNetPipeline from .pipelines.controlnet.pipeline_stable_video_diffusion_controlnet import ( diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py index 
08c8cdd874..5cbfc6427c 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py @@ -2,6 +2,10 @@ import torch import torch.nn as nn +import torch.nn.functional as F +from diffusers.models.attention import Attention +from diffusers.models.autoencoders.vae import DecoderOutput +from diffusers.utils import USE_PEFT_BACKEND try: @@ -20,7 +24,6 @@ def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, sof return self._hpu_kernel_fsdpa.apply(query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode) -from diffusers.models.attention import Attention def apply_rotary_emb( x: torch.Tensor, @@ -39,7 +42,6 @@ def apply_rotary_emb( return x - class CogVideoXAttnProcessorGaudi: r""" Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on @@ -57,6 +59,7 @@ def __call__( attention_mask: Optional[torch.Tensor] = None, image_rotary_emb: Optional[torch.Tensor] = None, ) -> torch.Tensor: + print(f'run gaudi transformer attention_processor with fused SDPA!') text_seq_length = encoder_hidden_states.size(1) hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) @@ -107,12 +110,209 @@ def __call__( ) return hidden_states, encoder_hidden_states -from diffusers.models import attention_processor - -attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi +def cogvideoXTransformerForwardGaudi( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: Union[int, float, torch.LongTensor], + timestep_cond: Optional[torch.Tensor] = None, + image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + attention_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = True, +): + if attention_kwargs is not None: + attention_kwargs = attention_kwargs.copy() + lora_scale = attention_kwargs.pop("scale", 1.0) + else: + lora_scale = 1.0 + + if USE_PEFT_BACKEND: + # weight the lora layers by setting `lora_scale` for each PEFT layer + scale_lora_layers(self, lora_scale) + else: + if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: + logger.warning( + "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective." + ) + + batch_size, num_frames, channels, height, width = hidden_states.shape + + # 1. Time embedding + timesteps = timestep + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. + t_emb = t_emb.to(dtype=hidden_states.dtype) + emb = self.time_embedding(t_emb, timestep_cond) + + # 2. Patch embedding + hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) + hidden_states = self.embedding_dropout(hidden_states) + + text_seq_length = encoder_hidden_states.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + print(f'baymax debug run gaudi transformer forward!') + # 3. 
Transformer blocks + for i, block in enumerate(self.transformer_blocks): + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module): + def custom_forward(*inputs): + return module(*inputs) + + return custom_forward + + ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} + hidden_states, encoder_hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(block), + hidden_states, + encoder_hidden_states, + emb, + image_rotary_emb, + **ckpt_kwargs, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + ) + htcore.mark_step() + + if not self.config.use_rotary_positional_embeddings: + # CogVideoX-2B + hidden_states = self.norm_final(hidden_states) + else: + # CogVideoX-5B + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + hidden_states = self.norm_final(hidden_states) + hidden_states = hidden_states[:, text_seq_length:] + + # 4. Final block + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + # Note: we use `-1` instead of `channels`: + # - It is okay to `channels` use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels) + # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels) + p = self.config.patch_size + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + + if USE_PEFT_BACKEND: + # remove `lora_scale` from each PEFT layer + unscale_lora_layers(self, lora_scale) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) + +import habana_frameworks.torch.core as htcore +def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: + r""" + Decode a batch of images using a tiled decoder. -def adapt_cogvideo_to_gaudi(): - import diffusers - diffusers.models.attention_processor.CogVideoXAttnProcessor2_0 = CogVideoXAttnProcessorGaudi + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. + Returns: + [`~models.vae.DecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is + returned. + """ + # Rough memory assessment: + # - In CogVideoX-2B, there are a total of 24 CausalConv3d layers. + # - The biggest intermediate dimensions are: [1, 128, 9, 480, 720]. + # - Assume fp16 (2 bytes per value). 
+ # Memory required: 1 * 128 * 9 * 480 * 720 * 24 * 2 / 1024**3 = 17.8 GB + # + # Memory assessment when using tiling: + # - Assume everything as above but now HxW is 240x360 by tiling in half + # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB + + print('run gaudi pipelined tiled decode!') + batch_size, num_channels, num_frames, height, width = z.shape + + overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) + overlap_width = int(self.tile_latent_min_width * (1 - self.tile_overlap_factor_width)) + blend_extent_height = int(self.tile_sample_min_height * self.tile_overlap_factor_height) + blend_extent_width = int(self.tile_sample_min_width * self.tile_overlap_factor_width) + row_limit_height = self.tile_sample_min_height - blend_extent_height + row_limit_width = self.tile_sample_min_width - blend_extent_width + frame_batch_size = self.num_latent_frames_batch_size + + # Split z into overlapping tiles and decode them separately. + # The tiles have an overlap to avoid seams between tiles. + rows = [] + for i in range(0, height, overlap_height): + row = [] + for j in range(0, width, overlap_width): + num_batches = max(num_frames // frame_batch_size, 1) + conv_cache = None + time = [] + + for k in range(num_batches): + remaining_frames = num_frames % frame_batch_size + start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) + end_frame = frame_batch_size * (k + 1) + remaining_frames + tile = z[ + :, + :, + start_frame:end_frame, + i : i + self.tile_latent_min_height, + j : j + self.tile_latent_min_width, + ].clone() + if self.post_quant_conv is not None: + tile = self.post_quant_conv(tile) + tile, conv_cache = self.decoder(tile, conv_cache=conv_cache) + time.append(tile.clone()) + htcore.mark_step() + + row.append(torch.cat(time, dim=2)) + rows.append(row) + + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_extent_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_extent_width) + result_row.append(tile[:, :, :, :row_limit_height, :row_limit_width]) + result_rows.append(torch.cat(result_row, dim=4)) + + dec = torch.cat(result_rows, dim=3) + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) + + +def CogVideoXCausalConv3dforwardGaudi(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: + #print('run gaudi CogVideoXCausalConv3d forward!') + inputs = self.fake_context_parallel_forward(inputs, conv_cache) + #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() + + padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) + inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) + + output = self.conv(inputs_pad) + if self.time_kernel_size>1: + if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: + conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) + else: + conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() + return output, conv_cache diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index ca679aa146..1cc13382a4 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ 
b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -25,9 +25,10 @@ from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler from diffusers.utils import BaseOutput, logging from diffusers.utils.torch_utils import randn_tensor -from diffusers.models.autoencoders.vae import DecoderOutput from transformers import T5EncoderModel, T5Tokenizer +from optimum.habana.diffusers.pipelines.cogvideox.cogvideoX_gaudi import CogVideoXAttnProcessorGaudi, cogvideoXTransformerForwardGaudi +from optimum.habana.diffusers.pipelines.cogvideox.cogvideoX_gaudi import tiled_decode_gaudi, CogVideoXCausalConv3dforwardGaudi from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig import habana_frameworks.torch.core as htcore @@ -35,6 +36,9 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name +from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXCausalConv3d +setattr(CogVideoXCausalConv3d, 'forward', CogVideoXCausalConv3dforwardGaudi) +setattr(AutoencoderKLCogVideoX, 'tiled_decode', tiled_decode_gaudi) class time_box_t(): def __init__(self): @@ -50,112 +54,6 @@ def show_time(self, desc): self.t0 = t1 print(f'{desc} duration:{duration:.3f}s') -#try: -# from habana_frameworks.torch.hpex.kernels import FusedSDPA -#except ImportError: -# print("Not using HPU fused scaled dot-product attention kernel.") -# FusedSDPA = None -# -## FusedScaledDotProductAttention -#class ModuleFusedSDPA(torch.nn.Module): -# def __init__(self, fusedSDPA): -# super().__init__() -# self._hpu_kernel_fsdpa = fusedSDPA -# -# def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode): -# return self._hpu_kernel_fsdpa.apply(query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode) -# -# -#from diffusers.models.attention import Attention -# -#def apply_rotary_emb( -# x: torch.Tensor, -# freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], -#) -> Tuple[torch.Tensor, torch.Tensor]: -# """ -# Adapted from: https://github.com/huggingface/diffusers/blob/v0.31.0/src/diffusers/models/embeddings.py#L697 -# """ -# cos_, sin_ = freqs_cis # [S, D] -# -# cos = cos_[None, None] -# sin = sin_[None, None] -# cos, sin = cos.to(x.device), sin.to(x.device) -# -# x = torch.ops.hpu.rotary_pos_embedding(x, sin, cos, None, 0, 1) -# -# return x -# -#class CogVideoXAttnProcessorGaudi: -# r""" -# Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on -# query and key vectors, but does not include spatial normalization. 
-# """ -# -# def __init__(self): -# self.fused_scaled_dot_product_attention = ModuleFusedSDPA(FusedSDPA) if FusedSDPA else None -# -# def __call__( -# self, -# attn: Attention, -# hidden_states: torch.Tensor, -# encoder_hidden_states: torch.Tensor, -# attention_mask: Optional[torch.Tensor] = None, -# image_rotary_emb: Optional[torch.Tensor] = None, -# ) -> torch.Tensor: -# print(f'run gaudi transformer attention_processor with fused SDPA!') -# text_seq_length = encoder_hidden_states.size(1) -# -# hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) -# -# batch_size, sequence_length, _ = ( -# hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape -# ) -# -# if attention_mask is not None: -# attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) -# attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) -# -# query = attn.to_q(hidden_states) -# key = attn.to_k(hidden_states) -# value = attn.to_v(hidden_states) -# -# inner_dim = key.shape[-1] -# head_dim = inner_dim // attn.heads -# -# query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) -# key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) -# value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) -# -# if attn.norm_q is not None: -# query = attn.norm_q(query) -# if attn.norm_k is not None: -# key = attn.norm_k(key) -# -# # Apply RoPE if needed -# if image_rotary_emb is not None: -# query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) -# if not attn.is_cross_attention: -# key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) -# -# hidden_states = self.fused_scaled_dot_product_attention( -# query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_casual=False, scale=None, softmax_mode='fast' -# ) -# -# hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) -# -# # linear proj -# hidden_states = attn.to_out[0](hidden_states) -# # dropout -# hidden_states = attn.to_out[1](hidden_states) -# -# encoder_hidden_states, hidden_states = hidden_states.split( -# [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 -# ) -# return hidden_states, encoder_hidden_states -# -#from diffusers.models import attention_processor -#setattr(attention_processor, 'CogVideoXAttnProcessor2_0', CogVideoXAttnProcessorGaudi) - @dataclass class GaudiTextToVideoSDPipelineOutput(BaseOutput): r""" @@ -229,216 +127,7 @@ def retrieve_timesteps( timesteps = scheduler.timesteps return timesteps, num_inference_steps -from diffusers.utils import USE_PEFT_BACKEND -def gaudi_forward( - self, - hidden_states: torch.Tensor, - encoder_hidden_states: torch.Tensor, - timestep: Union[int, float, torch.LongTensor], - timestep_cond: Optional[torch.Tensor] = None, - image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - attention_kwargs: Optional[Dict[str, Any]] = None, - return_dict: bool = True, -): - if attention_kwargs is not None: - attention_kwargs = attention_kwargs.copy() - lora_scale = attention_kwargs.pop("scale", 1.0) - else: - lora_scale = 1.0 - - if USE_PEFT_BACKEND: - # weight the lora layers by setting `lora_scale` for each PEFT layer - scale_lora_layers(self, lora_scale) - else: - if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: - logger.warning( - "Passing `scale` via `attention_kwargs` when not using the PEFT 
backend is ineffective." - ) - - batch_size, num_frames, channels, height, width = hidden_states.shape - - # 1. Time embedding - timesteps = timestep - t_emb = self.time_proj(timesteps) - - # timesteps does not contain any weights and will always return f32 tensors - # but time_embedding might actually be running in fp16. so we need to cast here. - # there might be better ways to encapsulate this. - t_emb = t_emb.to(dtype=hidden_states.dtype) - emb = self.time_embedding(t_emb, timestep_cond) - - # 2. Patch embedding - hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) - hidden_states = self.embedding_dropout(hidden_states) - - text_seq_length = encoder_hidden_states.shape[1] - encoder_hidden_states = hidden_states[:, :text_seq_length] - hidden_states = hidden_states[:, text_seq_length:] - - print(f'baymax debug run gaudi transformer forward!') - # 3. Transformer blocks - for i, block in enumerate(self.transformer_blocks): - if self.training and self.gradient_checkpointing: - - def create_custom_forward(module): - def custom_forward(*inputs): - return module(*inputs) - - return custom_forward - - ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} - hidden_states, encoder_hidden_states = torch.utils.checkpoint.checkpoint( - create_custom_forward(block), - hidden_states, - encoder_hidden_states, - emb, - image_rotary_emb, - **ckpt_kwargs, - ) - else: - hidden_states, encoder_hidden_states = block( - hidden_states=hidden_states, - encoder_hidden_states=encoder_hidden_states, - temb=emb, - image_rotary_emb=image_rotary_emb, - ) - htcore.mark_step() - - if not self.config.use_rotary_positional_embeddings: - # CogVideoX-2B - hidden_states = self.norm_final(hidden_states) - else: - # CogVideoX-5B - hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) - hidden_states = self.norm_final(hidden_states) - hidden_states = hidden_states[:, text_seq_length:] - - # 4. Final block - hidden_states = self.norm_out(hidden_states, temb=emb) - hidden_states = self.proj_out(hidden_states) - - # 5. Unpatchify - # Note: we use `-1` instead of `channels`: - # - It is okay to `channels` use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels) - # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels) - p = self.config.patch_size - output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) - output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) - - if USE_PEFT_BACKEND: - # remove `lora_scale` from each PEFT layer - unscale_lora_layers(self, lora_scale) - - if not return_dict: - return (output,) - return Transformer2DModelOutput(sample=output) - -def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: - r""" - Decode a batch of images using a tiled decoder. - Args: - z (`torch.Tensor`): Input batch of latent vectors. - return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. - - Returns: - [`~models.vae.DecoderOutput`] or `tuple`: - If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is - returned. - """ - # Rough memory assessment: - # - In CogVideoX-2B, there are a total of 24 CausalConv3d layers. - # - The biggest intermediate dimensions are: [1, 128, 9, 480, 720]. 
- # - Assume fp16 (2 bytes per value). - # Memory required: 1 * 128 * 9 * 480 * 720 * 24 * 2 / 1024**3 = 17.8 GB - # - # Memory assessment when using tiling: - # - Assume everything as above but now HxW is 240x360 by tiling in half - # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB - - print('run gaudi pipelined tiled decode!') - batch_size, num_channels, num_frames, height, width = z.shape - - overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) - overlap_width = int(self.tile_latent_min_width * (1 - self.tile_overlap_factor_width)) - blend_extent_height = int(self.tile_sample_min_height * self.tile_overlap_factor_height) - blend_extent_width = int(self.tile_sample_min_width * self.tile_overlap_factor_width) - row_limit_height = self.tile_sample_min_height - blend_extent_height - row_limit_width = self.tile_sample_min_width - blend_extent_width - frame_batch_size = self.num_latent_frames_batch_size - - # Split z into overlapping tiles and decode them separately. - # The tiles have an overlap to avoid seams between tiles. - rows = [] - for i in range(0, height, overlap_height): - row = [] - for j in range(0, width, overlap_width): - num_batches = max(num_frames // frame_batch_size, 1) - conv_cache = None - time = [] - - for k in range(num_batches): - remaining_frames = num_frames % frame_batch_size - start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) - end_frame = frame_batch_size * (k + 1) + remaining_frames - tile = z[ - :, - :, - start_frame:end_frame, - i : i + self.tile_latent_min_height, - j : j + self.tile_latent_min_width, - ].clone() - if self.post_quant_conv is not None: - tile = self.post_quant_conv(tile) - tile, conv_cache = self.decoder(tile, conv_cache=conv_cache) - time.append(tile.clone()) - htcore.mark_step() - - row.append(torch.cat(time, dim=2)) - rows.append(row) - - result_rows = [] - for i, row in enumerate(rows): - result_row = [] - for j, tile in enumerate(row): - # blend the above tile and the left tile - # to the current tile and add the current tile to the result row - if i > 0: - tile = self.blend_v(rows[i - 1][j], tile, blend_extent_height) - if j > 0: - tile = self.blend_h(row[j - 1], tile, blend_extent_width) - result_row.append(tile[:, :, :, :row_limit_height, :row_limit_width]) - result_rows.append(torch.cat(result_row, dim=4)) - - dec = torch.cat(result_rows, dim=3) - - if not return_dict: - return (dec,) - - return DecoderOutput(sample=dec) - -setattr(AutoencoderKLCogVideoX, 'tiled_decode', tiled_decode_gaudi) - -import torch.nn.functional as F -def CogVideoXCausalConv3dforwardGaudi(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: - #print('run gaudi CogVideoXCausalConv3d forward!') - inputs = self.fake_context_parallel_forward(inputs, conv_cache) - #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() - - padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) - inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) - - output = self.conv(inputs_pad) - if self.time_kernel_size>1: - if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: - conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) - else: - conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() - return output, conv_cache - -from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXCausalConv3d -setattr(CogVideoXCausalConv3d, 'forward', 
CogVideoXCausalConv3dforwardGaudi) class GaudiCogVideoXPipeline(GaudiDiffusionPipeline, CogVideoXPipeline): r""" @@ -474,8 +163,11 @@ def __init__( scheduler, ) self.to(self._device) - self.transformer.forward = gaudi_forward + self.transformer.forward = cogvideoXTransformerForwardGaudi print(f'vae decode name:{self.vae.tiled_decode.__name__}') + for block in self.transformer.transformer_blocks: + block.attn1.set_processor(CogVideoXAttnProcessorGaudi()) + print(f'set gaudi attention Processor done!') from habana_frameworks.torch.hpu import wrap_in_hpu_graph self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) From cbf7ee1ef2227b41354e481f5745525a31d3be7f Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Tue, 11 Feb 2025 09:30:35 +0000 Subject: [PATCH 21/32] mv gaudi func to cogvideo pipelines. --- .../pipelines/cogvideox/cogvideoX_gaudi.py | 318 ----------------- .../cogvideox/pipeline_cogvideox_gaudi.py | 323 +++++++++++++++++- 2 files changed, 313 insertions(+), 328 deletions(-) delete mode 100644 optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py diff --git a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py deleted file mode 100644 index 5cbfc6427c..0000000000 --- a/optimum/habana/diffusers/pipelines/cogvideox/cogvideoX_gaudi.py +++ /dev/null @@ -1,318 +0,0 @@ -from typing import Any, Dict, Optional, Tuple, Union - -import torch -import torch.nn as nn -import torch.nn.functional as F -from diffusers.models.attention import Attention -from diffusers.models.autoencoders.vae import DecoderOutput -from diffusers.utils import USE_PEFT_BACKEND - - -try: - from habana_frameworks.torch.hpex.kernels import FusedSDPA -except ImportError: - print("Not using HPU fused scaled dot-product attention kernel.") - FusedSDPA = None - -# FusedScaledDotProductAttention -class ModuleFusedSDPA(torch.nn.Module): - def __init__(self, fusedSDPA): - super().__init__() - self._hpu_kernel_fsdpa = fusedSDPA - - def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode): - return self._hpu_kernel_fsdpa.apply(query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode) - - - -def apply_rotary_emb( - x: torch.Tensor, - freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Adapted from: https://github.com/huggingface/diffusers/blob/v0.31.0/src/diffusers/models/embeddings.py#L697 - """ - cos_, sin_ = freqs_cis # [S, D] - - cos = cos_[None, None] - sin = sin_[None, None] - cos, sin = cos.to(x.device), sin.to(x.device) - - x = torch.ops.hpu.rotary_pos_embedding(x, sin, cos, None, 0, 1) - - return x - -class CogVideoXAttnProcessorGaudi: - r""" - Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on - query and key vectors, but does not include spatial normalization. 
- """ - - def __init__(self): - self.fused_scaled_dot_product_attention = ModuleFusedSDPA(FusedSDPA) if FusedSDPA else None - - def __call__( - self, - attn: Attention, - hidden_states: torch.Tensor, - encoder_hidden_states: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, - image_rotary_emb: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - print(f'run gaudi transformer attention_processor with fused SDPA!') - text_seq_length = encoder_hidden_states.size(1) - - hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) - - batch_size, sequence_length, _ = ( - hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape - ) - - if attention_mask is not None: - attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) - attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) - - query = attn.to_q(hidden_states) - key = attn.to_k(hidden_states) - value = attn.to_v(hidden_states) - - inner_dim = key.shape[-1] - head_dim = inner_dim // attn.heads - - query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) - - if attn.norm_q is not None: - query = attn.norm_q(query) - if attn.norm_k is not None: - key = attn.norm_k(key) - - # Apply RoPE if needed - if image_rotary_emb is not None: - query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) - if not attn.is_cross_attention: - key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) - - hidden_states = self.fused_scaled_dot_product_attention( - query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_casual=False, scale=None, softmax_mode='fast' - ) - - hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) - - # linear proj - hidden_states = attn.to_out[0](hidden_states) - # dropout - hidden_states = attn.to_out[1](hidden_states) - - encoder_hidden_states, hidden_states = hidden_states.split( - [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 - ) - return hidden_states, encoder_hidden_states - -def cogvideoXTransformerForwardGaudi( - self, - hidden_states: torch.Tensor, - encoder_hidden_states: torch.Tensor, - timestep: Union[int, float, torch.LongTensor], - timestep_cond: Optional[torch.Tensor] = None, - image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, - attention_kwargs: Optional[Dict[str, Any]] = None, - return_dict: bool = True, -): - if attention_kwargs is not None: - attention_kwargs = attention_kwargs.copy() - lora_scale = attention_kwargs.pop("scale", 1.0) - else: - lora_scale = 1.0 - - if USE_PEFT_BACKEND: - # weight the lora layers by setting `lora_scale` for each PEFT layer - scale_lora_layers(self, lora_scale) - else: - if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: - logger.warning( - "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective." - ) - - batch_size, num_frames, channels, height, width = hidden_states.shape - - # 1. Time embedding - timesteps = timestep - t_emb = self.time_proj(timesteps) - - # timesteps does not contain any weights and will always return f32 tensors - # but time_embedding might actually be running in fp16. so we need to cast here. - # there might be better ways to encapsulate this. 
- t_emb = t_emb.to(dtype=hidden_states.dtype) - emb = self.time_embedding(t_emb, timestep_cond) - - # 2. Patch embedding - hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) - hidden_states = self.embedding_dropout(hidden_states) - - text_seq_length = encoder_hidden_states.shape[1] - encoder_hidden_states = hidden_states[:, :text_seq_length] - hidden_states = hidden_states[:, text_seq_length:] - - print(f'baymax debug run gaudi transformer forward!') - # 3. Transformer blocks - for i, block in enumerate(self.transformer_blocks): - if self.training and self.gradient_checkpointing: - - def create_custom_forward(module): - def custom_forward(*inputs): - return module(*inputs) - - return custom_forward - - ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} - hidden_states, encoder_hidden_states = torch.utils.checkpoint.checkpoint( - create_custom_forward(block), - hidden_states, - encoder_hidden_states, - emb, - image_rotary_emb, - **ckpt_kwargs, - ) - else: - hidden_states, encoder_hidden_states = block( - hidden_states=hidden_states, - encoder_hidden_states=encoder_hidden_states, - temb=emb, - image_rotary_emb=image_rotary_emb, - ) - htcore.mark_step() - - if not self.config.use_rotary_positional_embeddings: - # CogVideoX-2B - hidden_states = self.norm_final(hidden_states) - else: - # CogVideoX-5B - hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) - hidden_states = self.norm_final(hidden_states) - hidden_states = hidden_states[:, text_seq_length:] - - # 4. Final block - hidden_states = self.norm_out(hidden_states, temb=emb) - hidden_states = self.proj_out(hidden_states) - - # 5. Unpatchify - # Note: we use `-1` instead of `channels`: - # - It is okay to `channels` use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels) - # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels) - p = self.config.patch_size - output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) - output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) - - if USE_PEFT_BACKEND: - # remove `lora_scale` from each PEFT layer - unscale_lora_layers(self, lora_scale) - - if not return_dict: - return (output,) - return Transformer2DModelOutput(sample=output) - -import habana_frameworks.torch.core as htcore -def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: - r""" - Decode a batch of images using a tiled decoder. - - Args: - z (`torch.Tensor`): Input batch of latent vectors. - return_dict (`bool`, *optional*, defaults to `True`): - Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. - - Returns: - [`~models.vae.DecoderOutput`] or `tuple`: - If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is - returned. - """ - # Rough memory assessment: - # - In CogVideoX-2B, there are a total of 24 CausalConv3d layers. - # - The biggest intermediate dimensions are: [1, 128, 9, 480, 720]. - # - Assume fp16 (2 bytes per value). 
- # Memory required: 1 * 128 * 9 * 480 * 720 * 24 * 2 / 1024**3 = 17.8 GB - # - # Memory assessment when using tiling: - # - Assume everything as above but now HxW is 240x360 by tiling in half - # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB - - print('run gaudi pipelined tiled decode!') - batch_size, num_channels, num_frames, height, width = z.shape - - overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) - overlap_width = int(self.tile_latent_min_width * (1 - self.tile_overlap_factor_width)) - blend_extent_height = int(self.tile_sample_min_height * self.tile_overlap_factor_height) - blend_extent_width = int(self.tile_sample_min_width * self.tile_overlap_factor_width) - row_limit_height = self.tile_sample_min_height - blend_extent_height - row_limit_width = self.tile_sample_min_width - blend_extent_width - frame_batch_size = self.num_latent_frames_batch_size - - # Split z into overlapping tiles and decode them separately. - # The tiles have an overlap to avoid seams between tiles. - rows = [] - for i in range(0, height, overlap_height): - row = [] - for j in range(0, width, overlap_width): - num_batches = max(num_frames // frame_batch_size, 1) - conv_cache = None - time = [] - - for k in range(num_batches): - remaining_frames = num_frames % frame_batch_size - start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) - end_frame = frame_batch_size * (k + 1) + remaining_frames - tile = z[ - :, - :, - start_frame:end_frame, - i : i + self.tile_latent_min_height, - j : j + self.tile_latent_min_width, - ].clone() - if self.post_quant_conv is not None: - tile = self.post_quant_conv(tile) - tile, conv_cache = self.decoder(tile, conv_cache=conv_cache) - time.append(tile.clone()) - htcore.mark_step() - - row.append(torch.cat(time, dim=2)) - rows.append(row) - - result_rows = [] - for i, row in enumerate(rows): - result_row = [] - for j, tile in enumerate(row): - # blend the above tile and the left tile - # to the current tile and add the current tile to the result row - if i > 0: - tile = self.blend_v(rows[i - 1][j], tile, blend_extent_height) - if j > 0: - tile = self.blend_h(row[j - 1], tile, blend_extent_width) - result_row.append(tile[:, :, :, :row_limit_height, :row_limit_width]) - result_rows.append(torch.cat(result_row, dim=4)) - - dec = torch.cat(result_rows, dim=3) - - if not return_dict: - return (dec,) - - return DecoderOutput(sample=dec) - - -def CogVideoXCausalConv3dforwardGaudi(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: - #print('run gaudi CogVideoXCausalConv3d forward!') - inputs = self.fake_context_parallel_forward(inputs, conv_cache) - #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() - - padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) - inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) - - output = self.conv(inputs_pad) - if self.time_kernel_size>1: - if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: - conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) - else: - conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() - return output, conv_cache - diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index 1cc13382a4..64258149e5 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ 
b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -19,23 +19,330 @@ from typing import Any, Dict, Optional, Tuple, Union import torch +import torch.nn as nn +import torch.nn.functional as F from diffusers import CogVideoXPipeline from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler from diffusers.utils import BaseOutput, logging from diffusers.utils.torch_utils import randn_tensor +from diffusers.models.attention import Attention +from diffusers.models.autoencoders.vae import DecoderOutput +from diffusers.utils import USE_PEFT_BACKEND from transformers import T5EncoderModel, T5Tokenizer -from optimum.habana.diffusers.pipelines.cogvideox.cogvideoX_gaudi import CogVideoXAttnProcessorGaudi, cogvideoXTransformerForwardGaudi -from optimum.habana.diffusers.pipelines.cogvideox.cogvideoX_gaudi import tiled_decode_gaudi, CogVideoXCausalConv3dforwardGaudi from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig -import habana_frameworks.torch.core as htcore logger = logging.get_logger(__name__) # pylint: disable=invalid-name +try: + from habana_frameworks.torch.hpex.kernels import FusedSDPA +except ImportError: + print("Not using HPU fused scaled dot-product attention kernel.") + FusedSDPA = None + +# FusedScaledDotProductAttention +class ModuleFusedSDPA(torch.nn.Module): + def __init__(self, fusedSDPA): + super().__init__() + self._hpu_kernel_fsdpa = fusedSDPA + + def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode): + return self._hpu_kernel_fsdpa.apply(query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode) + +def apply_rotary_emb( + x: torch.Tensor, + freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], +) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Adapted from: https://github.com/huggingface/diffusers/blob/v0.31.0/src/diffusers/models/embeddings.py#L697 + """ + cos_, sin_ = freqs_cis # [S, D] + + cos = cos_[None, None] + sin = sin_[None, None] + cos, sin = cos.to(x.device), sin.to(x.device) + + x = torch.ops.hpu.rotary_pos_embedding(x, sin, cos, None, 0, 1) + + return x + +class CogVideoXAttnProcessorGaudi: + r""" + Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on + query and key vectors, but does not include spatial normalization. 
+ """ + + def __init__(self): + self.fused_scaled_dot_product_attention = ModuleFusedSDPA(FusedSDPA) if FusedSDPA else None + + def __call__( + self, + attn: Attention, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + image_rotary_emb: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + text_seq_length = encoder_hidden_states.size(1) + + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + + batch_size, sequence_length, _ = ( + hidden_states.shape if encoder_hidden_states is None else encoder_hidden_states.shape + ) + + if attention_mask is not None: + attention_mask = attn.prepare_attention_mask(attention_mask, sequence_length, batch_size) + attention_mask = attention_mask.view(batch_size, attn.heads, -1, attention_mask.shape[-1]) + + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # Apply RoPE if needed + if image_rotary_emb is not None: + query[:, :, text_seq_length:] = apply_rotary_emb(query[:, :, text_seq_length:], image_rotary_emb) + if not attn.is_cross_attention: + key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) + + hidden_states = self.fused_scaled_dot_product_attention( + query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_casual=False, scale=None, softmax_mode='fast' + ) + + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + encoder_hidden_states, hidden_states = hidden_states.split( + [text_seq_length, hidden_states.size(1) - text_seq_length], dim=1 + ) + return hidden_states, encoder_hidden_states + +def cogvideoXTransformerForwardGaudi( + self, + hidden_states: torch.Tensor, + encoder_hidden_states: torch.Tensor, + timestep: Union[int, float, torch.LongTensor], + timestep_cond: Optional[torch.Tensor] = None, + image_rotary_emb: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + attention_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = True, +): + if attention_kwargs is not None: + attention_kwargs = attention_kwargs.copy() + lora_scale = attention_kwargs.pop("scale", 1.0) + else: + lora_scale = 1.0 + + if USE_PEFT_BACKEND: + # weight the lora layers by setting `lora_scale` for each PEFT layer + scale_lora_layers(self, lora_scale) + else: + if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: + logger.warning( + "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective." + ) + + batch_size, num_frames, channels, height, width = hidden_states.shape + + # 1. Time embedding + timesteps = timestep + t_emb = self.time_proj(timesteps) + + # timesteps does not contain any weights and will always return f32 tensors + # but time_embedding might actually be running in fp16. so we need to cast here. + # there might be better ways to encapsulate this. 
+ t_emb = t_emb.to(dtype=hidden_states.dtype) + emb = self.time_embedding(t_emb, timestep_cond) + + # 2. Patch embedding + hidden_states = self.patch_embed(encoder_hidden_states, hidden_states) + hidden_states = self.embedding_dropout(hidden_states) + + text_seq_length = encoder_hidden_states.shape[1] + encoder_hidden_states = hidden_states[:, :text_seq_length] + hidden_states = hidden_states[:, text_seq_length:] + + import habana_frameworks.torch.core as htcore + # 3. Transformer blocks + for i, block in enumerate(self.transformer_blocks): + if self.training and self.gradient_checkpointing: + + def create_custom_forward(module): + def custom_forward(*inputs): + return module(*inputs) + + return custom_forward + + ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {} + hidden_states, encoder_hidden_states = torch.utils.checkpoint.checkpoint( + create_custom_forward(block), + hidden_states, + encoder_hidden_states, + emb, + image_rotary_emb, + **ckpt_kwargs, + ) + else: + hidden_states, encoder_hidden_states = block( + hidden_states=hidden_states, + encoder_hidden_states=encoder_hidden_states, + temb=emb, + image_rotary_emb=image_rotary_emb, + ) + htcore.mark_step() + + if not self.config.use_rotary_positional_embeddings: + # CogVideoX-2B + hidden_states = self.norm_final(hidden_states) + else: + # CogVideoX-5B + hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1) + hidden_states = self.norm_final(hidden_states) + hidden_states = hidden_states[:, text_seq_length:] + + # 4. Final block + hidden_states = self.norm_out(hidden_states, temb=emb) + hidden_states = self.proj_out(hidden_states) + + # 5. Unpatchify + # Note: we use `-1` instead of `channels`: + # - It is okay to `channels` use for CogVideoX-2b and CogVideoX-5b (number of input channels is equal to output channels) + # - However, for CogVideoX-5b-I2V also takes concatenated input image latents (number of input channels is twice the output channels) + p = self.config.patch_size + output = hidden_states.reshape(batch_size, num_frames, height // p, width // p, -1, p, p) + output = output.permute(0, 1, 4, 2, 5, 3, 6).flatten(5, 6).flatten(3, 4) + + if USE_PEFT_BACKEND: + # remove `lora_scale` from each PEFT layer + unscale_lora_layers(self, lora_scale) + + if not return_dict: + return (output,) + return Transformer2DModelOutput(sample=output) + +def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: + r""" + Decode a batch of images using a tiled decoder. + + Args: + z (`torch.Tensor`): Input batch of latent vectors. + return_dict (`bool`, *optional*, defaults to `True`): + Whether or not to return a [`~models.vae.DecoderOutput`] instead of a plain tuple. + + Returns: + [`~models.vae.DecoderOutput`] or `tuple`: + If return_dict is True, a [`~models.vae.DecoderOutput`] is returned, otherwise a plain `tuple` is + returned. + """ + # Rough memory assessment: + # - In CogVideoX-2B, there are a total of 24 CausalConv3d layers. + # - The biggest intermediate dimensions are: [1, 128, 9, 480, 720]. + # - Assume fp16 (2 bytes per value). 
+ # Memory required: 1 * 128 * 9 * 480 * 720 * 24 * 2 / 1024**3 = 17.8 GB + # + # Memory assessment when using tiling: + # - Assume everything as above but now HxW is 240x360 by tiling in half + # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB + + print('run gaudi pipelined tiled decode!') + batch_size, num_channels, num_frames, height, width = z.shape + + overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) + overlap_width = int(self.tile_latent_min_width * (1 - self.tile_overlap_factor_width)) + blend_extent_height = int(self.tile_sample_min_height * self.tile_overlap_factor_height) + blend_extent_width = int(self.tile_sample_min_width * self.tile_overlap_factor_width) + row_limit_height = self.tile_sample_min_height - blend_extent_height + row_limit_width = self.tile_sample_min_width - blend_extent_width + frame_batch_size = self.num_latent_frames_batch_size + + import habana_frameworks.torch.core as htcore + # Split z into overlapping tiles and decode them separately. + # The tiles have an overlap to avoid seams between tiles. + rows = [] + for i in range(0, height, overlap_height): + row = [] + for j in range(0, width, overlap_width): + num_batches = max(num_frames // frame_batch_size, 1) + conv_cache = None + time = [] + + for k in range(num_batches): + remaining_frames = num_frames % frame_batch_size + start_frame = frame_batch_size * k + (0 if k == 0 else remaining_frames) + end_frame = frame_batch_size * (k + 1) + remaining_frames + tile = z[ + :, + :, + start_frame:end_frame, + i : i + self.tile_latent_min_height, + j : j + self.tile_latent_min_width, + ].clone() + if self.post_quant_conv is not None: + tile = self.post_quant_conv(tile) + tile, conv_cache = self.decoder(tile, conv_cache=conv_cache) + time.append(tile.clone()) + htcore.mark_step() + + row.append(torch.cat(time, dim=2)) + rows.append(row) + + result_rows = [] + for i, row in enumerate(rows): + result_row = [] + for j, tile in enumerate(row): + # blend the above tile and the left tile + # to the current tile and add the current tile to the result row + if i > 0: + tile = self.blend_v(rows[i - 1][j], tile, blend_extent_height) + if j > 0: + tile = self.blend_h(row[j - 1], tile, blend_extent_width) + result_row.append(tile[:, :, :, :row_limit_height, :row_limit_width]) + result_rows.append(torch.cat(result_row, dim=4)) + + dec = torch.cat(result_rows, dim=3) + + if not return_dict: + return (dec,) + + return DecoderOutput(sample=dec) + + +def CogVideoXCausalConv3dforwardGaudi(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: + #print('run gaudi CogVideoXCausalConv3d forward!') + inputs = self.fake_context_parallel_forward(inputs, conv_cache) + #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() + + padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) + inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) + + output = self.conv(inputs_pad) + if self.time_kernel_size>1: + if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: + conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) + else: + conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() + return output, conv_cache + from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXCausalConv3d setattr(CogVideoXCausalConv3d, 'forward', CogVideoXCausalConv3dforwardGaudi) setattr(AutoencoderKLCogVideoX, 'tiled_decode', tiled_decode_gaudi) @@ -68,7 
+375,6 @@ class GaudiTextToVideoSDPipelineOutput(BaseOutput): frames: torch.Tensor -# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps def retrieve_timesteps( scheduler, num_inference_steps: Optional[int] = None, @@ -146,7 +452,6 @@ def __init__( gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, ): - print(f'GaudiCogVideoXPipeline init use_habana:{use_habana} use_hpu_graphs:{use_hpu_graphs}') GaudiDiffusionPipeline.__init__( self, use_habana, @@ -164,14 +469,11 @@ def __init__( ) self.to(self._device) self.transformer.forward = cogvideoXTransformerForwardGaudi - print(f'vae decode name:{self.vae.tiled_decode.__name__}') for block in self.transformer.transformer_blocks: block.attn1.set_processor(CogVideoXAttnProcessorGaudi()) - print(f'set gaudi attention Processor done!') from habana_frameworks.torch.hpu import wrap_in_hpu_graph self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) - #print(f' vae decode name:{self.vae.tiled_decode.__name__} tiled_decode_gaudi:{tiled_decode_gaudi.__name__}') @property def guidance_scale(self): @@ -410,6 +712,7 @@ def __call__( # 7. Denoising loop num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) outputs = [] + import habana_frameworks.torch.core as htcore with self.progress_bar(total=num_inference_steps) as progress_bar: # for DPM-solver++ old_pred_original_sample = None @@ -457,7 +760,7 @@ def __call__( ) latents = latents.to(prompt_embeds.dtype) - if self.use_hpu_graphs: + if not self.use_hpu_graphs: htcore.mark_step() # call the callback, if provided @@ -474,7 +777,7 @@ def __call__( if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() - if self.use_hpu_graphs: + if not self.use_hpu_graphs: htcore.mark_step() time_box.show_time('transformer_hpu') From 7148011f1d59292ce37b2c2b1db6c384225ec5a5 Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Tue, 11 Feb 2025 10:31:47 +0000 Subject: [PATCH 22/32] refine cogvideox examples. 
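
With this change, CogVideoX generation goes through the shared `text_to_video_generation.py` entry point instead of the dedicated `cogvideox_generate.py` script. A minimal sketch of the programmatic flow behind the new `cogvideox` pipeline type, using the import path introduced by this patch (the checkpoint, prompt, and output name are placeholders; the generation arguments mirror the values hard-coded in the example script):

```python
import torch
from diffusers.utils.export_utils import export_to_video

from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline
from optimum.habana.transformers.gaudi_configuration import GaudiConfig

# Gaudi runtime configuration used by the example: fused ops + bf16 autocast.
gaudi_config = GaudiConfig(use_fused_adam=True, use_fused_clip_norm=True, use_torch_autocast=True)

pipe = GaudiCogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-2b",  # placeholder checkpoint; 2b is the variant validated in the README
    use_habana=True,
    use_hpu_graphs=True,
    gaudi_config=gaudi_config,
    torch_dtype=torch.bfloat16,
)

# Tiled and sliced VAE decoding keep the decoder within HPU memory limits.
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()

video = pipe(
    prompt="A panda playing a guitar in a bamboo forest",  # placeholder prompt
    num_videos_per_prompt=1,
    num_inference_steps=50,
    num_frames=49,
    guidance_scale=6,
    generator=torch.Generator(device="cpu").manual_seed(42),
).frames[0]

export_to_video(video, "cogvideoX_out.mp4", fps=8)
```

The README hunk below documents the equivalent CLI invocation via `--pipeline_type 'cogvideox' --video_save_dir 'cogvideo_out'`.
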
--- examples/text-to-video/README.md | 5 +- examples/text-to-video/cogvideox_generate.py | 86 ----------------- .../text-to-video/text_to_video_generation.py | 95 ++++++++++++------- 3 files changed, 66 insertions(+), 120 deletions(-) delete mode 100644 examples/text-to-video/cogvideox_generate.py diff --git a/examples/text-to-video/README.md b/examples/text-to-video/README.md index a7ab947b24..49905cb5b8 100644 --- a/examples/text-to-video/README.md +++ b/examples/text-to-video/README.md @@ -42,9 +42,10 @@ Models that have been validated: CogvideoX test: ```bash -python3 cogvideox_generate.py \ +python3 text_to_video_generation.py \ --model_name_or_path THUDM/CogVideoX-2b \ - --output_name gaudi_output.mp4 + --pipeline_type 'cogvideox' \ + --video_save_dir 'cogvideo_out' \ ``` diff --git a/examples/text-to-video/cogvideox_generate.py b/examples/text-to-video/cogvideox_generate.py deleted file mode 100644 index 4b95c0a8ee..0000000000 --- a/examples/text-to-video/cogvideox_generate.py +++ /dev/null @@ -1,86 +0,0 @@ -import argparse -import logging - -import torch -from diffusers.utils import export_to_video - -from optimum.habana.diffusers.pipelines.cogvideox.cogvideoX_gaudi import adapt_cogvideo_to_gaudi -from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline -from optimum.habana.transformers.gaudi_configuration import GaudiConfig - - -logger = logging.getLogger(__name__) - - -def main(): - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - - parser.add_argument( - "--model_name_or_path", - default="THUDM/CogVideoX-2b", - type=str, - help="Path to pre-trained model", - ) - # Pipeline arguments - parser.add_argument( - "--prompts", - type=str, - nargs="*", - default="A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. 
The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance.", - help="The prompt or prompts to guide the video generation.", - ) - parser.add_argument( - "--output_name", - default="panda_gaudi.mp4", - type=str, - help="Path to pre-trained model", - ) - - args = parser.parse_args() - - gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} - gaudi_config_kwargs["use_torch_autocast"] = True - - gaudi_config = GaudiConfig(**gaudi_config_kwargs) - logger.info(f"Gaudi Config: {gaudi_config}") - - - kwargs = { - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": gaudi_config, - } - kwargs["torch_dtype"] = torch.bfloat16 - - - print('now to load model.....') - pipe = GaudiCogVideoXPipeline.from_pretrained( - args.model_name_or_path, - **kwargs - ) - print('pipe line load done!') - - pipe.vae.enable_tiling() - pipe.vae.enable_slicing() - - print('now to generate video.') - video = pipe( - prompt=args.prompts, - num_videos_per_prompt=1, - num_inference_steps=50, - num_frames=49, - guidance_scale=6, - generator=torch.Generator(device="cpu").manual_seed(42), - ).frames[0] - - print('generate video done!') - - export_to_video(video, args.output_name, fps=8) - - - -if __name__ == "__main__": - main() - - diff --git a/examples/text-to-video/text_to_video_generation.py b/examples/text-to-video/text_to_video_generation.py index 8813e321cf..220722224b 100755 --- a/examples/text-to-video/text_to_video_generation.py +++ b/examples/text-to-video/text_to_video_generation.py @@ -23,7 +23,9 @@ import torch from diffusers.utils.export_utils import export_to_video +from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline from optimum.habana.diffusers import GaudiTextToVideoSDPipeline +#from optimum.habana.diffusers import GaudiCogVideoXPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig from optimum.habana.utils import set_seed @@ -60,6 +62,13 @@ def main(): default="Spiderman is surfing", help="The prompt or prompts to guide the video generation.", ) + parser.add_argument( + "--pipeline_type", + type=str, + nargs="*", + default="sdp", + help="pipeline type:sdp or cogvideoX", + ) parser.add_argument( "--num_videos_per_prompt", type=int, default=1, help="The number of videos to generate per prompt." 
) @@ -178,38 +187,60 @@ def main(): kwargs["torch_dtype"] = torch.float32 # Generate images - pipeline: GaudiTextToVideoSDPipeline = GaudiTextToVideoSDPipeline.from_pretrained( - args.model_name_or_path, **kwargs - ) - set_seed(args.seed) - outputs = pipeline( - prompt=args.prompts, - num_videos_per_prompt=args.num_videos_per_prompt, - batch_size=args.batch_size, - num_inference_steps=args.num_inference_steps, - guidance_scale=args.guidance_scale, - negative_prompt=args.negative_prompts, - eta=args.eta, - output_type="pil" if args.output_type == "mp4" else args.output_type, # Naming inconsistency in base class - **kwargs_call, - ) - - # Save the pipeline in the specified directory if not None - if args.pipeline_save_dir is not None: - pipeline.save_pretrained(args.pipeline_save_dir) - - # Save images in the specified directory if not None and if they are in PIL format - if args.video_save_dir is not None: - if args.output_type == "mp4": - video_save_dir = Path(args.video_save_dir) - video_save_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Saving images in {video_save_dir.resolve()}...") - - for i, video in enumerate(outputs.videos): - filename = video_save_dir / f"video_{i + 1}.mp4" - export_to_video(video, str(filename.resolve())) - else: - logger.warning("--output_type should be equal to 'mp4' to save images in --video_save_dir.") + if args.pipeline_type[0] == 'sdp': + pipeline: GaudiTextToVideoSDPipeline = GaudiTextToVideoSDPipeline.from_pretrained( + args.model_name_or_path, **kwargs + ) + set_seed(args.seed) + outputs = pipeline( + prompt=args.prompts, + num_videos_per_prompt=args.num_videos_per_prompt, + batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + guidance_scale=args.guidance_scale, + negative_prompt=args.negative_prompts, + eta=args.eta, + output_type="pil" if args.output_type == "mp4" else args.output_type, # Naming inconsistency in base class + **kwargs_call, + ) + # Save the pipeline in the specified directory if not None + if args.pipeline_save_dir is not None: + pipeline.save_pretrained(args.pipeline_save_dir) + + # Save images in the specified directory if not None and if they are in PIL format + if args.video_save_dir is not None: + if args.output_type == "mp4": + video_save_dir = Path(args.video_save_dir) + video_save_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Saving images in {video_save_dir.resolve()}...") + + for i, video in enumerate(outputs.videos): + filename = video_save_dir / f"video_{i + 1}.mp4" + export_to_video(video, str(filename.resolve())) + else: + logger.warning("--output_type should be equal to 'mp4' to save images in --video_save_dir.") + + elif args.pipeline_type[0] == 'cogvideox': + pipeline: GaudiCogVideoXPipeline= GaudiCogVideoXPipeline.from_pretrained( + args.model_name_or_path, **kwargs + ) + pipeline.vae.enable_tiling() + pipeline.vae.enable_slicing() + video = pipeline( + prompt=args.prompts, + num_videos_per_prompt=1, + num_inference_steps=50, + num_frames=49, + guidance_scale=6, + generator=torch.Generator(device="cpu").manual_seed(42), + ).frames[0] + video_save_dir = Path(args.video_save_dir) + video_save_dir.mkdir(parents=True, exist_ok=True) + filename = video_save_dir / f"cogvideoX_out.mp4" + export_to_video(video, str(filename.resolve()), fps=8) + else: + logger.error(f"unsupported pipe line:{args.pipeline_type}") + if __name__ == "__main__": From 1316cf2f29755fccf47bd51232bf0e432ef8610e Mon Sep 17 00:00:00 2001 From: nc-BobLee Date: Tue, 11 Feb 2025 10:40:10 +0000 Subject: [PATCH 23/32] 
import cogvideo pipeline from OH diffusers. --- examples/text-to-video/text_to_video_generation.py | 3 +-- optimum/habana/diffusers/__init__.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/text-to-video/text_to_video_generation.py b/examples/text-to-video/text_to_video_generation.py index 220722224b..26c83bdf98 100755 --- a/examples/text-to-video/text_to_video_generation.py +++ b/examples/text-to-video/text_to_video_generation.py @@ -23,9 +23,8 @@ import torch from diffusers.utils.export_utils import export_to_video -from optimum.habana.diffusers.pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline from optimum.habana.diffusers import GaudiTextToVideoSDPipeline -#from optimum.habana.diffusers import GaudiCogVideoXPipeline +from optimum.habana.diffusers import GaudiCogVideoXPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig from optimum.habana.utils import set_seed diff --git a/optimum/habana/diffusers/__init__.py b/optimum/habana/diffusers/__init__.py index 86b6477c0b..086257a8f8 100644 --- a/optimum/habana/diffusers/__init__.py +++ b/optimum/habana/diffusers/__init__.py @@ -1,4 +1,5 @@ from .pipelines.auto_pipeline import AutoPipelineForInpainting, AutoPipelineForText2Image +from .pipelines.cogvideox.pipeline_cogvideox_gaudi import GaudiCogVideoXPipeline from .pipelines.controlnet.pipeline_controlnet import GaudiStableDiffusionControlNetPipeline from .pipelines.controlnet.pipeline_stable_video_diffusion_controlnet import ( GaudiStableVideoDiffusionControlNetPipeline, From b49115cfa6a25a74d363d26850e834b3b08be258 Mon Sep 17 00:00:00 2001 From: root Date: Fri, 21 Feb 2025 14:43:53 +0800 Subject: [PATCH 24/32] refine some code style. --- .../cogvideox/pipeline_cogvideox_gaudi.py | 21 ------------------- tests/test_diffusers.py | 4 ++-- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index 64258149e5..309960ce73 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -347,20 +347,6 @@ def CogVideoXCausalConv3dforwardGaudi(self, inputs: torch.Tensor, conv_cache: Op setattr(CogVideoXCausalConv3d, 'forward', CogVideoXCausalConv3dforwardGaudi) setattr(AutoencoderKLCogVideoX, 'tiled_decode', tiled_decode_gaudi) -class time_box_t(): - def __init__(self): - self.t0=None - - def start(self): - self.t0 = tm_perf.perf_counter() - - def show_time(self, desc): - torch.hpu.synchronize() - t1 = tm_perf.perf_counter() - duration = t1-self.t0 - self.t0 = t1 - print(f'{desc} duration:{duration:.3f}s') - @dataclass class GaudiTextToVideoSDPipelineOutput(BaseOutput): r""" @@ -632,8 +618,6 @@ def __call__( if isinstance(callback_on_step_end, (PipelineCallback, MultiPipelineCallbacks)): callback_on_step_end_tensor_inputs = callback_on_step_end.tensor_inputs - time_box = time_box_t() - time_box.start() # 0. Default height and width to unet height = height or self.transformer.config.sample_size * self.vae_scale_factor_spatial width = width or self.transformer.config.sample_size * self.vae_scale_factor_spatial @@ -707,7 +691,6 @@ def __call__( if self.transformer.config.use_rotary_positional_embeddings else None ) - time_box.show_time('prepare latents') # 7. 
Denoising loop num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) @@ -779,14 +762,10 @@ def __call__( progress_bar.update() if not self.use_hpu_graphs: htcore.mark_step() - time_box.show_time('transformer_hpu') - #HabanaProfile.stop() if not output_type == "latent": video = self.decode_latents(latents) - time_box.show_time('decode latents') video = self.video_processor.postprocess_video(video=video, output_type=output_type) - time_box.show_time('postprocess_video') else: video = latents diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py index 97f0a60a07..a184e1512b 100755 --- a/tests/test_diffusers.py +++ b/tests/test_diffusers.py @@ -3906,8 +3906,8 @@ def test_cogvideoX_default_case(self): generator=torch.Generator(device="cpu").manual_seed(42), ).frames[0] - assert video is not None - assert 49 == len(video) + self.assertIsNotNone(video) + self.assertEqual(49 == len(video)) class GaudiTextToVideoSDPipelineTester(TestCase): """ From 8fb20410ccc73d1fb304149c88291211b725cdab Mon Sep 17 00:00:00 2001 From: root Date: Mon, 24 Feb 2025 11:04:58 +0800 Subject: [PATCH 25/32] fix code formmattings. --- .../text-to-video/text_to_video_generation.py | 14 +- .../cogvideox/pipeline_cogvideox_gaudi.py | 88 ++++++---- tests/test_diffusers.py | 166 +++++++++--------- 3 files changed, 145 insertions(+), 123 deletions(-) diff --git a/examples/text-to-video/text_to_video_generation.py b/examples/text-to-video/text_to_video_generation.py index 26c83bdf98..014fe5d6dc 100755 --- a/examples/text-to-video/text_to_video_generation.py +++ b/examples/text-to-video/text_to_video_generation.py @@ -23,8 +23,7 @@ import torch from diffusers.utils.export_utils import export_to_video -from optimum.habana.diffusers import GaudiTextToVideoSDPipeline -from optimum.habana.diffusers import GaudiCogVideoXPipeline +from optimum.habana.diffusers import GaudiCogVideoXPipeline, GaudiTextToVideoSDPipeline from optimum.habana.transformers.gaudi_configuration import GaudiConfig from optimum.habana.utils import set_seed @@ -186,7 +185,7 @@ def main(): kwargs["torch_dtype"] = torch.float32 # Generate images - if args.pipeline_type[0] == 'sdp': + if args.pipeline_type[0] == "sdp": pipeline: GaudiTextToVideoSDPipeline = GaudiTextToVideoSDPipeline.from_pretrained( args.model_name_or_path, **kwargs ) @@ -219,10 +218,8 @@ def main(): else: logger.warning("--output_type should be equal to 'mp4' to save images in --video_save_dir.") - elif args.pipeline_type[0] == 'cogvideox': - pipeline: GaudiCogVideoXPipeline= GaudiCogVideoXPipeline.from_pretrained( - args.model_name_or_path, **kwargs - ) + elif args.pipeline_type[0] == "cogvideox": + pipeline: GaudiCogVideoXPipeline = GaudiCogVideoXPipeline.from_pretrained(args.model_name_or_path, **kwargs) pipeline.vae.enable_tiling() pipeline.vae.enable_slicing() video = pipeline( @@ -235,12 +232,11 @@ def main(): ).frames[0] video_save_dir = Path(args.video_save_dir) video_save_dir.mkdir(parents=True, exist_ok=True) - filename = video_save_dir / f"cogvideoX_out.mp4" + filename = video_save_dir / "cogvideoX_out.mp4" export_to_video(video, str(filename.resolve()), fps=8) else: logger.error(f"unsupported pipe line:{args.pipeline_type}") - if __name__ == "__main__": main() diff --git a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py index 309960ce73..e77adc1c9a 100644 --- a/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py +++ 
b/optimum/habana/diffusers/pipelines/cogvideox/pipeline_cogvideox_gaudi.py @@ -13,23 +13,29 @@ # limitations under the License. import inspect -import time as tm_perf +import math from dataclasses import dataclass -from typing import Callable, Dict, List, Optional, Union -from typing import Any, Dict, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import torch -import torch.nn as nn import torch.nn.functional as F from diffusers import CogVideoXPipeline from diffusers.callbacks import MultiPipelineCallbacks, PipelineCallback from diffusers.models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel -from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler -from diffusers.utils import BaseOutput, logging -from diffusers.utils.torch_utils import randn_tensor from diffusers.models.attention import Attention +from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXCausalConv3d from diffusers.models.autoencoders.vae import DecoderOutput -from diffusers.utils import USE_PEFT_BACKEND +from diffusers.models.modeling_outputs import Transformer2DModelOutput +from diffusers.schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler +from diffusers.utils import ( + USE_PEFT_BACKEND, + BaseOutput, + is_torch_version, + logging, + scale_lora_layers, + unscale_lora_layers, +) +from diffusers.utils.torch_utils import randn_tensor from transformers import T5EncoderModel, T5Tokenizer from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline @@ -44,6 +50,7 @@ print("Not using HPU fused scaled dot-product attention kernel.") FusedSDPA = None + # FusedScaledDotProductAttention class ModuleFusedSDPA(torch.nn.Module): def __init__(self, fusedSDPA): @@ -53,6 +60,7 @@ def __init__(self, fusedSDPA): def forward(self, query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode): return self._hpu_kernel_fsdpa.apply(query, key, value, attn_mask, dropout_p, is_casual, scale, softmax_mode) + def apply_rotary_emb( x: torch.Tensor, freqs_cis: Union[torch.Tensor, Tuple[torch.Tensor]], @@ -70,6 +78,7 @@ def apply_rotary_emb( return x + class CogVideoXAttnProcessorGaudi: r""" Processor for implementing scaled dot-product attention for the CogVideoX model. It applies a rotary embedding on @@ -122,7 +131,14 @@ def __call__( key[:, :, text_seq_length:] = apply_rotary_emb(key[:, :, text_seq_length:], image_rotary_emb) hidden_states = self.fused_scaled_dot_product_attention( - query, key, value, attn_mask=attention_mask, dropout_p=0.0, is_casual=False, scale=None, softmax_mode='fast' + query, + key, + value, + attn_mask=attention_mask, + dropout_p=0.0, + is_casual=False, + scale=None, + softmax_mode="fast", ) hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) @@ -137,6 +153,7 @@ def __call__( ) return hidden_states, encoder_hidden_states + def cogvideoXTransformerForwardGaudi( self, hidden_states: torch.Tensor, @@ -158,9 +175,7 @@ def cogvideoXTransformerForwardGaudi( scale_lora_layers(self, lora_scale) else: if attention_kwargs is not None and attention_kwargs.get("scale", None) is not None: - logger.warning( - "Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective." 
- ) + logger.warning("Passing `scale` via `attention_kwargs` when not using the PEFT backend is ineffective.") batch_size, num_frames, channels, height, width = hidden_states.shape @@ -183,6 +198,7 @@ def cogvideoXTransformerForwardGaudi( hidden_states = hidden_states[:, text_seq_length:] import habana_frameworks.torch.core as htcore + # 3. Transformer blocks for i, block in enumerate(self.transformer_blocks): if self.training and self.gradient_checkpointing: @@ -240,6 +256,7 @@ def custom_forward(*inputs): return (output,) return Transformer2DModelOutput(sample=output) + def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union[DecoderOutput, torch.Tensor]: r""" Decode a batch of images using a tiled decoder. @@ -264,7 +281,7 @@ def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union # - Assume everything as above but now HxW is 240x360 by tiling in half # Memory required: 1 * 128 * 9 * 240 * 360 * 24 * 2 / 1024**3 = 4.5 GB - print('run gaudi pipelined tiled decode!') + print("run gaudi pipelined tiled decode!") batch_size, num_channels, num_frames, height, width = z.shape overlap_height = int(self.tile_latent_min_height * (1 - self.tile_overlap_factor_height)) @@ -276,6 +293,7 @@ def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union frame_batch_size = self.num_latent_frames_batch_size import habana_frameworks.torch.core as htcore + # Split z into overlapping tiles and decode them separately. # The tiles have an overlap to avoid seams between tiles. rows = [] @@ -327,25 +345,28 @@ def tiled_decode_gaudi(self, z: torch.Tensor, return_dict: bool = True) -> Union return DecoderOutput(sample=dec) -def CogVideoXCausalConv3dforwardGaudi(self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None) -> torch.Tensor: - #print('run gaudi CogVideoXCausalConv3d forward!') +def CogVideoXCausalConv3dforwardGaudi( + self, inputs: torch.Tensor, conv_cache: Optional[torch.Tensor] = None +) -> torch.Tensor: + # print('run gaudi CogVideoXCausalConv3d forward!') inputs = self.fake_context_parallel_forward(inputs, conv_cache) - #conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() + # conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() padding_2d = (self.width_pad, self.width_pad, self.height_pad, self.height_pad) inputs_pad = F.pad(inputs, padding_2d, mode="constant", value=0) output = self.conv(inputs_pad) - if self.time_kernel_size>1: - if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1:].shape: - conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1:]) + if self.time_kernel_size > 1: + if conv_cache is not None and conv_cache.shape == inputs[:, :, -self.time_kernel_size + 1 :].shape: + conv_cache.copy_(inputs[:, :, -self.time_kernel_size + 1 :]) else: - conv_cache = inputs[:, :, -self.time_kernel_size + 1:].clone() + conv_cache = inputs[:, :, -self.time_kernel_size + 1 :].clone() return output, conv_cache -from diffusers.models.autoencoders.autoencoder_kl_cogvideox import CogVideoXCausalConv3d -setattr(CogVideoXCausalConv3d, 'forward', CogVideoXCausalConv3dforwardGaudi) -setattr(AutoencoderKLCogVideoX, 'tiled_decode', tiled_decode_gaudi) + +setattr(CogVideoXCausalConv3d, "forward", CogVideoXCausalConv3dforwardGaudi) +setattr(AutoencoderKLCogVideoX, "tiled_decode", tiled_decode_gaudi) + @dataclass class GaudiTextToVideoSDPipelineOutput(BaseOutput): @@ -361,6 +382,7 @@ class GaudiTextToVideoSDPipelineOutput(BaseOutput): frames: torch.Tensor + def 
retrieve_timesteps( scheduler, num_inference_steps: Optional[int] = None, @@ -420,7 +442,6 @@ def retrieve_timesteps( return timesteps, num_inference_steps - class GaudiCogVideoXPipeline(GaudiDiffusionPipeline, CogVideoXPipeline): r""" Adapted from: https://github.com/huggingface/diffusers/blob/v0.26.3/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L84 @@ -459,6 +480,7 @@ def __init__( block.attn1.set_processor(CogVideoXAttnProcessorGaudi()) from habana_frameworks.torch.hpu import wrap_in_hpu_graph + self.vae.decoder = wrap_in_hpu_graph(self.vae.decoder) @property @@ -507,7 +529,6 @@ def prepare_latents( latents = latents * self.scheduler.init_noise_sigma return latents - @torch.no_grad() def __call__( self, @@ -644,7 +665,6 @@ def __call__( else: batch_size = prompt_embeds.shape[0] - device = self._execution_device # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` @@ -696,6 +716,7 @@ def __call__( num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) outputs = [] import habana_frameworks.torch.core as htcore + with self.progress_bar(total=num_inference_steps) as progress_bar: # for DPM-solver++ old_pred_original_sample = None @@ -721,16 +742,18 @@ def __call__( # perform guidance if use_dynamic_cfg: self._guidance_scale = 1 + guidance_scale * ( - (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) / 2 + (1 - math.cos(math.pi * ((num_inference_steps - t.item()) / num_inference_steps) ** 5.0)) + / 2 ) if do_classifier_free_guidance: noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond) - # compute the previous noisy sample x_t -> x_t-1 if not isinstance(self.scheduler, CogVideoXDPMScheduler): - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[ + 0 + ] else: latents, old_pred_original_sample = self.scheduler.step( noise_pred, @@ -757,7 +780,6 @@ def __call__( prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): progress_bar.update() if not self.use_hpu_graphs: @@ -804,11 +826,11 @@ def capture_replay(self, latent_model_input, prompt_embeds, timestep, image_rota graph.capture_begin() outputs = self.transformer( self.transformer, - hidden_states = inputs[0], - encoder_hidden_states = inputs[1], + hidden_states=inputs[0], + encoder_hidden_states=inputs[1], timestep=inputs[2], image_rotary_emb=inputs[3], - return_dict=inputs[4] + return_dict=inputs[4], )[0] graph.capture_end() graph_inputs = inputs diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py index a184e1512b..267c8e12ec 100755 --- a/tests/test_diffusers.py +++ b/tests/test_diffusers.py @@ -40,9 +40,11 @@ import torch from diffusers import ( AutoencoderKL, + AutoencoderKLCogVideoX, AutoencoderKLTemporalDecoder, AutoencoderTiny, - AutoencoderKLCogVideoX, + CogVideoXDDIMScheduler, + CogVideoXTransformer3DModel, ControlNetModel, DiffusionPipeline, DPMSolverMultistepScheduler, @@ -60,8 +62,6 @@ UNet3DConditionModel, UNetSpatioTemporalConditionModel, 
UniPCMultistepScheduler, - CogVideoXTransformer3DModel, - CogVideoXDDIMScheduler, ) from diffusers.image_processor import VaeImageProcessor from diffusers.pipelines.controlnet.pipeline_controlnet import MultiControlNetModel @@ -91,14 +91,15 @@ DPTConfig, DPTFeatureExtractor, DPTForDepthEstimation, + T5Config, T5EncoderModel, T5Tokenizer, - T5Config, ) from transformers.testing_utils import parse_flag_from_env, slow from optimum.habana import GaudiConfig from optimum.habana.diffusers import ( + GaudiCogVideoXPipeline, GaudiDDIMScheduler, GaudiDDPMPipeline, GaudiDiffusionPipeline, @@ -122,7 +123,6 @@ GaudiStableVideoDiffusionControlNetPipeline, GaudiStableVideoDiffusionPipeline, GaudiTextToVideoSDPipeline, - GaudiCogVideoXPipeline, ) from optimum.habana.diffusers.models import ( ControlNetSDVModel, @@ -3778,6 +3778,7 @@ def test_deterministic_image_generation_no_throughput_regression_bf16(self): self.assertGreaterEqual(outputs.throughput, 0.95 * DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT) + class GaudiCogVideoXPipelineTester(TestCase): """ Tests the TextToVideoSDPipeline for Gaudi. @@ -3787,88 +3788,89 @@ class GaudiCogVideoXPipelineTester(TestCase): def get_dummy_components(self): tokenizer = T5Tokenizer.from_pretrained("hf-internal-testing/tiny-random-t5") set_seed(0) - text_encoder_cfg = T5Config(vocab_size = 32128, - d_kv = 64, - d_ff = 10240, - num_layers = 8, - num_decoder_layers=8, - relative_attention_num_buckets=32, - relative_attention_max_distance=128, - initializer_factor=1.0, - feed_forward_proj='gated-gelu', - is_encoder_decoder=True, - pad_token_id=0, - eos_token_id=1, - torch_dtype = torch.bfloat16, - d_model = 4096) + text_encoder_cfg = T5Config( + vocab_size=32128, + d_kv=64, + d_ff=10240, + num_layers=8, + num_decoder_layers=8, + relative_attention_num_buckets=32, + relative_attention_max_distance=128, + initializer_factor=1.0, + feed_forward_proj="gated-gelu", + is_encoder_decoder=True, + pad_token_id=0, + eos_token_id=1, + torch_dtype=torch.bfloat16, + d_model=4096, + ) text_encoder = T5EncoderModel(text_encoder_cfg).bfloat16() set_seed(0) transformer = CogVideoXTransformer3DModel( - num_attention_heads=30, - attention_head_dim=64, - in_channels=16, - out_channels=16, - flip_sin_to_cos=True, - freq_shift=0, - time_embed_dim=512, - text_embed_dim=4096, - num_layers=8, - dropout=0.0, - attention_bias=True, - sample_width=90, - sample_height=60, - sample_frames=49, - patch_size=2, - temporal_compression_ratio=4, - max_text_seq_length=226, - activation_fn="gelu-approximate", - timestep_activation_fn="silu", - norm_elementwise_affine=True, - norm_eps=1e-5, - spatial_interpolation_scale=1.875, - temporal_interpolation_scale=1.0, - ).bfloat16() + num_attention_heads=30, + attention_head_dim=64, + in_channels=16, + out_channels=16, + flip_sin_to_cos=True, + freq_shift=0, + time_embed_dim=512, + text_embed_dim=4096, + num_layers=8, + dropout=0.0, + attention_bias=True, + sample_width=90, + sample_height=60, + sample_frames=49, + patch_size=2, + temporal_compression_ratio=4, + max_text_seq_length=226, + activation_fn="gelu-approximate", + timestep_activation_fn="silu", + norm_elementwise_affine=True, + norm_eps=1e-5, + spatial_interpolation_scale=1.875, + temporal_interpolation_scale=1.0, + ).bfloat16() scheduler = CogVideoXDDIMScheduler( - num_train_timesteps=1000, - beta_start = 0.00085, - beta_end = 0.0120, - beta_schedule = "scaled_linear", - clip_sample=False, - set_alpha_to_one = True, - steps_offset=0, - prediction_type = "v_prediction", - clip_sample_range = 1.0, - 
sample_max_value = 1.0, - timestep_spacing = "trailing", - rescale_betas_zero_snr = True, - snr_shift_scale=1.0, - ) - + num_train_timesteps=1000, + beta_start=0.00085, + beta_end=0.0120, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=True, + steps_offset=0, + prediction_type="v_prediction", + clip_sample_range=1.0, + sample_max_value=1.0, + timestep_spacing="trailing", + rescale_betas_zero_snr=True, + snr_shift_scale=1.0, + ) set_seed(0) - vae = AutoencoderKLCogVideoX(in_channels=3, - out_channels = 3, - down_block_types = [ - "CogVideoXDownBlock3D", - "CogVideoXDownBlock3D", - "CogVideoXDownBlock3D", - "CogVideoXDownBlock3D" - ], - block_out_channels = [128,256,256,512], - latent_channels=16, - layers_per_block=1, - act_fn="silu", - norm_eps=1e-6, - norm_num_groups=32, - temporal_compression_ratio=4, - sample_height=480, - sample_width=720, - scaling_factor=1.15258426, - ).bfloat16() - - + vae = AutoencoderKLCogVideoX( + in_channels=3, + out_channels=3, + down_block_types=[ + "CogVideoXDownBlock3D", + "CogVideoXDownBlock3D", + "CogVideoXDownBlock3D", + "CogVideoXDownBlock3D", + ], + block_out_channels=[128, 256, 256, 512], + latent_channels=16, + layers_per_block=1, + act_fn="silu", + norm_eps=1e-6, + norm_num_groups=32, + temporal_compression_ratio=4, + sample_height=480, + sample_width=720, + scaling_factor=1.15258426, + ).bfloat16() + vae.enable_slicing() vae.enable_tiling() @@ -3882,7 +3884,7 @@ def get_dummy_components(self): return components - def get_dummy_inputs(self, device, seed=0): + def get_dummy_inputs(self): prompts = "A panda, dressed in a small, red jacket and a tiny hat, sits on a wooden stool in a serene bamboo forest. The panda's fluffy paws strum a miniature acoustic guitar, producing soft, melodic tunes. Nearby, a few other pandas gather, watching curiously and some clapping in rhythm. Sunlight filters through the tall bamboo, casting a gentle glow on the scene. The panda's face is expressive, showing concentration and joy as it plays. The background includes a small, flowing stream and vibrant green foliage, enhancing the peaceful and magical atmosphere of this unique musical performance." return prompts @@ -3896,8 +3898,9 @@ def test_cogvideoX_default_case(self): components["use_hpu_graphs"] = True components["gaudi_config"] = gaudi_config + prompts = self.get_dummy_inputs() cogVideoX_pipe = GaudiCogVideoXPipeline(**components) - video = pipe( + video = cogVideoX_pipe( prompt=prompts, num_videos_per_prompt=1, num_inference_steps=5, @@ -3909,6 +3912,7 @@ def test_cogvideoX_default_case(self): self.assertIsNotNone(video) self.assertEqual(49 == len(video)) + class GaudiTextToVideoSDPipelineTester(TestCase): """ Tests the TextToVideoSDPipeline for Gaudi. 
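Taken together, the formatting patch above and the earlier import change leave `GaudiCogVideoXPipeline` importable directly from `optimum.habana.diffusers` and exercised by `GaudiCogVideoXPipelineTester`. The snippet below is a minimal usage sketch, not part of the patch series; the checkpoint id, prompt, and output filename are placeholders to adapt to your environment.

```python
import torch
from diffusers.utils import export_to_video

from optimum.habana.diffusers import GaudiCogVideoXPipeline
from optimum.habana.transformers.gaudi_configuration import GaudiConfig

# Placeholder checkpoint; any CogVideoX checkpoint with the usual layout should work.
pipeline = GaudiCogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-2b",
    use_habana=True,
    use_hpu_graphs=True,
    gaudi_config=GaudiConfig(use_torch_autocast=True),
    torch_dtype=torch.bfloat16,
)
# Tiled/sliced VAE decoding keeps memory usage manageable for 49-frame videos.
pipeline.vae.enable_tiling()
pipeline.vae.enable_slicing()

video = pipeline(
    prompt="A placeholder prompt describing the video to generate",
    num_videos_per_prompt=1,
    num_inference_steps=50,
    num_frames=49,
    guidance_scale=6,
    generator=torch.Generator(device="cpu").manual_seed(42),
).frames[0]
export_to_video(video, "cogvideoX_out.mp4", fps=8)
```

On Gaudi, `use_hpu_graphs=True` together with bf16 autocast mirrors the settings used by the example script and the unit test in this series.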
From 46cf46a9e3554e852ab1ecb5d8227ff372ece3bb Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Fri, 24 Jan 2025 11:27:07 +0100 Subject: [PATCH 26/32] Fix PR doc upload workflow --- .github/workflows/upload_pr_documentation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/upload_pr_documentation.yml b/.github/workflows/upload_pr_documentation.yml index 3c27ba66ea..764e03bfe8 100644 --- a/.github/workflows/upload_pr_documentation.yml +++ b/.github/workflows/upload_pr_documentation.yml @@ -8,7 +8,7 @@ on: jobs: build: - uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main + uses: regisss/doc-builder/.github/workflows/upload_pr_documentation.yml@fix_doc_download with: package_name: optimum-habana secrets: From c8acce7027ee4664759ffdb9e67420c105b370c5 Mon Sep 17 00:00:00 2001 From: Spurthi Lokeshappa Date: Fri, 24 Jan 2025 04:13:13 -0800 Subject: [PATCH 27/32] Added Unit Test for Gemma-2-27b model (#1616) Co-authored-by: regisss <15324346+regisss@users.noreply.github.com> --- tests/test_text_generation_example.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py index 060e952e29..f782bd84d1 100644 --- a/tests/test_text_generation_example.py +++ b/tests/test_text_generation_example.py @@ -43,6 +43,7 @@ # ("Qwen/Qwen1.5-7B", 4, False, 490.8621617893209, False), ("google/gemma-7b", 1, False, 109.70751574382221, True), ("google/gemma-2-9b", 1, False, 92.302359446567, True), + ("google/gemma-2-27b", 1, False, 36.578709544111, True), ("state-spaces/mamba-130m-hf", 1536, False, 5385.511100161605, False), # ("Deci/DeciLM-7B", 1, False, 115, False), ("Qwen/Qwen2-7B", 256, False, 8870.945160540245, True), @@ -87,6 +88,7 @@ ("meta-llama/Meta-Llama-3-70B-Instruct", 8, 1, 64), ("facebook/opt-66b", 2, 1, 28.48069266504111), ("google/gemma-2-9b", 8, 1, 110.12610917383735), + ("google/gemma-2-27b", 8, 1, 87.578709544111), ], "torch_compile": [ ("meta-llama/Llama-2-7b-hf", 102.27823420713148), @@ -109,6 +111,7 @@ "bigcode/starcoder2-3b": 'def print_hello_world():\n print("Hello World")\n\ndef print_hello_world_with_name(name):\n print("Hello World, " + name)\n\ndef print_hello_world_with_name_and_age(name, age):\n print("Hello World, " + name + ", " + str(age))\n\ndef print_hello_world_with_name_and_age_and_gender(name, age, gender):\n print("Hello', "google/gemma-7b": "DeepSpeed is a machine learning framework that enables training of large-scale models on commodity hardware. It is designed to be a drop-in replacement for PyTorch, and it is compatible with the existing PyTorch ecosystem. DeepSpeed is designed to be easy to use, and it provides a number of features that make it easy to train large-scale models. DeepSpeed is designed to be scalable, and it can be used to train models on a single machine or on a cluster of machines. DeepSpeed is designed to be efficient,", "google/gemma-2-9b": "DeepSpeed is a machine learning framework that enables training of large-scale deep learning models on a single GPU or across multiple GPUs. It is designed to be easy to use and highly scalable, making it a powerful tool for researchers and practitioners working with large-scale deep learning models.\n\nDeepSpeed is built on top of PyTorch, a popular deep learning framework, and provides a set of tools and libraries that make it easy to train large-scale models. 
It includes features such as zero-shot inference, which allows models to be", + "google/gemma-2-27b": "DeepSpeed is a machine learning framework that enables you to train models with trillions of parameters and beyond, using model parallelism to partition large models over multiple GPUs.\n\nThe following is a brief introduction to the DeepSpeed model parallel training.\n\n
**1. Introduction**
\n\nThe DeepSpeed model parallel training is a simple and effective way to train large models. It is a framework that enables you to train models with trillions of parameters and beyond.\n\nDeepSpeed is a distributed deep learning optimization toolkit that makes it easy and efficient", "meta-llama/Llama-2-7b-hf": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. DeepSpeed is designed to be easy to use and to provide a high level of flex", "mistralai/Mistral-7B-v0.1": "DeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system.\n\nDeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system", "mistralai/Mixtral-8x7B-v0.1": "DeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## Introduction\n\nDeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## What is DeepSpeed", From 73b3bcf6f2a8d2c2540e62ce7d2d665c93558a2d Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Fri, 24 Jan 2025 15:09:19 +0100 Subject: [PATCH 28/32] Remove debug for PR doc upload workflow --- .github/workflows/upload_pr_documentation.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/upload_pr_documentation.yml b/.github/workflows/upload_pr_documentation.yml index 764e03bfe8..3c27ba66ea 100644 --- a/.github/workflows/upload_pr_documentation.yml +++ b/.github/workflows/upload_pr_documentation.yml @@ -8,7 +8,7 @@ on: jobs: build: - uses: regisss/doc-builder/.github/workflows/upload_pr_documentation.yml@fix_doc_download + uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main with: package_name: optimum-habana secrets: From 94503b19d695a8a4dfb420cb3ac7ff70ab5569e1 Mon Sep 17 00:00:00 2001 From: "Seunghyuk Park (shepark)" Date: Fri, 24 Jan 2025 07:59:51 -0800 Subject: [PATCH 29/32] Update TRL README.md to clean up models (#1706) --- examples/trl/README.md | 97 +----------------------------------------- 1 file changed, 2 insertions(+), 95 deletions(-) diff --git a/examples/trl/README.md b/examples/trl/README.md index a40a8254e5..5e488e7072 100644 --- a/examples/trl/README.md +++ b/examples/trl/README.md @@ -79,103 +79,10 @@ $ pip install -U -r requirements.txt ### Training -#### For meta-llama/Llama-2-7b-hf - -The following example is for the creation of StackLlaMa 2: a Stack exchange llama-v2-7b model. -There are two main steps to the DPO training process: -1. 
Supervised fine-tuning of the base llama-v2-7b model to create llama-v2-7b-se: - - ``` - python ../gaudi_spawn.py --world_size 8 --use_mpi sft.py \ - --model_name_or_path meta-llama/Llama-2-7b-hf \ - --dataset_name "lvwerra/stack-exchange-paired" \ - --output_dir="./sft" \ - --max_steps=500 \ - --logging_steps=10 \ - --save_steps=100 \ - --do_train \ - --per_device_train_batch_size=4 \ - --per_device_eval_batch_size=1 \ - --gradient_accumulation_steps=2 \ - --learning_rate=1e-4 \ - --lr_scheduler_type="cosine" \ - --warmup_steps=100 \ - --weight_decay=0.05 \ - --optim="paged_adamw_32bit" \ - --lora_target_modules "q_proj" "v_proj" \ - --bf16 \ - --remove_unused_columns=False \ - --run_name="sft_llama2" \ - --report_to=none \ - --use_habana \ - --use_lazy_mode - ``` - To merge the adaptors to get the final sft merged checkpoint, we can use the `merge_peft_adapter.py` helper script that comes with TRL: - ``` - python merge_peft_adapter.py --base_model_name="meta-llama/Llama-2-7b-hf" --adapter_model_name="sft" --output_name="sft/final_merged_checkpoint" - ``` - -2. Run the DPO trainer using the model saved by the previous step: - ``` - python ../gaudi_spawn.py --world_size 8 --use_mpi dpo.py \ - --model_name_or_path="sft/final_merged_checkpoint" \ - --tokenizer_name_or_path=meta-llama/Llama-2-7b-hf \ - --lora_target_modules "q_proj" "v_proj" "k_proj" "out_proj" "fc_in" "fc_out" "wte" \ - --output_dir="dpo" \ - --report_to=none - ``` - -#### mistralai/Mistral-7B-v0.1 - -1. Supervised fine-tuning of the base Mistral-7B-v0.1 model: - - ``` - DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 python ../gaudi_spawn.py --world_size 8 --use_deepspeed sft.py \ - --model_name_or_path mistralai/Mistral-7B-v0.1 \ - --dataset_name "lvwerra/stack-exchange-paired" \ - --deepspeed ../language-modeling/llama2_ds_zero3_config.json \ - --output_dir="./sft" \ - --do_train \ - --max_steps=500 \ - --logging_steps=10 \ - --save_steps=100 \ - --per_device_train_batch_size=1 \ - --per_device_eval_batch_size=1 \ - --gradient_accumulation_steps=2 \ - --learning_rate=1e-4 \ - --lr_scheduler_type="cosine" \ - --warmup_steps=100 \ - --weight_decay=0.05 \ - --optim="paged_adamw_32bit" \ - --lora_target_modules "q_proj" "v_proj" \ - --bf16 \ - --remove_unused_columns=False \ - --run_name="sft_mistral" \ - --report_to=none \ - --use_habana \ - --use_lazy_mode - ``` - To merge the adaptors to get the final sft merged checkpoint, we can use the `merge_peft_adapter.py` helper script that comes with TRL: - - ``` - python merge_peft_adapter.py --base_model_name="mistralai/Mistral-7B-v0.1" --adapter_model_name="sft" --output_name="sft/final_merged_checkpoint" - ``` - -2. Run the DPO trainer using the model saved by the previous step: - ``` - DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 python ../gaudi_spawn.py --world_size 8 --use_deepspeed dpo.py \ - --model_name_or_path="sft/final_merged_checkpoint" \ - --tokenizer_name_or_path=mistralai/Mistral-7B-v0.1 \ - --deepspeed ../language-modeling/llama2_ds_zero3_config.json \ - --lora_target_modules "q_proj" "v_proj" "k_proj" "out_proj" "fc_in" "fc_out" "wte" \ - --output_dir="dpo" \ - --max_prompt_length=256 \ - --max_length=512 \ - --report_to=none - ``` - #### For meta-llama/Llama-2-70b-hf +The following example is for the creation of StackLlaMa 2: a Stack exchange llama-v2-70b model. There are two main steps to the DPO training process. + For large model like Llama2-70B, we could use DeepSpeed Zero-3 to enable DPO training in multi-card. steps like: 1. 
Supervised fine-tuning of the base llama-v2-70b model to create llama-v2-70b-se: From ae003465000e2383bad673bdfa0962dce48fbe68 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 24 Feb 2025 14:33:31 +0800 Subject: [PATCH 30/32] add resolved cogvideox feature. --- examples/audio-classification/README.md | 6 +- .../audio-classification/requirements.txt | 1 + .../run_audio_classification.py | 4 +- examples/contrastive-image-text/README.md | 58 +- examples/image-to-text/README.md | 322 +- .../run_image2text_lora_finetune.py | 174 +- examples/image-to-text/run_pipeline.py | 23 +- examples/kubernetes/Chart.yaml | 2 +- examples/language-modeling/README.md | 366 +- examples/language-modeling/run_clm.py | 34 + examples/multi-node-training/README.md | 4 + examples/question-answering/README.md | 233 +- .../nli/README.md | 7 + .../paraphrases/README.md | 6 + .../sts/README.md | 7 + examples/speech-recognition/README.md | 10 +- examples/speech-recognition/requirements.txt | 1 + .../run_speech_recognition_ctc.py | 28 + examples/stable-diffusion/README.md | 510 +- .../image_to_image_generation.py | 18 +- .../image_to_video_generation.py | 79 +- .../quantization/flux/measure_config.json | 2 +- .../quantization/flux/quantize_config.json | 2 +- .../measure/fp8_hooks_maxabs.json | 18871 ---------------- .../quantization/measure/fp8_hooks_maxabs.npz | Bin 263025 -> 0 bytes .../quantization/measure_config.json | 6 - .../quantization/quant_config.json | 7 - .../text_to_image_generation.py | 21 +- examples/stable-diffusion/training/README.md | 772 +- .../training/requirements.txt | 2 + .../training/textual_inversion.py | 4 + .../training/textual_inversion_sdxl.py | 4 + examples/summarization/README.md | 81 +- examples/text-feature-extraction/README.md | 7 - examples/text-generation/README.md | 98 +- .../maxabs_quant_mixtral.json | 7 +- .../quantization_config/unit_scale_quant.json | 7 +- examples/text-generation/run_generation.py | 35 +- examples/text-generation/run_lm_eval.py | 4 +- examples/text-generation/utils.py | 81 +- examples/text-to-speech/requirements.txt | 1 + examples/text-to-video/README.md | 51 - examples/text-to-video/requirements.txt | 5 - .../text-to-video/text_to_video_generation.py | 242 - optimum/habana/accelerate/accelerator.py | 24 +- optimum/habana/accelerate/state.py | 1 + .../accelerate/utils/transformer_engine.py | 48 +- optimum/habana/diffusers/__init__.py | 1 + .../controlnet/pipeline_controlnet.py | 4 +- ...eline_stable_video_diffusion_controlnet.py | 14 + .../diffusers/pipelines/ddpm/pipeline_ddpm.py | 26 +- .../diffusers/pipelines/flux/pipeline_flux.py | 14 + .../pipelines/flux/pipeline_flux_img2img.py | 4 + .../diffusers/pipelines/pipeline_utils.py | 3 +- .../pipeline_stable_diffusion.py | 13 +- .../pipeline_stable_diffusion_depth2img.py | 30 + ...peline_stable_diffusion_image_variation.py | 4 + .../pipeline_stable_diffusion_img2img.py | 37 +- .../pipeline_stable_diffusion_inpaint.py | 4 + ...eline_stable_diffusion_instruct_pix2pix.py | 4 + .../pipeline_stable_diffusion_ldm3d.py | 4 + .../pipeline_stable_diffusion_upscale.py | 12 +- .../pipeline_stable_diffusion_3.py | 517 +- .../pipeline_stable_diffusion_xl.py | 8 +- .../pipeline_stable_diffusion_xl_img2img.py | 6 +- .../pipeline_stable_diffusion_xl_inpaint.py | 6 +- .../pipeline_stable_diffusion_xl_mlperf.py | 28 +- .../pipeline_stable_video_diffusion.py | 41 +- .../pipeline_text_to_video_synth.py | 27 + optimum/habana/distributed/strategy.py | 4 +- .../sentence_transformers/modeling_utils.py | 11 +- .../st_gaudi_data_collator.py | 
42 +- .../sentence_transformers/st_gaudi_encoder.py | 23 +- .../sentence_transformers/st_gaudi_trainer.py | 742 +- .../st_gaudi_training_args.py | 33 +- .../st_gaudi_transformer.py | 2 +- .../generation/configuration_utils.py | 6 + .../habana/transformers/generation/utils.py | 30 +- optimum/habana/transformers/modeling_utils.py | 87 +- .../habana/transformers/models/__init__.py | 19 +- .../models/baichuan/modeling_baichuan.py | 3 +- .../deepseek_v2/modeling_deepseek_v2.py | 1321 +- .../models/gemma2/modeling_gemma2.py | 5 - .../models/llama/modeling_llama.py | 204 +- .../models/llava/modeling_llava.py | 135 +- .../models/llava_next/modeling_llava_next.py | 81 +- .../transformers/models/mixtral/__init__.py | 4 +- .../models/mixtral/modeling_mixtral.py | 164 +- .../models/mllama/modeling_mllama.py | 4 + .../transformers/models/t5/modeling_t5.py | 2 +- .../models/wav2vec2/modeling_wav2vec2.py | 3 +- optimum/habana/transformers/trainer.py | 3 +- optimum/habana/transformers/training_args.py | 42 + optimum/habana/trl/models/modeling_base.py | 2 +- optimum/habana/trl/models/modeling_sd_base.py | 4 +- optimum/habana/trl/trainer/ddpo_trainer.py | 6 +- optimum/habana/trl/trainer/ppo_config.py | 2 +- optimum/habana/trl/trainer/ppo_trainer.py | 5 +- optimum/habana/trl/trainer/reward_trainer.py | 2 +- optimum/habana/utils.py | 12 + .../bridgetower_large_itm_mlm_itc.json | 5 +- tests/baselines/clip_roberta.json | 20 +- tests/baselines/falcon_40b.json | 12 +- tests/baselines/llama_7b.json | 3 - tests/ci/slow_tests_diffusers.sh | 1 + tests/test_diffusers.py | 1710 +- tests/test_encoder_decoder.py | 100 +- tests/test_examples.py | 17 +- tests/test_fp8_examples.py | 30 +- tests/test_fsdp_examples.py | 38 +- tests/test_image_to_text_example.py | 57 +- tests/test_openclip_vqa.py | 36 +- tests/test_pipeline.py | 26 +- tests/test_sentence_transformers.py | 64 +- tests/test_text_generation_example.py | 286 +- tests/utils.py | 3 +- 116 files changed, 5465 insertions(+), 23029 deletions(-) delete mode 100644 examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json delete mode 100644 examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz delete mode 100755 examples/stable-diffusion/quantization/measure_config.json delete mode 100755 examples/stable-diffusion/quantization/quant_config.json mode change 100644 => 100755 examples/stable-diffusion/training/textual_inversion_sdxl.py delete mode 100644 examples/text-to-video/README.md delete mode 100644 examples/text-to-video/requirements.txt delete mode 100755 examples/text-to-video/text_to_video_generation.py mode change 100644 => 100755 optimum/habana/transformers/generation/utils.py mode change 100755 => 100644 tests/test_diffusers.py diff --git a/examples/audio-classification/README.md b/examples/audio-classification/README.md index 64f5e0daba..e9f94b3526 100644 --- a/examples/audio-classification/README.md +++ b/examples/audio-classification/README.md @@ -58,7 +58,8 @@ python run_audio_classification.py \ --throughput_warmup_steps 3 \ --sdp_on_bf16 \ --bf16 \ - --trust_remote_code True + --trust_remote_code True \ + --attn_implementation sdpa ``` On a single HPU, this script should run in ~13 minutes and yield an accuracy of **97.96%**. 
@@ -98,7 +99,8 @@ PT_HPU_LAZY_MODE=0 python ../gaudi_spawn.py \ --bf16 \ --trust_remote_code True \ --torch_compile \ - --torch_compile_backend hpu_backend + --torch_compile_backend hpu_backend \ + --attn_implementation sdpa ``` On 8 HPUs, this script should run in ~12 minutes and yield an accuracy of **80.49%**. diff --git a/examples/audio-classification/requirements.txt b/examples/audio-classification/requirements.txt index 720a5a4abc..bae36f7451 100644 --- a/examples/audio-classification/requirements.txt +++ b/examples/audio-classification/requirements.txt @@ -1,3 +1,4 @@ datasets>=1.14.0 evaluate +numba==0.60.0 librosa diff --git a/examples/audio-classification/run_audio_classification.py b/examples/audio-classification/run_audio_classification.py index c93e88def0..b83597f84e 100644 --- a/examples/audio-classification/run_audio_classification.py +++ b/examples/audio-classification/run_audio_classification.py @@ -196,6 +196,8 @@ class ModelArguments: ) def __post_init__(self): + if self.use_flash_attention: + os.environ["USE_FLASH_ATTENTION"] = "1" if self.flash_attention_recompute: assert self.use_flash_attention, "flash_attention_recompute is set, but use_flash_attention is not" os.environ["FLASH_ATTENTION_RECOMPUTE"] = "1" @@ -389,7 +391,7 @@ def compute_metrics(eval_pred): revision=model_args.model_revision, token=model_args.token, trust_remote_code=model_args.trust_remote_code, - attn_implementation="sdpa" if model_args.use_flash_attention else "eager", + attn_implementation=training_args.attn_implementation, ) model = AutoModelForAudioClassification.from_pretrained( model_args.model_name_or_path, diff --git a/examples/contrastive-image-text/README.md b/examples/contrastive-image-text/README.md index c0aa57ac41..def6d74ec0 100644 --- a/examples/contrastive-image-text/README.md +++ b/examples/contrastive-image-text/README.md @@ -163,61 +163,8 @@ python3 ../gaudi_spawn.py --world_size 8 --use_mpi run_clip.py \ ### DeepSpeed -Run the following command for training with DeepSpeed: - -```bash -PT_HPU_LAZY_MODE=0 PT_ENABLE_INT64_SUPPORT=1 \ -python3 ../gaudi_spawn.py --world_size 8 --use_deepspeed run_clip.py \ - --output_dir=/tmp/clip_roberta \ - --model_name_or_path=./clip-roberta \ - --data_dir $PWD/data \ - --dataset_name ydshieh/coco_dataset_script \ - --dataset_config_name 2017 \ - --image_column image_path \ - --caption_column caption \ - --remove_unused_columns=False \ - --do_train --do_eval \ - --mediapipe_dataloader \ - --per_device_train_batch_size="64" \ - --per_device_eval_batch_size="64" \ - --learning_rate="5e-5" --warmup_steps="0" --weight_decay 0.1 \ - --overwrite_output_dir \ - --use_habana \ - --use_lazy_mode=False \ - --gaudi_config_name="Habana/clip" \ - --throughput_warmup_steps=30 \ - --save_strategy="no" \ - --dataloader_num_workers=2 \ - --use_hpu_graphs \ - --max_steps=100 \ - --torch_compile_backend=hpu_backend \ - --torch_compile \ - --logging_nan_inf_filter \ - --trust_remote_code \ - --deepspeed - -``` - -You can look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana. 
-Here is a DeepSpeed configuration you can use to train your models on Gaudi: -```json -{ - "steps_per_print": 64, - "train_batch_size": "auto", - "train_micro_batch_size_per_gpu": "auto", - "gradient_accumulation_steps": "auto", - "bf16": { - "enabled": true - }, - "gradient_clipping": 1.0, - "zero_optimization": { - "stage": 2, - "overlap_comm": false, - "reduce_scatter": false, - "contiguous_gradients": false - } -} -``` +You can check the [DeepSpeed](https://github.com/huggingface/optimum-habana/tree/main/examples#deepspeed) section in Optimum Habana examples for how to run DeepSpeed. +You can also look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana. ## BridgeTower @@ -244,7 +191,6 @@ python ../gaudi_spawn.py --use_mpi --world_size 8 run_bridgetower.py \ --logging_steps 10 \ --dataloader_num_workers 1 \ --mediapipe_dataloader \ - --distribution_strategy fast_ddp \ --trust_remote_code \ --sdp_on_bf16 ``` diff --git a/examples/image-to-text/README.md b/examples/image-to-text/README.md index e4dbb05472..e41f1a6617 100644 --- a/examples/image-to-text/README.md +++ b/examples/image-to-text/README.md @@ -17,111 +17,12 @@ limitations under the License. # Image to Text Examples This directory contains a script that showcases how to perform image to text generation on Intel® Gaudi® AI Accelerators. -## Single-HPU inference +Habana FusedSDPA is a fused and optimized implementation of torch.nn.functional.scaled_dot_product_attention() for Gaudi. For more details, refer to [Gaudi online documentation](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Optimization_in_PyTorch_Models.html?highlight=fusedsdpa#using-fused-scaled-dot-product-attention-fusedsdpa). We optimized many models with FusedSDPA implementation as in [optimum/habana/transformers/models](https://github.com/huggingface/optimum-habana/tree/main/optimum/habana/transformers/models). If a model is not optimized with FusedSDPA, it uses [SDPA implementation](https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html). 
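The Gaudi model implementations referenced above reach the fused kernel through a small wrapper around `habana_frameworks.torch.hpex.kernels.FusedSDPA`; the same call pattern is used by the CogVideoX attention processor added earlier in this series. The sketch below is a simplified illustration rather than part of the patch, and the non-HPU fallback branch is an assumption for hosts where `habana_frameworks` is not installed.

```python
import torch.nn.functional as F

try:
    from habana_frameworks.torch.hpex.kernels import FusedSDPA
except ImportError:
    FusedSDPA = None  # e.g. when running off-HPU


def fused_or_standard_sdpa(query, key, value, attn_mask=None):
    if FusedSDPA is not None:
        # Positional arguments follow the wrapper used in this series:
        # (query, key, value, attn_mask, dropout_p, is_causal, scale, softmax_mode)
        return FusedSDPA.apply(query, key, value, attn_mask, 0.0, False, None, "fast")
    # Fall back to the stock PyTorch SDPA implementation mentioned above.
    return F.scaled_dot_product_attention(query, key, value, attn_mask=attn_mask, dropout_p=0.0)
```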
-Models that have been validated: - - [nlpconnect/vit-gpt2-image-captioning](https://huggingface.co/nlpconnect/vit-gpt2-image-captioning) - - [Salesforce/blip-image-captioning-large](https://huggingface.co/Salesforce/blip-image-captioning-large) - - [Salesforce/blip-image-captioning-base](https://huggingface.co/Salesforce/blip-image-captioning-base) - - [llava-hf/llava-1.5-7b-hf](https://huggingface.co/llava-hf/llava-1.5-7b-hf) - - [llava-hf/llava-1.5-13b-hf](https://huggingface.co/llava-hf/llava-1.5-13b-hf) - - [llava-hf/llava-v1.6-mistral-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) - - [llava-hf/llava-v1.6-vicuna-7b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-7b-hf) - - [llava-hf/llava-v1.6-vicuna-13b-hf](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf) - - [llava-hf/llava-v1.6-34b-hf](https://huggingface.co/llava-hf/llava-v1.6-34b-hf) - - [llava-hf/llama3-llava-next-8b-hf](https://huggingface.co/llava-hf/llama3-llava-next-8b-hf) - - [HuggingFaceM4/idefics2-8b](https://huggingface.co/HuggingFaceM4/idefics2-8b) - - [meta-llama/Llama-3.2-11B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-11B-Vision-Instruct) - - [meta-llama/Llama-3.2-90B-Vision-Instruct](https://huggingface.co/meta-llama/Llama-3.2-90B-Vision-Instruct) - - [tiiuae/falcon-11B-vlm](https://huggingface.co/tiiuae/falcon-11B-vlm) - - [google/paligemma-3b-mix-224](https://huggingface.co/google/paligemma-3b-mix-224) +## Inference with mixed-precision (BF16) -### Inference with BF16 - -To run Salesforce/blip-image-captioning-large inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path Salesforce/blip-image-captioning-large \ - --image_path "https://ankur3107.github.io/assets/images/image-captioning-example.png" \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run Llava-1.5-7b inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path llava-hf/llava-1.5-7b-hf \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run Llava-1.5-13b inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path llava-hf/llava-1.5-13b-hf \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run Llava-v1.6-mistral-7b inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run Llava-v1.6-vicuna-13b inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-vicuna-13b-hf \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run Llava-hf/llava-v1.6-34b-hf inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-34b-hf \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run google/paligemma-3b-mix-224 inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path google/paligemma-3b-mix-224 \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run Llava-hf/llama3-llava-next-8b-hf inference, use the following command: -```bash -python3 run_pipeline.py \ - --model_name_or_path llava-hf/llama3-llava-next-8b-hf \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To run idefics2 inference, use the following command: - -```bash -python3 run_pipeline.py \ - --model_name_or_path HuggingFaceM4/idefics2-8b \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -To 
run mllama inference using reduced precision in the SDPA, use the following command: +### Single card inference with BF16 +To run Llama inference with SDPA, use the following command: ```bash python3 run_pipeline.py \ @@ -130,55 +31,30 @@ python3 run_pipeline.py \ --bf16 \ --sdp_on_bf16 ``` +> SDPA may introduce [reduced precison](https://pytorch.org/docs/stable/notes/numerical_accuracy.html#reduced-precision-reduction-for-fp16-and-bf16-in-scaled-dot-product-attention-sdpa) -### Inference with FP8 -Inference for Llava-1.5-7b, Llava-1.5-13b, Llava-v1.6-mistral-7b and Llava-v1.6-vicuna-13b in FP8 precision are enabled using [Intel Neural Compressor (INC)](https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Inference_Using_FP8.html), which provides model measurement and quantization capabilities in PyTorch. -More information on enabling FP8 in SynapseAI is available here: -https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Inference_Using_FP8.html +### Multi-cards inference with BF16 -Here is an example to measure the tensor quantization statistics on Llava-1.5-7b: -```bash -QUANT_CONFIG=./quantization_config/maxabs_measure.json python run_pipeline.py \ - --model_name_or_path llava-hf/llava-1.5-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -Here is an example to quantize the model based on previous measurements for Llava-1.5-7b: +Use the following commands to run Llama-3.2-90B-Vision-Instruct BF16 inference with FusedSDPA on 8 HPUs: ```bash -QUANT_CONFIG=./quantization_config/maxabs_quant_scale_format_const.json python run_pipeline.py \ - --model_name_or_path llava-hf/llava-1.5-7b-hf \ +PT_HPU_ENABLE_LAZY_COLLECTIVES=true python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ + --model_name_or_path meta-llama/Llama-3.2-90B-Vision-Instruct \ --image_path "https://llava-vl.github.io/static/images/view.jpg" \ --use_hpu_graphs \ --bf16 \ - --sdp_on_bf16 + --use_flash_attention \ + --flash_attention_recompute ``` +## Inference with FP8 -Here is an example to measure the tensor quantization statistics on Llava-v1.6-mistral-7b: -```bash -QUANT_CONFIG=./quantization_config/maxabs_measure.json python run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` - -Here is an example to quantize the model based on previous measurements for Llava-v1.6-mistral-7b: -```bash -QUANT_CONFIG=./quantization_config/maxabs_quant_scale_format_const.json python run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --sdp_on_bf16 -``` +Inference with FP8 precision is enabled using [Intel Neural Compressor (INC)](https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Quantization/index.html?highlight=inc), which provides model measurement and quantization capabilities in PyTorch. 
+More information on enabling FP8 in SynapseAI is available here: +[Run Inference Using FP8](https://docs.habana.ai/en/latest/PyTorch/Inference_on_PyTorch/Quantization/Inference_Using_FP8.html?highlight=fp8) -Here is an example to measure the tensor quantization statistics on Llava-v1.6-vicuna-13b: +### Single card inference with FP8 +Here is an example to measure the tensor quantization statistics on Llava-v1.6-vicuna-13b with SDPA: ```bash QUANT_CONFIG=./quantization_config/maxabs_measure.json python run_pipeline.py \ --model_name_or_path llava-hf/llava-v1.6-vicuna-13b-hf \ @@ -188,7 +64,7 @@ QUANT_CONFIG=./quantization_config/maxabs_measure.json python run_pipeline.py \ --sdp_on_bf16 ``` -Here is an example to quantize the model based on previous measurements for Llava-v1.6-vicuna-13b: +Here is an example to quantize the model based on previous measurements for Llava-v1.6-vicuna-13b with SDPA: ```bash QUANT_CONFIG=./quantization_config/maxabs_quant_scale_format_const.json python run_pipeline.py \ --model_name_or_path llava-hf/llava-v1.6-vicuna-13b-hf \ @@ -198,25 +74,10 @@ QUANT_CONFIG=./quantization_config/maxabs_quant_scale_format_const.json python r --sdp_on_bf16 ``` -### Inference with FusedSDPA - -Habana FusedSDPA is a fused and optimized implementation of torch.nn.functional.scaled_dot_product_attention() for Gaudi. For more details, refer to [Gaudi online documentation](https://docs.habana.ai/en/latest/PyTorch/Model_Optimization_PyTorch/Optimization_in_PyTorch_Models.html?highlight=fusedsdpa#using-fused-scaled-dot-product-attention-fusedsdpa). - -Use the following command to run Llava-1.5-7b BF16 inference with FusedSDPA -```bash -python3 run_pipeline.py \ - --model_name_or_path llava-hf/llava-1.5-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --use_flash_attention \ - --flash_attention_recompute -``` - - -Use the following command to run Llava-v1.6-mistral-7b BF16 inference with FusedSDPA +### Multi-cards inference with FP8 +Here is an example of measuring the tensor quantization statistics on Llava-v1.6-mistral-7b with FusedSDPA on 8 HPUs: ```bash -python3 run_pipeline.py \ +QUANT_CONFIG=./quantization_config/maxabs_measure.json python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ --image_path "https://llava-vl.github.io/static/images/view.jpg" \ --use_hpu_graphs \ @@ -225,12 +86,9 @@ python3 run_pipeline.py \ --flash_attention_recompute ``` - -Use the following commands to run Llava-v1.6-mistral-7b FP8 inference with FusedSDPA - -Here is an example of measuring the tensor quantization statistics on Llava-v1.6-mistral-7b: +Here is an example of quantizing the model based on previous measurements for Llava-v1.6-mistral-7b with FusedSDPA on 8 HPUs: ```bash -QUANT_CONFIG=./quantization_config/maxabs_measure.json python run_pipeline.py \ +QUANT_CONFIG=./quantization_config/maxabs_quant_scale_format_const.json python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ --image_path "https://llava-vl.github.io/static/images/view.jpg" \ --use_hpu_graphs \ @@ -239,88 +97,8 @@ QUANT_CONFIG=./quantization_config/maxabs_measure.json python run_pipeline.py \ --flash_attention_recompute ``` -Here is an example of quantizing the model based on previous measurements for Llava-v1.6-mistral-7b: -```bash -QUANT_CONFIG=./quantization_config/maxabs_quant_scale_format_const.json python 
run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --use_flash_attention \ - --flash_attention_recompute -``` ## LORA Finetune -To run LoRA finetuning, you can use `run_image2text_lora_finetune.py`. -Here are single-/multi-device command examples for HuggingFaceM4/idefics2-8b. - -```bash -python3 run_image2text_lora_finetune.py \ - --model_name_or_path HuggingFaceM4/idefics2-8b \ - --dataset_name nielsr/docvqa_1200_examples \ - --bf16 True \ - --output_dir ./model_lora_llama \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 2 \ - --gradient_accumulation_steps 8 \ - --weight_decay 0.01 \ - --logging_steps 25 \ - --eval_strategy "no" \ - --save_strategy "no" \ - --learning_rate 5e-5 \ - --warmup_steps 50 \ - --lr_scheduler_type "constant" \ - --input_column_names 'image' 'query' \ - --output_column_names 'answers' \ - --remove_unused_columns False \ - --do_train \ - --do_eval \ - --use_habana \ - --use_lazy_mode \ - --lora_rank=8 \ - --lora_alpha=8 \ - --lora_dropout=0.1 \ - --max_seq_length=512 \ - --use_hpu_graphs_for_inference \ - --low_cpu_mem_usage True \ - --lora_target_modules '.*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$' -``` - -```bash -python3 ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_image2text_lora_finetune.py \ - --model_name_or_path HuggingFaceM4/idefics2-8b \ - --dataset_name nielsr/docvqa_1200_examples \ - --bf16 True \ - --output_dir ./model_lora_llama \ - --num_train_epochs 1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 2 \ - --gradient_accumulation_steps 8 \ - --weight_decay 0.01 \ - --logging_steps 25 \ - --eval_strategy "no" \ - --save_strategy "no" \ - --learning_rate 5e-5 \ - --warmup_steps 50 \ - --lr_scheduler_type "constant" \ - --input_column_names 'image' 'query' \ - --output_column_names 'answers' \ - --remove_unused_columns False \ - --do_train \ - --do_eval \ - --use_habana \ - --use_lazy_mode \ - --lora_rank=8 \ - --lora_alpha=8 \ - --lora_dropout=0.1 \ - --max_seq_length=512 \ - --use_hpu_graphs_for_inference \ - --low_cpu_mem_usage True \ - --lora_target_modules '".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$"' -``` - Here are single-/multi-device command examples for meta-llama/Llama-3.2-11B-Vision-Instruct. ```bash @@ -390,54 +168,8 @@ python3 ../gaudi_spawn.py \ --lora_target_modules '".*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$"' ``` -## Multi-HPU inference +The single card training command for llava-hf/llava-1.5-7b-hf is similar. 
-### BF16 Inference with FusedSDPA on 8 HPUs - -Use the following commands to run Llava-v1.6-mistral-7b BF16 inference with FusedSDPA on 8 HPUs: -```bash -python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --use_flash_attention \ - --flash_attention_recompute -``` - -Use the following commands to run Llama-3.2-90B-Vision-Instruct BF16 inference with FusedSDPA on 8 HPUs: -```bash -PT_HPU_ENABLE_LAZY_COLLECTIVES=true python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ - --model_name_or_path meta-llama/Llama-3.2-90B-Vision-Instruct \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --use_flash_attention \ - --flash_attention_recompute -``` - - -### FP8 Inference with FusedSDPA on 8 HPUs - -Use the following commands to run Llava-v1.6-mistral-7b FP8 inference with FusedSDPA on 8 HPUs. -Here is an example of measuring the tensor quantization statistics on Llava-v1.6-mistral-7b on 8 HPUs: -```bash -QUANT_CONFIG=./quantization_config/maxabs_measure.json python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --use_flash_attention \ - --flash_attention_recompute -``` - -Here is an example of quantizing the model based on previous measurements for Llava-v1.6-mistral-7b on 8 HPUs: -```bash -QUANT_CONFIG=./quantization_config/maxabs_quant_scale_format_const.json python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_pipeline.py \ - --model_name_or_path llava-hf/llava-v1.6-mistral-7b-hf \ - --image_path "https://llava-vl.github.io/static/images/view.jpg" \ - --use_hpu_graphs \ - --bf16 \ - --use_flash_attention \ - --flash_attention_recompute -``` +> For different models, please adjust training parameters and `lora_target_modules`. Such as replace `lora_target_modules` +> with below for HuggingFaceM4/idefics2-8b. 
+> '".*(text_model|modality_projection|perceiver_resampler).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$"' diff --git a/examples/image-to-text/run_image2text_lora_finetune.py b/examples/image-to-text/run_image2text_lora_finetune.py index ded60e6d52..74020cd67d 100644 --- a/examples/image-to-text/run_image2text_lora_finetune.py +++ b/examples/image-to-text/run_image2text_lora_finetune.py @@ -298,7 +298,58 @@ def __call__(self, examples): return batch -def eval(processor, model, dataset, batch_size, use_lazy_mode, use_hpu_graphs, max_seq_length): +class LLavaDataCollator: + def __init__(self, processor, max_seq_length): + self.processor = processor + + num_image_tokens = (self.processor.image_processor.crop_size["height"] // self.processor.patch_size) * ( + self.processor.image_processor.crop_size["width"] // self.processor.patch_size + ) + 1 + if self.processor.vision_feature_select_strategy == "default": + num_image_tokens -= 1 + + # text length + image length + self.max_seq_length = max_seq_length + num_image_tokens + + def __call__(self, examples): + texts = [] + images = [] + + keys = list(examples[0].keys()) + if not all(key in ["image", "query", "answers"] for key in keys): + raise ValueError("Unsupported dataset format") + for example in examples: + image = example["image"] + question = example["query"]["en"] + answer = random.choice(example["answers"]) + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Answer briefly."}, + {"type": "image"}, + {"type": "text", "text": question}, + ], + }, + {"role": "assistant", "content": [{"type": "text", "text": answer}]}, + ] + text = self.processor.apply_chat_template(messages, add_generation_prompt=False) + texts.append(text.strip()) + images.append(image) + + batch = self.processor( + images, texts, return_tensors="pt", padding="max_length", truncation=True, max_length=self.max_seq_length + ) + + labels = batch["input_ids"].clone() + if self.processor.tokenizer.pad_token_id is not None: + labels[labels == self.processor.tokenizer.pad_token_id] = -100 + batch["labels"] = labels + + return batch + + +def eval(processor, model, dataset, batch_size, use_lazy_mode, use_hpu_graphs, max_seq_length, model_type): from tqdm import tqdm answers_unique = [] @@ -307,7 +358,6 @@ def eval(processor, model, dataset, batch_size, use_lazy_mode, use_hpu_graphs, m for i in tqdm(range(0, len(dataset), batch_size)): examples = dataset[i : i + batch_size] answers_unique.extend(examples["answers"]) - images = [[im] for im in examples["image"]] texts = [] for q in examples["query"]: messages = [ @@ -322,14 +372,31 @@ def eval(processor, model, dataset, batch_size, use_lazy_mode, use_hpu_graphs, m ] text = processor.apply_chat_template(messages, add_generation_prompt=True) texts.append(text.strip()) - inputs = processor( - text=texts, - images=images, - return_tensors="pt", - padding="max_length", - truncation=True, - max_length=max_seq_length, - ) + + if model_type is not None and model_type == "llava": + images = [] + for im in examples["image"]: + images.append(im) + + inputs = processor( + images, + texts, + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=max_seq_length, + padding_side="left", + ) + else: + images = [[im] for im in examples["image"]] + inputs = processor( + text=texts, + images=images, + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=max_seq_length, + ) inputs = {k: v.to("hpu") for k, v in inputs.items()} generated_ids = model.generate( 
**inputs, max_new_tokens=64, ignore_eos=False, lazy_mode=use_lazy_mode, hpu_graphs=use_hpu_graphs @@ -346,6 +413,22 @@ def eval(processor, model, dataset, batch_size, use_lazy_mode, use_hpu_graphs, m return anls +def find_all_linear_names(model): + cls = torch.nn.Linear + lora_module_names = set() + multimodal_keywords = ["mm_projector", "vision_tower", "vision_resampler"] + for name, module in model.named_modules(): + if any(mm_keyword in name for mm_keyword in multimodal_keywords): + continue + if isinstance(module, cls): + names = name.split(".") + lora_module_names.add(names[0] if len(names) == 1 else names[-1]) + + if "lm_head" in lora_module_names: # needed for 16-bit + lora_module_names.remove("lm_head") + return list(lora_module_names) + + def main(): parser = HfArgumentParser((ModelArguments, DataArguments, GaudiTrainingArguments, FinetuneArguments)) if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): @@ -380,7 +463,7 @@ def main(): do_image_splitting=model_args.do_image_splitting, padding_side="right", ) - setattr(processor.image_processor, "pad_to_longest_edge", True) + config_kwargs = { "cache_dir": model_args.cache_dir, "revision": model_args.model_revision, @@ -395,7 +478,13 @@ def main(): else: raise ValueError("Please provide value for model_name_or_path or config_name.") - # Load model + if config.model_type == "llava": + setattr(processor, "patch_size", config.vision_config.patch_size) + setattr(processor, "vision_feature_select_strategy", config.vision_feature_select_strategy) + else: + setattr(processor.image_processor, "pad_to_longest_edge", True) + + # Load model if model_args.model_name_or_path: model_dtype = torch.bfloat16 if training_args.bf16 else None model = AutoModelForVision2Seq.from_pretrained( @@ -413,11 +502,16 @@ def main(): else: raise ValueError("Must provide model_name_or_path to load a pretrained CausalLM model.") + if finetune_args.lora_target_modules is None: + target_modules = find_all_linear_names(model) + else: + target_modules = finetune_args.lora_target_modules + lora_config = LoraConfig( r=finetune_args.lora_rank, lora_alpha=finetune_args.lora_alpha, lora_dropout=finetune_args.lora_dropout, - target_modules=finetune_args.lora_target_modules, + target_modules=target_modules, init_lora_weights="gaussian", ) model = get_peft_model(model, lora_config) @@ -456,15 +550,21 @@ def main(): if col not in (data_args.input_column_names + data_args.output_column_names) ] ) - if hasattr(config, "image_token_id"): - # idefics - image_token_id = config.image_token_id - elif hasattr(config, "image_token_index"): - # mllama - image_token_id = config.image_token_index + if config.model_type == "llava": + data_collator = LLavaDataCollator(processor, max_seq_length=data_args.max_seq_length) else: - raise ValueError("Please provide value for image_token_id") - data_collator = MyDataCollator(processor, max_seq_length=data_args.max_seq_length, image_token_id=image_token_id) + if hasattr(config, "image_token_id"): + # idefics + image_token_id = config.image_token_id + elif hasattr(config, "image_token_index"): + # mllama + image_token_id = config.image_token_index + else: + raise ValueError("Please provide value for image_token_id") + + data_collator = MyDataCollator( + processor, max_seq_length=data_args.max_seq_length, image_token_id=image_token_id + ) gaudi_config = GaudiConfig() gaudi_config.use_fused_adam = True @@ -509,14 +609,29 @@ def main(): } ] text = processor.apply_chat_template(messages, add_generation_prompt=True) - inputs = processor( - 
text=[text.strip()], - images=[image], - return_tensors="pt", - padding="max_length", - truncation=True, - max_length=data_args.max_seq_length, - ) + + if config.model_type == "llava": + # don't expand image_token_id + setattr(processor, "patch_size", None) + setattr(processor, "vision_feature_select_strategy", None) + inputs = processor( + [image], + [text.strip()], + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=data_args.max_seq_length, + padding_side="left", + ) + else: + inputs = processor( + text=[text.strip()], + images=[image], + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=data_args.max_seq_length, + ) inputs = {k: v.to("hpu") for k, v in inputs.items()} generated_ids = model.generate( **inputs, @@ -543,6 +658,7 @@ def main(): use_lazy_mode=training_args.use_lazy_mode, use_hpu_graphs=training_args.use_hpu_graphs_for_inference, max_seq_length=data_args.max_seq_length, + model_type=config.model_type, ) eval_metrics = {"eval_accuracy": anls} trainer.log_metrics("eval", eval_metrics) diff --git a/examples/image-to-text/run_pipeline.py b/examples/image-to-text/run_pipeline.py index cc19de3b83..f75bde19c2 100644 --- a/examples/image-to-text/run_pipeline.py +++ b/examples/image-to-text/run_pipeline.py @@ -213,6 +213,7 @@ def main(): os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE") if args.world_size > 0: os.environ.setdefault("PT_HPU_ENABLE_LAZY_COLLECTIVES", "true") + os.environ.setdefault("DEEPSPEED_USE_HABANA_FRAMEWORKS_DETERMINISTIC_API", "1") from optimum.habana.transformers.modeling_utils import adapt_transformers_to_gaudi @@ -222,7 +223,8 @@ def main(): config = AutoConfig.from_pretrained(args.model_name_or_path) model_type = config.model_type - if args.image_path is None and model_type in ["llava", "idefics2", "mllama"]: + + if args.image_path is None and model_type in ["llava", "idefics2", "mllama", "qwen2_vl"]: args.image_path = ["https://llava-vl.github.io/static/images/view.jpg"] elif args.image_path is None and model_type == "paligemma": args.image_path = [ @@ -233,7 +235,7 @@ def main(): "https://github.com/haotian-liu/LLaVA/blob/1a91fc274d7c35a9b50b3cb29c4247ae5837ce39/images/llava_v1_5_radar.jpg?raw=true" ] - if model_type in ["llava", "idefics2", "llava_next", "mllama", "paligemma"]: + if model_type in ["llava", "idefics2", "llava_next", "mllama", "paligemma", "qwen2_vl"]: processor = AutoProcessor.from_pretrained(args.model_name_or_path, padding_side="left") if args.prompt is None: if processor.chat_template is not None: @@ -312,6 +314,9 @@ def main(): generator = pipeline( "image-to-text", model=args.model_name_or_path, + config=args.model_name_or_path, + tokenizer=args.model_name_or_path, + image_processor=args.model_name_or_path, torch_dtype=model_dtype, device="hpu", ) @@ -340,13 +345,18 @@ def main(): if args.use_kv_cache: generate_kwargs["use_cache"] = args.use_kv_cache + if model_type == "qwen2_vl": + generate_kwargs["use_cache"] = True + generate_kwargs["cache_implementation"] = "static" + if args.quant_config: generator.model = setup_quantization(generator.model, args) htcore.hpu_initialize(generator.model) # delete once pipeline integrate AutoProcessor as preprocess engine # could use "image-text-to-text" pipeline in transformers 4.47 - if model_type in ["idefics2", "mllama", "paligemma"]: + + if model_type in ["idefics2", "mllama", "paligemma", "qwen2_vl", "llava", "llava_next"]: from transformers.image_utils import load_image def preprocess(self, image, prompt=None, timeout=None): @@ 
-378,7 +388,12 @@ def preprocess(self, image, prompt=None, timeout=None): n_output_tokens = 0 for sequence in result: # We have to subtract the number of input tokens as they are part of the returned sequence - n_output_tokens += len(generator.tokenizer(sequence[0]["generated_text"]).input_ids) - n_input_tokens + # TODO this is not accurate, args.prompt contains flag like <|im_start|>, <|im_end|>, while generated_text does not contain it + # if it's text+image prompt, should use "image-text-to-text" pipeline after transformers 4.47 + if not args.ignore_eos: + n_output_tokens += len(generator.tokenizer(sequence[0]["generated_text"]).input_ids) - n_input_tokens + else: + n_output_tokens += args.max_new_tokens total_new_tokens_generated = args.n_iterations * n_output_tokens throughput = total_new_tokens_generated / duration diff --git a/examples/kubernetes/Chart.yaml b/examples/kubernetes/Chart.yaml index dc0400ccb0..d1c1778076 100644 --- a/examples/kubernetes/Chart.yaml +++ b/examples/kubernetes/Chart.yaml @@ -3,7 +3,7 @@ name: optimum-habana-example-chart description: This Helm chart deploys example jobs using Optimum for Intel® Gaudi® Accelerators to a Kubernetes cluster. # Compatible Kubernetes versions -kubeVersion: 1.27-1.29 +kubeVersion: 1.27 - 1.29 # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md index 9ef27f9e73..5cce1528dc 100644 --- a/examples/language-modeling/README.md +++ b/examples/language-modeling/README.md @@ -131,60 +131,6 @@ python ../gaudi_spawn.py \ This example has been validated with the following DeepSpeed ZeRO-2 config: https://github.com/huggingface/optimum-habana/blob/main/tests/configs/deepspeed_zero_2.json -### Multi-card Training with Deepspeed (chatglm3-6b) -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_deepspeed run_clm.py \ - --config_name THUDM/chatglm3-6b \ - --tokenizer_name THUDM/chatglm3-6b \ - --dataset_name wikitext \ - --dataset_config_name wikitext-2-raw-v1 \ - --per_device_train_batch_size 6 \ - --per_device_eval_batch_size 4 \ - --do_train \ - --do_eval \ - --deepspeed llama2_ds_zero3_config.json \ - --output_dir /tmp/test-clm \ - --gaudi_config_name Habana/gpt2 \ - --use_habana \ - --use_lazy_mode \ - --throughput_warmup_steps 3 \ - --bf16 \ - --block_size 1024 \ - --use_cache False \ - --overwrite_output_dir \ - --logging_first_step True \ - --logging_steps 20 -``` - -### Multi-card Training with Deepspeed (Baichuan2-13B-Chat) -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_deepspeed run_clm.py \ - --config_name baichuan-inc/Baichuan2-13B-Chat \ - --tokenizer_name baichuan-inc/Baichuan2-13B-Chat \ - --dataset_name wikitext \ - --num_train_epochs 30 \ - --dataset_config_name wikitext-2-raw-v1 \ - --per_device_train_batch_size 2 \ - --per_device_eval_batch_size 2 \ - --do_train \ - --do_eval \ - --deepspeed llama2_ds_zero3_config.json \ - --output_dir /tmp/test-clm \ - --gaudi_config_name Habana/gpt2 \ - --use_habana \ - --use_lazy_mode \ - --throughput_warmup_steps 3 \ - --bf16 \ - --block_size 1024 \ - --use_cache False \ - --overwrite_output_dir \ - --logging_first_step True \ - --logging_steps 20 -``` - - ## Multi-Node Training with Deepspeed (GPT-NeoX) The following command triggers the fine-tuning of [GPT-NeoX-20B](https://huggingface.co/EleutherAI/gpt-neox-20b) on WikiText-2 with Deepspeed ZeRO-2. 
@@ -226,10 +172,11 @@ Following the RoBERTa paper, we use dynamic masking rather than static masking. converge slightly slower (over-fitting takes more epochs). -### Single-card Training +### Multi-card Training ```bash -python run_mlm.py \ +python ../gaudi_spawn.py \ + --world_size 8 --use_mpi run_mlm.py \ --model_name_or_path roberta-base \ --dataset_name wikitext \ --dataset_config_name wikitext-2-raw-v1 \ @@ -246,54 +193,12 @@ python run_mlm.py \ --bf16 ``` -To run on your own training and validation files, use the following command: - -```bash -python run_mlm.py \ - --model_name_or_path roberta-base \ - --train_file path_to_train_file \ - --validation_file path_to_validation_file \ - --per_device_train_batch_size 8 \ - --per_device_eval_batch_size 8 \ - --do_train \ - --do_eval \ - --output_dir /tmp/test-mlm \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --gaudi_config_name Habana/roberta-base \ - --throughput_warmup_steps 3 \ - --bf16 -``` - If your dataset is organized with one sample per line, you can use the `--line_by_line` flag (otherwise the script concatenates all texts and then splits them into blocks of the same length). **Note:** On HPU, you should use the flag `--pad_to_max_length` in conjunction with the `--line_by_line` flag to make sure all your batches have the same length. -### Multi-card Training - -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_mlm.py \ - --model_name_or_path roberta-base \ - --dataset_name wikitext \ - --dataset_config_name wikitext-2-raw-v1 \ - --per_device_train_batch_size 8 \ - --per_device_eval_batch_size 8 \ - --do_train \ - --do_eval \ - --output_dir /tmp/test-mlm \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --gaudi_config_name Habana/roberta-base \ - --throughput_warmup_steps 3 \ - --bf16 -``` - - ### Training in torch.compile mode RoBERTa-Large model training in [torch.compile](pytorch.org/tutorials/intermediate/torch_compile_tutorial.html) mode is enabled by applying the following changes to your command, a) Set the following environment variables `PT_HPU_LAZY_MODE=0` and `PT_ENABLE_INT64_SUPPORT=1`. @@ -324,78 +229,6 @@ python run_clm.py \ --bf16 ``` - -## Using DeepSpeed - -Multi-card examples can be simply adapted to be run with DeepSpeed. Here is the CLM example with GPT2-XL: - -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_deepspeed run_clm.py \ - --model_name_or_path gpt2-xl \ - --dataset_name wikitext \ - --dataset_config_name wikitext-2-raw-v1 \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 4 \ - --do_train \ - --do_eval \ - --learning_rate 4e-4 \ - --output_dir /tmp/test-clm \ - --gaudi_config_name Habana/gpt2 \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --gradient_checkpointing \ - --use_cache False \ - --throughput_warmup_steps 3 \ - --deepspeed path_to_my_deepspeed_config -``` - -You can look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana. 
-Here is a DeepSpeed configuration you can use to train your models on Gaudi: -```json -{ - "steps_per_print": 64, - "train_batch_size": "auto", - "train_micro_batch_size_per_gpu": "auto", - "gradient_accumulation_steps": "auto", - "bf16": { - "enabled": true - }, - "gradient_clipping": 1.0, - "zero_optimization": { - "stage": 2, - "overlap_comm": false, - "reduce_scatter": false, - "contiguous_gradients": false - } -} -``` - -Here is another example with Bloom-7B1: - -```bash -DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 PT_HPU_MAX_COMPOUND_OP_SYNC=1 PT_HPU_MAX_COMPOUND_OP_SIZE=1 python ../gaudi_spawn.py \ - --world_size 8 --use_deepspeed run_clm.py \ - --model_name_or_path bigscience/bloom-7b1 \ - --dataset_name wikitext \ - --dataset_config_name wikitext-2-raw-v1 \ - --per_device_train_batch_size 8 \ - --do_train \ - --output_dir /tmp/test-clm \ - --gaudi_config_name Habana/roberta-base \ - --use_habana \ - --use_lazy_mode \ - --gradient_checkpointing \ - --use_cache False \ - --throughput_warmup_steps 3 \ - --save_strategy "no" \ - --learning_rate 1e-04 \ - --deepspeed path_to_my_deepspeed_config -``` -[This](https://github.com/huggingface/optimum-habana/blob/main/tests/configs/deepspeed_zero_3_gaudi1.json) is a DeepSpeed configuration you can use to train this model on Gaudi1. - - ## Inference To run only inference, you can start from the commands above and you just have to remove the training-only arguments such as `--do_train`, `--per_device_train_batch_size`, `--num_train_epochs`, etc... @@ -456,141 +289,6 @@ python3 run_lora_clm.py \ --validation_split_percentage 4 \ --adam_epsilon 1e-08 ``` -- Single-card finetuning of Falcon-40B: -```bash -PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python3 run_lora_clm.py \ - --model_name_or_path tiiuae/falcon-40b \ - --dataset_name timdettmers/openassistant-guanaco \ - --bf16 True \ - --output_dir ./model_lora_falcon \ - --num_train_epochs 3 \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 16 \ - --eval_strategy "no" \ - --save_strategy "no" \ - --learning_rate 3e-4 \ - --max_grad_norm 0.3 \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "constant" \ - --logging_steps 1 \ - --do_train \ - --use_habana \ - --use_lazy_mode \ - --pipelining_fwd_bwd \ - --throughput_warmup_steps 3 \ - --lora_rank=64 \ - --lora_alpha=16 \ - --lora_dropout=0.1 \ - --lora_target_modules "query_key_value" "dense" "dense_h_to_4h" "dense_4h_to_h" \ - --dataset_concatenation \ - --max_seq_length 256 \ - --low_cpu_mem_usage True \ - --adam_epsilon 1e-08 \ - --do_eval \ - --validation_split_percentage 5 -``` - -- Multi-card finetuning of Llama1-7B: -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_lora_clm.py \ - --model_name_or_path huggyllama/llama-7b \ - --dataset_name tatsu-lab/alpaca \ - --bf16 True \ - --output_dir ./model_lora_llama_ddp \ - --num_train_epochs 3 \ - --per_device_train_batch_size 8 \ - --gradient_accumulation_steps 2 \ - --eval_strategy "no" \ - --save_strategy "no" \ - --learning_rate 3e-4 \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "constant" \ - --max_grad_norm 0.3 \ - --logging_steps 1 \ - --do_train \ - --do_eval \ - --use_habana \ - --use_lazy_mode \ - --throughput_warmup_steps 3 \ - --lora_rank=8 \ - --lora_alpha=16 \ - --lora_dropout=0.05 \ - --lora_target_modules "q_proj" "v_proj" \ - --dataset_concatenation \ - --max_seq_length 512 \ - --ddp_bucket_cap_mb 50 \ - --adam_epsilon 1e-08 \ - --validation_split_percentage 4 \ - --low_cpu_mem_usage True -``` - 
-- Multi-card finetuning of Llama2-7B with FP8: -```bash -PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_lora_clm.py \ - --model_name_or_path meta-llama/Llama-2-7b-hf \ - --dataset_name tatsu-lab/alpaca \ - --bf16 True \ - --output_dir ./model_lora_llama \ - --num_train_epochs 3 \ - --per_device_train_batch_size 16 \ - --gradient_accumulation_steps 1 \ - --eval_strategy "no" \ - --save_strategy "no" \ - --learning_rate 3e-4 \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "constant" \ - --max_grad_norm 0.3 \ - --logging_steps 20 \ - --do_train \ - --do_eval \ - --use_habana \ - --use_lazy_mode \ - --throughput_warmup_steps 18 \ - --lora_rank=8 \ - --lora_alpha=16 \ - --lora_dropout=0.05 \ - --lora_target_modules "q_proj" "v_proj" \ - --dataset_concatenation \ - --max_seq_length 512 \ - --ddp_bucket_cap_mb 50 \ - --adam_epsilon 1e-08 \ - --validation_split_percentage 10 \ - --low_cpu_mem_usage True \ - --pipelining_fwd_bwd \ - --fp8 True -``` - -- Multi-card finetuning of codegen-16B-mono: -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_lora_clm.py \ - --model_name_or_path Salesforce/codegen-16B-mono \ - --dataset_name b-mc2/sql-create-context \ - --sql_prompt \ - --bf16 True \ - --output_dir ./finetuned-models/codegen-finetune-on-sql-create-context-hpu8-lora8-bs4 \ - --num_train_epochs 5 \ - --per_device_train_batch_size 4 \ - --per_device_eval_batch_size 4 \ - --eval_strategy "no" \ - --save_strategy "no" \ - --learning_rate 1e-4 \ - --logging_steps 1 \ - --dataset_concatenation \ - --do_train \ - --use_habana \ - --use_lazy_mode \ - --throughput_warmup_steps 3 \ - --use_hpu_graphs_for_inference \ - --lora_target_modules "qkv_proj" \ - --lora_rank 8 \ - --do_eval \ - --validation_split_percentage 10 \ - --use_cache False -``` - Multi-card finetuning of gemma2 using chat template: ```bash @@ -740,43 +438,6 @@ python3 ../gaudi_spawn.py --world_size 8 --use_mpi run_lora_clm.py \ --flash_attention_causal_mask True ``` -- Multi-card finetuning of Falcon-180B: - - Falcon-180B example command saves only the LoRA parameters at end - - For inference we need to merge the pretrained model and LoRA weights -```bash -PT_HPU_AUTOCAST_LOWER_PRECISION_OPS_LIST=ops_bf16.txt python3 ../gaudi_spawn.py \ - --world_size 8 --use_deepspeed run_lora_clm.py \ - --model_name_or_path tiiuae/falcon-180B \ - --dataset_name timdettmers/openassistant-guanaco \ - --bf16 True \ - --output_dir ./model_lora_falcon_ddp \ - --num_train_epochs 3 \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 16 \ - --eval_strategy "no" \ - --save_strategy "no" \ - --learning_rate 4e-4 \ - --max_grad_norm 0.3 \ - --warmup_ratio 0.03 \ - --lr_scheduler_type "constant" \ - --logging_steps 1 \ - --do_train \ - --use_habana \ - --use_lazy_mode \ - --pipelining_fwd_bwd \ - --throughput_warmup_steps 3 \ - --lora_rank=64 \ - --lora_alpha=16 \ - --lora_dropout=0.1 \ - --lora_target_modules "query_key_value" "dense" "dense_h_to_4h" "dense_4h_to_h" \ - --dataset_concatenation \ - --max_seq_length 256 \ - --adam_epsilon 1e-08 \ - --do_eval \ - --validation_split_percentage 5 \ - --deepspeed ds_falcon_180b_z3.json -``` Default `peft_type` is `lora`, you could enable adalora or ia3 using `--peft_type adalora` or `--peft_type ia3`, or enable llama-adapter for llama model using `--peft_type llama-adapter`, or enable ln-tuning using `--peft_type ln_tuning`, or enable vera using `--peft_type vera`. 
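+
+For example, here is a hedged sketch of switching one of the LoRA commands above to IA3 via `--peft_type ia3`. It reuses only flags already shown in this README; the model, dataset, hyperparameters and output path are illustrative placeholders, and method-specific options (such as target modules) may still be required:
+
+```bash
+# Illustrative sketch only: swap the PEFT method on an existing fine-tuning command.
+python3 run_lora_clm.py \
+    --model_name_or_path meta-llama/Llama-2-7b-hf \
+    --dataset_name tatsu-lab/alpaca \
+    --peft_type ia3 \
+    --bf16 True \
+    --output_dir ./model_ia3_llama \
+    --num_train_epochs 3 \
+    --per_device_train_batch_size 2 \
+    --learning_rate 3e-4 \
+    --logging_steps 1 \
+    --do_train \
+    --use_habana \
+    --use_lazy_mode \
+    --max_seq_length 512 \
+    --low_cpu_mem_usage True
+```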
#### Custom Files @@ -824,7 +485,7 @@ The format of the text files (with extensions .text or .txt) is expected to be ### Prompt/Prefix/P-tuning To run prompt tuning finetuning, you can use `run_prompt_tuning_clm.py`. -Here are single-/multi-device command examples for Llama2-7B: +Here are single-card command examples for Llama2-7B: - single-card finetuning of meta-llama/Llama-2-7b-hf with dataset "ought/raft" and config "twitter_complaints": ```bash python3 run_prompt_tuning_clm.py \ @@ -844,25 +505,6 @@ python3 run_prompt_tuning_clm.py \ --use_lazy_mode ``` -- multi-card finetuning of meta-llama/Llama-2-7b-hf with dataset "ought/raft" and config "twitter_complaints": -```bash -python3 ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_prompt_tuning_clm.py \ - --model_name_or_path meta-llama/Llama-2-7b-hf \ - --output_dir prompt_tuning_out \ - --bf16 True \ - --report_to=none \ - --per_device_train_batch_size 1 \ - --per_device_eval_batch_size 1 \ - --gradient_accumulation_steps 1 \ - --low_cpu_mem_usage True \ - --logging_steps 1 \ - --do_train \ - --num_train_epochs 50 \ - --do_eval \ - --use_habana \ - --use_lazy_mode -``` Default `peft_type` is `prompt_tuning`, you could enable prefix-tuning or p-tuning using `--peft_type prefix_tuning` or `--peft_type p_tuning`. Use the prompt finetuned model for text-generation: diff --git a/examples/language-modeling/run_clm.py b/examples/language-modeling/run_clm.py index 3ee73bc612..93d85ba54b 100644 --- a/examples/language-modeling/run_clm.py +++ b/examples/language-modeling/run_clm.py @@ -156,6 +156,32 @@ class ModelArguments: ) }, ) + attn_softmax_bf16: bool = field( + default=False, + metadata={"help": ("Whether to run attention softmax layer in bf16 precision for fine-tuning.")}, + ) + use_flash_attention: bool = field( + default=False, + metadata={"help": ("Whether to use Habana flash attention for fine-tuning.")}, + ) + flash_attention_recompute: bool = field( + default=False, + metadata={ + "help": ( + "Whether to enable recompute in Habana flash attention for fine-tuning." + " It is applicable only when use_flash_attention is True." + ) + }, + ) + flash_attention_causal_mask: bool = field( + default=False, + metadata={ + "help": ( + "Whether to enable causal mask in Habana flash attention for fine-tuning." + " It is applicable only when use_flash_attention is True." + ) + }, + ) low_cpu_mem_usage: bool = field( default=False, metadata={ @@ -482,6 +508,14 @@ def main(): if len(tokenizer) > embedding_size: model.resize_token_embeddings(len(tokenizer)) + # We need to add these fused kernels config + if model_args.attn_softmax_bf16: + model.generation_config.attn_softmax_bf16 = True + if model_args.use_flash_attention: + model.generation_config.use_flash_attention = True + model.generation_config.flash_attention_recompute = model_args.flash_attention_recompute + model.generation_config.flash_attention_causal_mask = model_args.flash_attention_causal_mask + # Preprocessing the datasets. # First we tokenize all the texts. if training_args.do_train: diff --git a/examples/multi-node-training/README.md b/examples/multi-node-training/README.md index 0e40e616f8..bc0ba8fda0 100644 --- a/examples/multi-node-training/README.md +++ b/examples/multi-node-training/README.md @@ -111,6 +111,10 @@ env_variable_2_name=value ... ``` +You can find an example for GaudiNIC instances [here](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training/GaudiNIC/.deepspeed_env). 
+ +> Note above environment variables refers to /etc/profile.d/habanalabs.sh inside docker, and should set only on GaudiNIC master node. + You can find an example for AWS instances [here](https://github.com/huggingface/optimum-habana/tree/main/examples/multi-node-training/EFA/.deepspeed_env). > Note that one should set `HCCL_OVER_OFI=1` and `LD_LIBRARY_PATH=/root/hccl_ofi_wrapper:/opt/amazon/openmpi/lib:/opt/amazon/efa/lib` only on AWS DL1 instances. *These should not be used otherwise*. diff --git a/examples/question-answering/README.md b/examples/question-answering/README.md index c7414c777d..d7a83ea5c8 100755 --- a/examples/question-answering/README.md +++ b/examples/question-answering/README.md @@ -33,163 +33,6 @@ First, you should install the requirements: pip install -r requirements.txt ``` -## Fine-tuning BERT on SQuAD1.1 - -For the following cases, an example of a Gaudi configuration file is given -[here](https://github.com/huggingface/optimum-habana#how-to-use-it). - - -### Single-card Training - -This example code fine-tunes BERT on the SQuAD1.1 dataset. - -```bash -python run_qa.py \ - --model_name_or_path bert-large-uncased-whole-word-masking \ - --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ - --dataset_name squad \ - --do_train \ - --do_eval \ - --per_device_train_batch_size 32 \ - --per_device_eval_batch_size 8 \ - --learning_rate 3e-5 \ - --num_train_epochs 2 \ - --max_seq_length 384 \ - --doc_stride 128 \ - --output_dir /tmp/squad/ \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --throughput_warmup_steps 3 \ - --bf16 \ - --sdp_on_bf16 -``` - -For torch.compile mode, -```bash -PT_HPU_LAZY_MODE=0 PT_ENABLE_INT64_SUPPORT=1 python run_qa.py \ - --model_name_or_path bert-large-uncased-whole-word-masking \ - --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ - --dataset_name squad \ - --do_train \ - --do_eval \ - --per_device_train_batch_size 32 \ - --per_device_eval_batch_size 8 \ - --learning_rate 3e-5 \ - --num_train_epochs 2 \ - --max_seq_length 384 \ - --doc_stride 128 \ - --output_dir /tmp/squad/ \ - --use_habana \ - --torch_compile_backend hpu_backend \ - --torch_compile \ - --use_lazy_mode false \ - --throughput_warmup_steps 3 \ - --bf16 \ - --sdp_on_bf16 -``` - -### Multi-card Training - -Here is how you would fine-tune the BERT large model (with whole word masking) on the SQuAD dataset using the `run_qa` script, with 8 HPUs: - -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_qa.py \ - --model_name_or_path bert-large-uncased-whole-word-masking \ - --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ - --dataset_name squad \ - --do_train \ - --do_eval \ - --per_device_train_batch_size 32 \ - --per_device_eval_batch_size 8 \ - --learning_rate 3e-5 \ - --num_train_epochs 2 \ - --max_seq_length 384 \ - --doc_stride 128 \ - --output_dir /tmp/squad_output/ \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --throughput_warmup_steps 3 \ - --bf16 \ - --sdp_on_bf16 -``` - -For torch.compile mode, -```bash -PT_HPU_LAZY_MODE=0 PT_ENABLE_INT64_SUPPORT=1 python ../gaudi_spawn.py \ - --world_size 8 --use_mpi run_qa.py \ - --model_name_or_path bert-large-uncased-whole-word-masking \ - --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ - --dataset_name squad \ - --do_train \ - --do_eval \ - --per_device_train_batch_size 32 \ - --per_device_eval_batch_size 8 \ - --learning_rate 3e-5 \ - --num_train_epochs 2 \ - --max_seq_length 384 \ - --doc_stride 
128 \ - --output_dir /tmp/squad_output/ \ - --use_habana \ - --torch_compile_backend hpu_backend \ - --torch_compile \ - --use_lazy_mode false \ - --throughput_warmup_steps 3 \ - --bf16 \ - --sdp_on_bf16 -``` - - -### Using DeepSpeed - -Similarly to multi-card training, here is how you would fine-tune the BERT large model (with whole word masking) on the SQuAD dataset using DeepSpeed with 8 HPUs: - -```bash -python ../gaudi_spawn.py \ - --world_size 8 --use_deepspeed run_qa.py \ - --model_name_or_path bert-large-uncased-whole-word-masking \ - --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ - --dataset_name squad \ - --do_train \ - --do_eval \ - --per_device_train_batch_size 32 \ - --per_device_eval_batch_size 8 \ - --learning_rate 3e-5 \ - --num_train_epochs 2 \ - --max_seq_length 384 \ - --doc_stride 128 \ - --output_dir /tmp/squad_output/ \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --throughput_warmup_steps 3 \ - --deepspeed path_to_my_deepspeed_config \ - --sdp_on_bf16 -``` - -You can look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana. -Here is a DeepSpeed configuration you can use to train your models on Gaudi: -```json -{ - "steps_per_print": 64, - "train_batch_size": "auto", - "train_micro_batch_size_per_gpu": "auto", - "gradient_accumulation_steps": "auto", - "bf16": { - "enabled": true - }, - "gradient_clipping": 1.0, - "zero_optimization": { - "stage": 2, - "overlap_comm": false, - "reduce_scatter": false, - "contiguous_gradients": false - } -} -``` - ## Fine-tuning Llama on SQuAD1.1 > [!NOTE] @@ -199,7 +42,7 @@ Here is a command you can run to train a Llama model for question answering: ```bash python ../gaudi_spawn.py \ --world_size 8 --use_deepspeed run_qa.py \ - --model_name_or_path FlagAlpha/Llama2-Chinese-13b-Chat \ + --model_name_or_path meta-llama/Llama-2-7b-chat-hf \ --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ --dataset_name squad \ --do_train \ @@ -224,77 +67,3 @@ python ../gaudi_spawn.py \ ## Inference To run only inference, you can start from the commands above and you just have to remove the training-only arguments such as `--do_train`, `--per_device_train_batch_size`, `--num_train_epochs`, etc... 
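+
+As an example, an evaluation-only sketch derived from the Llama command above could look like the following. This is a single-card illustration that assumes the same dataset and sequence settings as the fine-tuning command; the output directory is a placeholder:
+
+```bash
+# Illustrative sketch only: evaluation without the training-only arguments.
+python run_qa.py \
+    --model_name_or_path meta-llama/Llama-2-7b-chat-hf \
+    --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \
+    --dataset_name squad \
+    --do_eval \
+    --per_device_eval_batch_size 8 \
+    --max_seq_length 384 \
+    --doc_stride 128 \
+    --output_dir /tmp/squad/ \
+    --use_habana \
+    --use_lazy_mode \
+    --use_hpu_graphs_for_inference \
+    --bf16
+```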
- -For instance, you can run inference with BERT on SQuAD on 1 Gaudi card with the following command: -```bash -python run_qa.py \ - --model_name_or_path bert-large-uncased-whole-word-masking \ - --gaudi_config_name Habana/bert-large-uncased-whole-word-masking \ - --dataset_name squad \ - --do_eval \ - --per_device_eval_batch_size 8 \ - --max_seq_length 384 \ - --doc_stride 128 \ - --output_dir /tmp/squad/ \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --bf16 \ - --sdp_on_bf16 -``` - - -## Recommended Hyperparameters for Mixed Precision - -| | learning_rate | num_train_epochs | per_device_train_batch_size | per_device_eval_batch_size | -|----------------------------|:----:|:--:|:-:|:-:| -| BERT base | 3e-5 | 2 | 24 | 8 | -| BERT large | 3e-5 | 2 | 24 | 8 | -| RoBERTa base | 3e-5 | 2 | 12 | 8 | -| RoBERTa large | 3e-5 | 2 | 12 | 8 | -| ALBERT large (single-card) | 5e-5 | 2 | 32 | 4 | -| ALBERT large (multi-card) | 6e-5 | 2 | 32 | 4 | -| ALBERT XXL (single-card) | 5e-6 | 2 | 16 | 2 | -| ALBERT XXL (multi-card) | 5e-5 | 2 | 16 | 2 | -| DistilBERT | 5e-5 | 3 | 8 | 8 | -| meta-llama/Llama-2-13b-chat-hf (multi-card) | 3e-5 | 2 | 8 | 8 | -| FlagAlpha/Llama2-Chinese-13b-Chat (multi-card) | 3e-5 | 2 | 8 | 8 | - - -## Fine-tuning T5 on SQuAD2.0 - -The [`run_seq2seq_qa.py`](https://github.com/huggingface/optimum-habana/blob/main/examples/question-answering/run_seq2seq_qa.py) script is meant for encoder-decoder (also called seq2seq) Transformer models, such as T5 or BART. These models are generative, rather than discriminative. This means that they learn to generate the correct answer, rather than predicting the start and end position of the tokens of the answer. - -The following command fine-tunes T5 on the SQuAD2.0 dataset: - -```bash -python run_seq2seq_qa.py \ - --model_name_or_path t5-small \ - --gaudi_config_name Habana/t5 \ - --dataset_name squad_v2 \ - --version_2_with_negative \ - --context_column context \ - --question_column question \ - --answer_column answers \ - --do_train \ - --do_eval \ - --per_device_train_batch_size 16 \ - --per_device_eval_batch_size 33 \ - --learning_rate 3e-5 \ - --num_train_epochs 2 \ - --max_seq_length 384 \ - --doc_stride 128 \ - --output_dir /tmp/seq2seq_squad/ \ - --predict_with_generate \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --ignore_pad_token_for_loss False \ - --pad_to_max_length \ - --save_strategy epoch \ - --throughput_warmup_steps 3 \ - --sdp_on_bf16 \ - --bf16 -``` - -For multi-card and DeepSpeed runs, you can use `python ../gaudi_spawn.py --world_size 8 --use_mpi` and `python ../gaudi_spawn.py --world_size 8 --use_deepspeed` as shown in the previous sections. diff --git a/examples/sentence-transformers-training/nli/README.md b/examples/sentence-transformers-training/nli/README.md index 7a1b0079a9..2a549ef104 100644 --- a/examples/sentence-transformers-training/nli/README.md +++ b/examples/sentence-transformers-training/nli/README.md @@ -4,6 +4,13 @@ Given two sentences (premise and hypothesis), the task of Natural Language Infer The paper in [Conneau et al.](https://arxiv.org/abs/1705.02364) shows that NLI data can be quite useful when training Sentence Embedding methods. In [Sentence-BERT-Paper](https://arxiv.org/abs/1908.10084) NLI as a first fine-tuning step for sentence embedding methods has been used. 
+## Requirements + +First, you should install the requirements: +```bash +pip install -r requirements.txt +``` + # General Models ## Single-card Training diff --git a/examples/sentence-transformers-training/paraphrases/README.md b/examples/sentence-transformers-training/paraphrases/README.md index 8961172025..1e95a425d1 100644 --- a/examples/sentence-transformers-training/paraphrases/README.md +++ b/examples/sentence-transformers-training/paraphrases/README.md @@ -4,6 +4,12 @@ To fine-tune on the paraphrase task: +0. Install required packages + + ```sh + pip install -r requirements.txt + ``` + 1. Choose a pre-trained model `` (For example: `bert-base-uncased`). 2. Choose the training, evaluation, and test dataset(s). Here, we use a dataset dictionary to include multiple datasets. diff --git a/examples/sentence-transformers-training/sts/README.md b/examples/sentence-transformers-training/sts/README.md index 3ca2602012..6474c2e07d 100644 --- a/examples/sentence-transformers-training/sts/README.md +++ b/examples/sentence-transformers-training/sts/README.md @@ -5,6 +5,13 @@ Semantic Textual Similarity (STS) assigns a score on the similarity of two texts - **[training_stsbenchmark.py](training_stsbenchmark.py)** - This example shows how to create a SentenceTransformer model from scratch by using a pre-trained transformer model (e.g. [`distilbert-base-uncased`](https://huggingface.co/distilbert/distilbert-base-uncased)) together with a pooling layer. - **[training_stsbenchmark_continue_training.py](training_stsbenchmark_continue_training.py)** - This example shows how to continue training on STS data for a previously created & trained SentenceTransformer model (e.g. [`all-mpnet-base-v2`](https://huggingface.co/sentence-transformers/all-mpnet-base-v2)). +## Requirements + +First, you should install the requirements: +```bash +pip install -r requirements.txt +``` + # General Models ## Single-card Training diff --git a/examples/speech-recognition/README.md b/examples/speech-recognition/README.md index fe80cf775f..1f0f8fbe38 100644 --- a/examples/speech-recognition/README.md +++ b/examples/speech-recognition/README.md @@ -89,7 +89,7 @@ python run_speech_recognition_ctc.py \ --bf16 \ --use_hpu_graphs_for_training \ --use_hpu_graphs_for_inference \ - --sdp_on_bf16 + --attn_implementation sdpa ``` On a single HPU, this script should run in *ca.* 6 hours and yield a CTC loss of **0.059** and a word error rate of **0.0423**. @@ -132,7 +132,7 @@ python ../gaudi_spawn.py \ --sdp_on_bf16 \ --use_hpu_graphs_for_training \ --use_hpu_graphs_for_inference \ - --sdp_on_bf16 + --attn_implementation sdpa ``` On 8 HPUs, this script should run in *ca.* 49 minutes and yield a CTC loss of **0.0613** and a word error rate of **0.0458**. @@ -181,7 +181,8 @@ python ../gaudi_spawn.py \ --gaudi_config_name Habana/wav2vec2 \ --throughput_warmup_steps 3 \ --deepspeed ../../tests/configs/deepspeed_zero_2.json \ - --sdp_on_bf16 + --sdp_on_bf16 \ + --attn_implementation sdpa ``` [The documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) provides more information about how to use DeepSpeed within Optimum Habana. 
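+
+Note that this patch also adds flash-attention options to `run_speech_recognition_ctc.py` (`--use_flash_attention`, `--flash_attention_recompute`, `--flash_attention_fast_softmax`; see the `ModelArguments` changes further down in this diff). A hedged sketch of how they could be passed is shown below; `<existing arguments>` is a placeholder for the unchanged model, dataset and training options of the commands above, and the benefit of these flags is model-dependent and not validated here:
+
+```bash
+# Illustrative sketch only: append the new flash-attention flags to an existing command.
+# <existing arguments> stands for the model/dataset/training options already shown above.
+python run_speech_recognition_ctc.py \
+    <existing arguments> \
+    --use_flash_attention \
+    --flash_attention_recompute \
+    --flash_attention_fast_softmax
+```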
@@ -214,8 +215,7 @@ python run_speech_recognition_ctc.py \ --gaudi_config_name="Habana/wav2vec2" \ --sdp_on_bf16 \ --bf16 \ - --use_hpu_graphs_for_inference \ - --sdp_on_bf16 + --use_hpu_graphs_for_inference ``` ## Sequence to Sequence diff --git a/examples/speech-recognition/requirements.txt b/examples/speech-recognition/requirements.txt index 3319dee2c7..b7c33c8ba1 100644 --- a/examples/speech-recognition/requirements.txt +++ b/examples/speech-recognition/requirements.txt @@ -1,4 +1,5 @@ datasets >= 1.18.0, <= 2.19.2 +numba==0.60.0 librosa jiwer evaluate diff --git a/examples/speech-recognition/run_speech_recognition_ctc.py b/examples/speech-recognition/run_speech_recognition_ctc.py index f5da991dbf..81185be677 100644 --- a/examples/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/speech-recognition/run_speech_recognition_ctc.py @@ -152,6 +152,33 @@ class ModelArguments: "useful to downsample the output length." }, ) + use_flash_attention: bool = field( + default=False, metadata={"help": "Whether to use Habana flash attention for fine-tuning"} + ) + flash_attention_recompute: bool = field( + default=False, + metadata={ + "help": "Whether to enable recompute in Habana flash attention for fine-tuning." + " It is applicable only when use_flash_attention is True." + }, + ) + flash_attention_fast_softmax: bool = field( + default=False, + metadata={ + "help": "Whether to use fast softmax for Habana flash attention." + " It is applicable only when use_flash_attention is True." + }, + ) + + def __post_init__(self): + if self.use_flash_attention: + os.environ["USE_FLASH_ATTENTION"] = "1" + if self.flash_attention_recompute: + assert self.use_flash_attention, "flash_attention_recompute is set, but use_flash_attention is not" + os.environ["FLASH_ATTENTION_RECOMPUTE"] = "1" + if self.flash_attention_fast_softmax: + assert self.use_flash_attention, "flash_attention_fast_softmax is set, but use_flash_attention is not" + os.environ["FLASH_ATTENTION_FAST_SOFTMAX"] = "1" @dataclass @@ -535,6 +562,7 @@ def remove_special_characters(batch): cache_dir=model_args.cache_dir, token=data_args.token, trust_remote_code=data_args.trust_remote_code, + attn_implementation=training_args.attn_implementation, ) # 4. Next, if no tokenizer file is defined, diff --git a/examples/stable-diffusion/README.md b/examples/stable-diffusion/README.md index 6f3f61a4e6..9919780543 100644 --- a/examples/stable-diffusion/README.md +++ b/examples/stable-diffusion/README.md @@ -16,10 +16,10 @@ limitations under the License. # Stable Diffusion Examples -This directory contains a script that showcases how to perform text-to-image generation using Stable Diffusion on Intel® Gaudi® AI Accelerators. - -Stable Diffusion was proposed in [Stable Diffusion Announcement](https://stability.ai/blog/stable-diffusion-announcement) by Patrick Esser and Robin Rombach and the Stability AI team. +This directory contains sample scripts demonstrating how to perform diffusion-based generative tasks on Intel® Gaudi® AI Accelerators. +Stable Diffusion was introduced in [Stable Diffusion Announcement](https://stability.ai/blog/stable-diffusion-announcement) by Patrick Esser, +Robin Rombach and the Stability AI team. 
## Requirements @@ -28,11 +28,11 @@ First, you should install the requirements: pip install -r requirements.txt ``` -## Text-to-image Generation +## Text-to-Image Generation -### Single Prompt +### Stable Diffusion -Here is how to generate images with one prompt: +Here's how to generate images using the Stable Diffusion 1.4 model with a single prompt: ```bash python text_to_image_generation.py \ @@ -48,13 +48,12 @@ python text_to_image_generation.py \ --bf16 ``` +> [!NOTE] > HPU graphs are recommended when generating images by batches to get the fastest possible generations. > The first batch of images entails a performance penalty. All subsequent batches will be generated much faster. > You can enable this mode with `--use_hpu_graphs`. -### Multiple Prompts - -Here is how to generate images with several prompts: +To generate images with multiple prompts, simply include two prompts in your input as shown below: ```bash python text_to_image_generation.py \ @@ -70,9 +69,7 @@ python text_to_image_generation.py \ --bf16 ``` -### Distributed inference with multiple HPUs - -Here is how to generate images with two prompts on two HPUs: +Distributed inference with multiple HPUs is also supported. Below is an example demonstrating how to generate images with two prompts on two HPUs: ```bash python ../gaudi_spawn.py \ @@ -90,13 +87,18 @@ python ../gaudi_spawn.py \ --distributed ``` +> [!NOTE] > HPU graphs are recommended when generating images by batches to get the fastest possible generations. > The first batch of images entails a performance penalty. All subsequent batches will be generated much faster. > You can enable this mode with `--use_hpu_graphs`. +You can run other older Stable Diffusion models in a similar manner. For example, to generate images with Stable Diffusion 1.5, use the option: +`--model_name_or_path stable-diffusion-v1-5/stable-diffusion-v1-5`. Examples showcasing Stable Diffusion 2 are provided next. + ### Stable Diffusion 2 -[Stable Diffusion 2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion_2) can also be used to generate images with this script. Here is an example for a single prompt: +[Stable Diffusion 2](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion_2) can also be used +to generate images with this script. Here is an example demonstrating image generation with a single prompt: ```bash python text_to_image_generation.py \ @@ -114,17 +116,18 @@ python text_to_image_generation.py \ --bf16 ``` +> [!NOTE] > There are two different checkpoints for Stable Diffusion 2: -> > - use [stabilityai/stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1) for generating 768x768 images > - use [stabilityai/stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base) for generating 512x512 images ### Latent Diffusion Model for 3D (LDM3D) -[LDM3D](https://arxiv.org/abs/2305.10853) generates both image and depth map data from a given text prompt, allowing users to generate RGBD images from text prompts. +[LDM3D](https://arxiv.org/abs/2305.10853) generates both image and depth map data from a given text prompt, allowing users +to generate RGBD images from text prompts. -[Original checkpoint](https://huggingface.co/Intel/ldm3d) and [latest checkpoint](https://huggingface.co/Intel/ldm3d-4c) are open source. -A [demo](https://huggingface.co/spaces/Intel/ldm3d) is also available. 
Here is how to run this model: +[Original checkpoint](https://huggingface.co/Intel/ldm3d) and [latest checkpoint](https://huggingface.co/Intel/ldm3d-4c) +are open source. A [demo](https://huggingface.co/spaces/Intel/ldm3d) is also available. Here is how to run this model: ```bash python text_to_image_generation.py \ @@ -144,8 +147,7 @@ python text_to_image_generation.py \ Here is how to generate images and depth maps with two prompts on two HPUs: ```bash -python ../gaudi_spawn.py \ - --world_size 2 text_to_image_generation.py \ +python ../gaudi_spawn.py --world_size 2 text_to_image_generation.py \ --model_name_or_path "Intel/ldm3d-4c" \ --prompts "An image of a squirrel in Picasso style" "A shiny flying horse taking off" \ --num_images_per_prompt 10 \ @@ -160,15 +162,16 @@ python ../gaudi_spawn.py \ --distributed ``` +> [!NOTE] > There are three different checkpoints for LDM3D: -> > - use [original checkpoint](https://huggingface.co/Intel/ldm3d) to generate outputs from the paper > - use [the latest checkpoint](https://huggingface.co/Intel/ldm3d-4c) for generating improved results > - use [the pano checkpoint](https://huggingface.co/Intel/ldm3d-pano) to generate panoramic view ### Stable Diffusion XL (SDXL) -Stable Diffusion XL was proposed in [SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis](https://arxiv.org/pdf/2307.01952.pdf) by the Stability AI team. +Stable Diffusion XL was proposed in [SDXL: Improving Latent Diffusion Models for High-Resolution Image Synthesis](https://arxiv.org/pdf/2307.01952.pdf) +by the Stability AI team. Here is how to generate SDXL images with a single prompt: @@ -178,6 +181,7 @@ python text_to_image_generation.py \ --prompts "Sailing ship painting by Van Gogh" \ --num_images_per_prompt 28 \ --batch_size 7 \ + --num_inference_steps 30 \ --image_save_dir /tmp/stable_diffusion_xl_images \ --scheduler euler_discrete \ --use_habana \ @@ -187,30 +191,12 @@ python text_to_image_generation.py \ --bf16 ``` +> [!NOTE] > HPU graphs are recommended when generating images by batches to get the fastest possible generations. > The first batch of images entails a performance penalty. All subsequent batches will be generated much faster. > You can enable this mode with `--use_hpu_graphs`. -Here is how to generate SDXL images with several prompts: - -```bash -python text_to_image_generation.py \ - --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ - --prompts "Sailing ship painting by Van Gogh" "A shiny flying horse taking off" \ - --num_images_per_prompt 32 \ - --batch_size 8 \ - --image_save_dir /tmp/stable_diffusion_xl_images \ - --scheduler euler_discrete \ - --use_habana \ - --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion \ - --sdp_on_bf16 \ - --bf16 -``` - -SDXL combines a second text encoder (OpenCLIP ViT-bigG/14) with the original text encoder to significantly -increase the number of parameters. Here is how to generate images with several prompts for both `prompt` -and `prompt_2` (2nd text encoder), as well as their negative prompts: +SDXL integrates a second text encoder (OpenCLIP ViT-bigG/14), alongside the original Stable Diffusion text encoder. This addition significantly increases the number of parameters, enabling more detailed and descriptive prompts. 
Below is an example of how to generate images using multiple prompts for both `prompt` (primary text encoder) and `prompt_2` (secondary text encoder), along with their respective negative prompts: ```bash python text_to_image_generation.py \ @@ -230,11 +216,10 @@ python text_to_image_generation.py \ --bf16 ``` -Here is how to generate SDXL images with two prompts on two HPUs: +SDXL also supports distributed inferencing with Intel Gaudi accelerators. Below is an example of generating SDXL images in a distributed manner using two prompts on two HPUs: ```bash -python ../gaudi_spawn.py \ - --world_size 2 text_to_image_generation.py \ +python ../gaudi_spawn.py --world_size 2 text_to_image_generation.py \ --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ --prompts "Sailing ship painting by Van Gogh" "A shiny flying horse taking off" \ --prompts_2 "Red tone" "Blue tone" \ @@ -252,26 +237,13 @@ python ../gaudi_spawn.py \ --distributed ``` -Here is how to generate SDXL images with optimized pipeline: -```bash -python text_to_image_generation.py \ - --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ - --prompts "Sailing ship painting by Van Gogh" \ - --num_images_per_prompt 28 \ - --batch_size 7 \ - --image_save_dir /tmp/stable_diffusion_xl_images \ - --scheduler euler_discrete \ - --use_habana \ - --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion \ - --sdp_on_bf16 \ - --bf16 \ - --optimize -``` +The performance-optimized SDXL pipeline can be enabled using the `--optimize` option. This option utilizes a more aggressively optimized attention mechanism for enhanced performance. Additionally, it supports running +inference in mixed FP8 precision. -Here is how to generate SDXL images with optimized pipeline in fp8: +Here is how to generate SDXL images with optimized pipeline in FP8 precision: ```bash -QUANT_CONFIG=./quantization/quant_config.json python text_to_image_generation.py \ +QUANT_CONFIG=quantization/stable-diffusion-xl/quantize_config.json \ +python text_to_image_generation.py \ --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ --prompts "Sailing ship painting by Van Gogh" \ --num_images_per_prompt 28 \ @@ -286,13 +258,11 @@ QUANT_CONFIG=./quantization/quant_config.json python text_to_image_generation.py --optimize ``` -> HPU graphs are recommended when generating images by batches to get the fastest possible generations. -> The first batch of images entails a performance penalty. All subsequent batches will be generated much faster. -> You can enable this mode with `--use_hpu_graphs`. - ### SDXL-Turbo -SDXL-Turbo is a distilled version of SDXL 1.0, trained for real-time synthesis. +The knowledge distillation technique can be used to train a distilled version of SDXL, allowing for high-quality +image generation with fewer inference steps. SDXL-Turbo is a distilled version of Stable Diffusion XL 1.0, +optimized for real-time synthesis. Here is how to generate images with multiple prompts: @@ -314,11 +284,9 @@ python text_to_image_generation.py \ --timestep_spacing trailing ``` -> HPU graphs are recommended when generating images by batches to get the fastest possible generations. -> The first batch of images entails a performance penalty. All subsequent batches will be generated much faster. -> You can enable this mode with `--use_hpu_graphs`. - -> Note: there is a regression with "--guidance_scale 0.0" in current release which will be addressed in later releases. Setting `--guidance_scale` to a value larger than 1 resolves the regression. 
+> [!WARNING] +> There is a regression with `--guidance_scale 0.0` in current release which will be addressed in later releases. +> Setting `--guidance_scale` to a value larger than 1 resolves the regression. ### Stable Diffusion 3 (SD3) @@ -337,7 +305,6 @@ huggingface-cli login Here is how to generate SD3 images with a single prompt: ```bash -PT_HPU_MAX_COMPOUND_OP_SIZE=1 \ python text_to_image_generation.py \ --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers \ --prompts "Sailing ship painting by Van Gogh" \ @@ -353,14 +320,53 @@ python text_to_image_generation.py \ --bf16 ``` -> For improved performance of the SD3 pipeline on Gaudi, it is recommended to configure the environment -> by setting PT_HPU_MAX_COMPOUND_OP_SIZE to 1. +This model can also be quantized with some ops running in FP8 precision. + +Before quantization, run stats collection using measure mode: + +```bash +QUANT_CONFIG=quantization/stable-diffusion-3/measure_config.json \ +python text_to_image_generation.py \ + --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers \ + --prompts "Sailing ship painting by Van Gogh" \ + --num_images_per_prompt 10 \ + --batch_size 1 \ + --num_inference_steps 28 \ + --image_save_dir /tmp/stable_diffusion_3_images \ + --scheduler default \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 \ + --quant_mode measure +``` + +After stats collection, here is how to run SD3 in quantization mode: + +```bash +QUANT_CONFIG=quantization/stable-diffusion-3/quantize_config.json \ +python text_to_image_generation.py \ + --model_name_or_path stabilityai/stable-diffusion-3-medium-diffusers \ + --prompts "Sailing ship painting by Van Gogh" \ + --num_images_per_prompt 10 \ + --batch_size 1 \ + --num_inference_steps 28 \ + --image_save_dir /tmp/stable_diffusion_3_images \ + --scheduler default \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 \ + --quant_mode quantize +``` ### FLUX.1 FLUX.1 was introduced by Black Forest Labs [here](https://blackforestlabs.ai/announcing-black-forest-labs/). -Here is how to run FLUX.1-schnell model (fast version of FLUX.1): +Here is how to run FLUX.1-schnell model (distilled fast version of FLUX.1): ```bash python text_to_image_generation.py \ @@ -449,10 +455,11 @@ python text_to_image_generation.py \ ## ControlNet -ControlNet was introduced in [Adding Conditional Control to Text-to-Image Diffusion Models](https://huggingface.co/papers/2302.05543) by Lvmin Zhang and Maneesh Agrawala. -It is a type of model for controlling StableDiffusion by conditioning the model with an additional input image. -Here is how to generate images conditioned by canny edge model: +ControlNet was introduced in [Adding Conditional Control to Text-to-Image Diffusion Models](https://huggingface.co/papers/2302.05543) +by Lvmin Zhang and Maneesh Agrawala, enables conditioning the Stable Diffusion model with an additional input image. This allows for precise control over the composition of generated images using various features such as edges, pose, depth, and more. + +Here is how to generate images conditioned by Canny edge model: ```bash python text_to_image_generation.py \ @@ -470,29 +477,11 @@ python text_to_image_generation.py \ --bf16 ``` -Here is how to generate images conditioned by canny edge model and with multiple prompts: +The ControlNet example can be run with multiple prompts by supplying more than one prompt in the input. 
+Additionally, it supports distributed execution. Below is an example of generating images conditioned by the Canny edge model using two prompts on two HPUs: ```bash -python text_to_image_generation.py \ - --model_name_or_path CompVis/stable-diffusion-v1-4 \ - --controlnet_model_name_or_path lllyasviel/sd-controlnet-canny \ - --prompts "futuristic-looking woman" "a rusty robot" \ - --control_image https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png \ - --num_images_per_prompt 28 \ - --batch_size 7 \ - --image_save_dir /tmp/controlnet_images \ - --use_habana \ - --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion \ - --sdp_on_bf16 \ - --bf16 -``` - -Here is how to generate images conditioned by canny edge model and with two prompts on two HPUs: - -```bash -python ../gaudi_spawn.py \ - --world_size 2 text_to_image_generation.py \ +python ../gaudi_spawn.py --world_size 2 text_to_image_generation.py \ --model_name_or_path CompVis/stable-diffusion-v1-4 \ --controlnet_model_name_or_path lllyasviel/sd-controlnet-canny \ --prompts "futuristic-looking woman" "a rusty robot" \ @@ -508,44 +497,7 @@ python ../gaudi_spawn.py \ --distributed ``` -Here is how to generate images conditioned by open pose model: - -```bash -python text_to_image_generation.py \ - --model_name_or_path CompVis/stable-diffusion-v1-4 \ - --controlnet_model_name_or_path lllyasviel/sd-controlnet-openpose \ - --prompts "Chef in the kitchen" \ - --control_image https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/pose.png \ - --control_preprocessing_type "none" \ - --num_images_per_prompt 28 \ - --batch_size 7 \ - --image_save_dir /tmp/controlnet_images \ - --use_habana \ - --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion \ - --sdp_on_bf16 \ - --bf16 -``` - -Here is how to generate images with conditioned by canny edge model using Stable Diffusion 2 - -```bash -python text_to_image_generation.py \ - --model_name_or_path stabilityai/stable-diffusion-2-1 \ - --controlnet_model_name_or_path thibaud/controlnet-sd21-canny-diffusers \ - --control_image https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png \ - --control_preprocessing_type "none" \ - --prompts "bird" \ - --seed 0 \ - --num_images_per_prompt 28 \ - --batch_size 7 \ - --image_save_dir /tmp/controlnet-2-1_images \ - --use_habana \ - --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion-2 \ - --sdp_on_bf16 \ - --bf16 -``` +These ControlNet examples will preprocess the input image to derive Canny edges. Alternatively, you can use `--control_preprocessing_type none` to supply a preprocessed control image directly, enabling many additional use cases. ## Inpainting @@ -591,44 +543,106 @@ python text_to_image_generation.py \ --bf16 ``` -## Image-to-image Generation +## Additional Stable Diffusion-based Inference Techniques -### Single Prompt +This section provides examples of additional inference techniques based on Stable Diffusion. For more details, please refer to +[Hugging Face Diffusers documentation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/overview_techniques). -Here is how to generate images with one prompt and one image. -Take instruct-pix2pix as an example. +### Unconditional Image Generation + +Here is how to perform unconditional image generation on Intel Gaudi. 
For more details, please refer to the +[Unconditional Image Generation](https://huggingface.co/docs/diffusers/using-diffusers/unconditional_image_generation) +section in the Hugging Face documentation. ```bash -python image_to_image_generation.py \ - --model_name_or_path "timbrooks/instruct-pix2pix" \ - --src_image_path "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg" \ - --prompts "turn him into cyborg" \ - --num_images_per_prompt 20 \ - --batch_size 4 \ - --guidance_scale 7.5 \ - --image_guidance_scale 1 \ - --num_inference_steps 10 \ - --image_save_dir /tmp/stable_diffusion_images \ +python unconditional_image_generation.py \ + --model_name_or_path "google/ddpm-ema-celebahq-256" \ + --batch_size 16 \ --use_habana \ + --use_gaudi_ddim_scheduler \ --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 \ + --save_outputs \ + --output_dir "/tmp/" +``` + +### Controlling Brightness + +Here is an example of how to control brightness. For more information, please refer to the +[Control Brightness](https://huggingface.co/docs/diffusers/main/en/using-diffusers/control_brightness) +section in the Hugging Face documentation. + +```bash +PT_HPU_MAX_COMPOUND_OP_SIZE=1 \ +python text_to_image_generation.py \ + --model_name_or_path ptx0/pseudo-journey-v2 \ + --prompts "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k" \ + --num_images_per_prompt 1 \ + --batch_size 1 \ + --use_habana \ + --use_hpu_graphs \ + --image_save_dir /tmp/stable_diffusion_images_brightness \ + --seed 33 \ + --use_zero_snr \ + --guidance_scale 0.7 \ + --timestep_spacing trailing +``` + +### Prompt Weighting + +Here is an example of how to run prompt weighting. For more information, please refer to the +[Weighted Prompts](https://huggingface.co/docs/diffusers/main/en/using-diffusers/weighted_prompts) +section in the Hugging Face documentation. + +```bash +python text_to_image_generation.py \ + --model_name_or_path CompVis/stable-diffusion-v1-4 \ + --prompts "a red cat playing with a ball+++" "a red cat playing with a ball---" \ + --num_images_per_prompt 4 \ + --batch_size 4 \ + --use_habana --use_hpu_graphs \ + --image_save_dir /tmp/stable_diffusion_images_compel \ + --seed 33 \ + --sdp_on_bf16 \ + --bf16 \ + --num_inference_steps 20 \ + --use_compel +``` + +### Controlling Image Quality + +Here is an example of how to improve image quality. For more details, please refer to the +[Image Quality](https://huggingface.co/docs/diffusers/main/en/using-diffusers/image_quality) +section in the Hugging Face documentation. + +```bash +python text_to_image_generation.py \ + --model_name_or_path CompVis/stable-diffusion-v1-4 \ + --prompts "A squirrel eating a burger" \ + --num_images_per_prompt 4 \ + --batch_size 4 \ + --use_habana \ + --image_save_dir /tmp/stable_diffusion_images_freeu \ + --seed 33 \ + --use_freeu \ --sdp_on_bf16 \ --bf16 ``` -> HPU graphs are recommended when generating images by batches to get the fastest possible generations. -> The first batch of images entails a performance penalty. All subsequent batches will be generated much faster. -> You can enable this mode with `--use_hpu_graphs`. +## Image-to-Image Generation + +Images can also be generated using initial input images to guide the diffusion-based image generation process. -### Multiple Prompts +### Stable Diffusion-based Image-to-Image -Here is how to generate images with several prompts and one image. 
+Here is how to generate images using a single prompt and an input image with the `timbrooks/instruct-pix2pix` model, which is based on Stable Diffusion: ```bash python image_to_image_generation.py \ --model_name_or_path "timbrooks/instruct-pix2pix" \ --src_image_path "https://raw.githubusercontent.com/timothybrooks/instruct-pix2pix/main/imgs/example.jpg" \ - --prompts "turn him into cyborg" "a strong soldier"\ + --prompts "turn him into cyborg" \ --num_images_per_prompt 20 \ --batch_size 4 \ --guidance_scale 7.5 \ @@ -642,13 +656,14 @@ python image_to_image_generation.py \ --bf16 ``` +> [!NOTE] > HPU graphs are recommended when generating images by batches to get the fastest possible generations. > The first batch of images entails a performance penalty. All subsequent batches will be generated much faster. > You can enable this mode with `--use_hpu_graphs`. ### Stable Diffusion XL Refiner -Here is how to generate SDXL images with a single prompt and one image: +Here is how to refine SDXL images using a single image and prompt: ```bash python image_to_image_generation.py \ @@ -667,17 +682,17 @@ python image_to_image_generation.py \ --bf16 ``` -### FLUX.1 Image to Image +### FLUX.1 Image-to-Image -Here is how to generate FLUX.1 images with a single prompt and one input image: +Here is how to generate a FLUX.1 image using a single input image and prompt: ```bash python image_to_image_generation.py \ --model_name_or_path "black-forest-labs/FLUX.1-dev" \ --src_image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png" \ --prompts "cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k" \ - --num_images_per_prompt 40 \ - --batch_size 10 \ + --num_images_per_prompt 10 \ + --batch_size 1 \ --strength 0.9 \ --guidance_scale 3.5 \ --num_inference_steps 30 \ @@ -691,7 +706,7 @@ python image_to_image_generation.py \ ### Stable Diffusion Image Variations -Here is how to generate images with one image, it does not accept prompt input +Here is how to generate image variations of a single image (without any input prompts): ```bash python image_to_image_generation.py \ @@ -710,7 +725,7 @@ python image_to_image_generation.py \ ### Depth to Image Generation -Here is how to generate a depth2img-guided image generation using HPU graphs with BF16: +Here is an example of performing depth-guided image generation: ```bash python depth_to_image_generation.py \ @@ -724,88 +739,20 @@ python depth_to_image_generation.py \ --bf16 ``` -## Unconditional Image Generation Example - -Here is how to perform unconditional-image-generation on Gaudi/HPU. - -Original unconditional image generation pipeline is shared in here: [Unconditional Image Generation](https://huggingface.co/docs/diffusers/using-diffusers/unconditional_image_generation) - -```bash -python unconditional_image_generation.py \ - --model_name_or_path "google/ddpm-ema-celebahq-256" \ - --batch_size 16 \ - --use_habana \ - --use_gaudi_ddim_scheduler \ - --use_hpu_graphs \ - --sdp_on_bf16 \ - --bf16 \ - --save_outputs \ - --output_dir "/tmp/" -``` - -## Additional inference techniques - -Here is how to run the diffusers examples of inference techniques. For more details, -please refer to [Hugging Face Diffusers doc](https://huggingface.co/docs/diffusers/main/en/using-diffusers/overview_techniques). +## Text-to-Video Generation -### Controlling brightness - -Here is how to run the example of controlling brightness. 
For more details, -please refer to [Hugging Face Diffusers doc](https://huggingface.co/docs/diffusers/main/en/using-diffusers/control_brightness). +This section demonstrates how to use the `GaudiTextToVideoSDPipeline` for text-to-video generation tasks on HPUs. +The pipeline employs a UNet3D structure and generates videos through an iterative denoising process. ```bash -PT_HPU_MAX_COMPOUND_OP_SIZE=1 python text_to_image_generation.py \ - --model_name_or_path ptx0/pseudo-journey-v2 \ - --prompts "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k" \ - --num_images_per_prompt 1 \ - --batch_size 1 \ +python text_to_video_generation.py \ + --model_name_or_path ali-vilab/text-to-video-ms-1.7b \ + --prompts "An astronaut riding a horse" \ --use_habana \ --use_hpu_graphs \ - --image_save_dir /tmp/stable_diffusion_images_brightness \ - --seed 33 \ - --use_zero_snr \ - --guidance_scale 0.7 \ - --timestep_spacing trailing + --dtype bf16 ``` -### Prompt weighting - -Here is how to run the example of prompt weighting. For more details, -please refer to [Hugging Face Diffusers doc](https://huggingface.co/docs/diffusers/main/en/using-diffusers/weighted_prompts). - -```bash -python text_to_image_generation.py \ - --model_name_or_path CompVis/stable-diffusion-v1-4 \ - --prompts "a red cat playing with a ball+++" "a red cat playing with a ball---" \ - --num_images_per_prompt 4 \ - --batch_size 4 \ - --use_habana --use_hpu_graphs \ - --image_save_dir /tmp/stable_diffusion_images_compel \ - --seed 33 \ - --sdp_on_bf16 \ - --bf16 \ - --num_inference_steps 20 \ - --use_compel -``` - -### Controlling image quality - -Here is how to run the example of improving image quality. For more details, -please refer to [Hugging Face Diffusers doc](https://huggingface.co/docs/diffusers/main/en/using-diffusers/image_quality). - -```bash -python text_to_image_generation.py \ - --model_name_or_path CompVis/stable-diffusion-v1-4 \ - --prompts "A squirrel eating a burger" \ - --num_images_per_prompt 4 \ - --batch_size 4 \ - --use_habana \ - --image_save_dir /tmp/stable_diffusion_images_freeu \ - --seed 33 \ - --use_freeu \ - --sdp_on_bf16 \ - --bf16 -``` # Stable Video Diffusion Examples Stable Video Diffusion (SVD) was unveiled in [Stable Video Diffusion Announcement](https://stability.ai/news/stable-video-diffusion-open-ai-video-model) @@ -834,6 +781,7 @@ python image_to_video_generation.py \ --bf16 ``` +> [!NOTE] > For improved performance of the image-to-video pipeline on Gaudi, it is recommended to configure the environment > by setting PT_HPU_MAX_COMPOUND_OP_SIZE to 1. 
@@ -845,10 +793,11 @@ Here is how to generate videos with several image prompts: PT_HPU_MAX_COMPOUND_OP_SIZE=1 \ python image_to_video_generation.py \ --model_name_or_path "stabilityai/stable-video-diffusion-img2vid-xt" \ - --image_path "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png" \ - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png" \ - "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png" \ - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png" \ + --image_path \ + "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png" \ + "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png" \ + "https://hf.co/datasets/huggingface/documentation-images/resolve/main/diffusers/input_image_vermeer.png" \ + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/hf-logo.png" \ --num_videos_per_prompt 1 \ --video_save_dir /tmp/stable_video_diffusion_xt \ --save_frames_as_images \ @@ -859,46 +808,75 @@ python image_to_video_generation.py \ --bf16 ``` +> [!NOTE] > For improved performance of the image-to-video pipeline on Gaudi, it is recommended to configure the environment > by setting PT_HPU_MAX_COMPOUND_OP_SIZE to 1. -### Image-to-video ControlNet +### Image-to-Video ControlNet Here is how to generate video conditioned by depth: -``` +```bash python image_to_video_generation.py \ --model_name_or_path "stabilityai/stable-video-diffusion-img2vid" \ --controlnet_model_name_or_path "CiaraRowles/temporal-controlnet-depth-svd-v1" \ - --control_image_path "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_0.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_1.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_2.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_3.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_4.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_5.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_6.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_7.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_8.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_9.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_10.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_11.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_12.png?raw=true" \ - "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_13.png?raw=true" \ + --control_image_path \ + 
"https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_0.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_1.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_2.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_3.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_4.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_5.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_6.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_7.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_8.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_9.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_10.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_11.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_12.png?raw=true" \ + "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/depth/frame_13.png?raw=true" \ --image_path "https://github.com/CiaraStrawberry/svd-temporal-controlnet/blob/main/validation_demo/chair.png?raw=true" \ --video_save_dir SVD_controlnet \ --save_frames_as_images \ --use_habana \ --use_hpu_graphs \ --gaudi_config Habana/stable-diffusion \ - --bf16 \ --sdp_on_bf16 \ + --bf16 \ --num_frames 14 \ --motion_bucket_id=14 \ --width=512 \ --height=512 ``` -> [!NOTE] -> For Gaudi3 only: -> 1. Due to a known issue, batch sizes for models needs to be reduced. It will be fixed in the future release. -> 2. The Image-to-video ControlNet command is not enabled on Gaudi3. +# I2vgen-xl +I2vgen-xl is high quality Image-to-Video synthesis via cascaded diffusion models. Please refer to [Huggingface i2vgen-xl doc](https://huggingface.co/ali-vilab/i2vgen-xl). + +Here is how to generate video with one image and text prompt: + +```bash +PT_HPU_MAX_COMPOUND_OP_SIZE=1 \ +python image_to_video_generation.py \ + --model_name_or_path "ali-vilab/i2vgen-xl" \ + --image_path "https://huggingface.co/datasets/diffusers/docs-images/resolve/main/i2vgen_xl_images/img_0009.png" \ + --num_videos_per_prompt 1 \ + --video_save_dir ./i2vgen_xl \ + --num_inference_steps 50 \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --gif \ + --num_frames 16 \ + --prompts "Papers were floating in the air on a table in the library" \ + --negative_prompts "Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms" \ + --seed 8888 \ + --sdp_on_bf16 \ + --bf16 +``` + +# Important Notes for Gaudi3 Users + +- **Batch Size Limitation**: Due to a known issue, batch sizes for some Stable Diffusion models need to be reduced. + This issue is expected to be resolved in a future release. + +- **Image-to-Video ControlNet**: The Image-to-Video ControlNet command is currently not supported on Gaudi3. 
diff --git a/examples/stable-diffusion/image_to_image_generation.py b/examples/stable-diffusion/image_to_image_generation.py index acc2536a26..68e29c97bd 100755 --- a/examples/stable-diffusion/image_to_image_generation.py +++ b/examples/stable-diffusion/image_to_image_generation.py @@ -223,10 +223,10 @@ def main(): args = parser.parse_args() # Set image resolution - res = {} + kwargs_call = {} if args.width > 0 and args.height > 0: - res["width"] = args.width - res["height"] = args.height + kwargs_call["width"] = args.width + kwargs_call["height"] = args.height sdxl_models = ["stable-diffusion-xl", "sdxl"] sdxl = False flux_models = ["FLUX.1"] @@ -236,6 +236,7 @@ def main(): "use_habana": args.use_habana, "use_hpu_graphs": args.use_hpu_graphs, "gaudi_config": args.gaudi_config_name, + "sdp_on_bf16": args.sdp_on_bf16, } # Import selected pipeline @@ -251,7 +252,7 @@ def main(): from optimum.habana.diffusers import GaudiStableDiffusionInstructPix2PixPipeline as Img2ImgPipeline kwargs["safety_checker"] = None - res["image_guidance_scale"] = args.image_guidance_scale + kwargs_call["image_guidance_scale"] = args.image_guidance_scale elif "image-variations" in args.model_name_or_path: from optimum.habana.diffusers import GaudiStableDiffusionImageVariationPipeline as Img2ImgPipeline @@ -290,7 +291,7 @@ def main(): kwargs["torch_dtype"] = torch.bfloat16 if args.throughput_warmup_steps is not None: - kwargs["throughput_warmup_steps"] = args.throughput_warmup_steps + kwargs_call["throughput_warmup_steps"] = args.throughput_warmup_steps pipeline = Img2ImgPipeline.from_pretrained( args.model_name_or_path, @@ -324,8 +325,7 @@ def main(): output_type=args.output_type, profiling_warmup_steps=args.profiling_warmup_steps, profiling_steps=args.profiling_steps, - sdp_on_bf16=args.sdp_on_bf16, - **res, + **kwargs_call, ) elif flux: outputs = pipeline( @@ -340,7 +340,7 @@ def main(): output_type=args.output_type, profiling_warmup_steps=args.profiling_warmup_steps, profiling_steps=args.profiling_steps, - **res, + **kwargs_call, ) else: outputs = pipeline( @@ -355,7 +355,7 @@ def main(): output_type=args.output_type, profiling_warmup_steps=args.profiling_warmup_steps, profiling_steps=args.profiling_steps, - **res, + **kwargs_call, ) # Save the pipeline in the specified directory if not None diff --git a/examples/stable-diffusion/image_to_video_generation.py b/examples/stable-diffusion/image_to_video_generation.py index bd704a301b..14a69f3072 100755 --- a/examples/stable-diffusion/image_to_video_generation.py +++ b/examples/stable-diffusion/image_to_video_generation.py @@ -19,9 +19,13 @@ from pathlib import Path import torch -from diffusers.utils import export_to_video, load_image +from diffusers.utils import export_to_gif, export_to_video, load_image -from optimum.habana.diffusers import GaudiEulerDiscreteScheduler, GaudiStableVideoDiffusionPipeline +from optimum.habana.diffusers import ( + GaudiEulerDiscreteScheduler, + GaudiI2VGenXLPipeline, + GaudiStableVideoDiffusionPipeline, +) from optimum.habana.utils import set_seed @@ -57,6 +61,20 @@ def main(): ) # Pipeline arguments + parser.add_argument( + "--prompts", + type=str, + nargs="*", + default="Papers were floating in the air on a table in the library", + help="The prompt or prompts to guide the image generation.", + ) + parser.add_argument( + "--negative_prompts", + type=str, + nargs="*", + default="Distorted, discontinuous, Ugly, blurry, low resolution, motionless, static, disfigured, disconnected limbs, Ugly faces, incomplete arms", + help="The prompt or 
prompts not to guide the image generation.", + ) parser.add_argument( "--image_path", type=str, @@ -177,6 +195,7 @@ def main(): ), ) parser.add_argument("--bf16", action="store_true", help="Whether to perform generation in bf16 precision.") + parser.add_argument("--gif", action="store_true", help="Whether to generate the video in gif format.") parser.add_argument( "--sdp_on_bf16", action="store_true", @@ -184,6 +203,24 @@ def main(): help="Allow pyTorch to use reduced precision in the SDPA math backend", ) parser.add_argument("--num_frames", type=int, default=25, help="The number of video frames to generate.") + parser.add_argument( + "--profiling_warmup_steps", + default=0, + type=int, + help="Number of steps to ignore for profiling.", + ) + parser.add_argument( + "--profiling_steps", + default=0, + type=int, + help="Number of steps to capture for profiling.", + ) + parser.add_argument( + "--throughput_warmup_steps", + type=int, + default=None, + help="Number of steps to ignore for throughput calculation.", + ) args = parser.parse_args() # Setup logging @@ -194,6 +231,9 @@ def main(): ) logger.setLevel(logging.INFO) + i2v_models = ["i2vgen-xl"] + is_i2v_model = any(model in args.model_name_or_path for model in i2v_models) + # Load input image(s) input = [] logger.info("Input image(s):") @@ -201,7 +241,10 @@ def main(): args.image_path = [args.image_path] for image_path in args.image_path: image = load_image(image_path) - image = image.resize((args.height, args.width)) + if is_i2v_model: + image = image.convert("RGB") + else: + image = image.resize((args.height, args.width)) input.append(image) logger.info(image_path) @@ -263,11 +306,32 @@ def main(): output_type=args.output_type, num_frames=args.num_frames, ) + elif is_i2v_model: + del kwargs["scheduler"] + pipeline = GaudiI2VGenXLPipeline.from_pretrained( + args.model_name_or_path, + **kwargs, + ) + generator = torch.manual_seed(args.seed) + outputs = pipeline( + prompt=args.prompts, + image=input, + num_videos_per_prompt=args.num_videos_per_prompt, + batch_size=args.batch_size, + num_frames=args.num_frames, + num_inference_steps=args.num_inference_steps, + negative_prompt=args.negative_prompts, + guidance_scale=9.0, + generator=generator, + ) else: pipeline = GaudiStableVideoDiffusionPipeline.from_pretrained( args.model_name_or_path, **kwargs, ) + kwargs_call = {} + if args.throughput_warmup_steps is not None: + kwargs_call["throughput_warmup_steps"] = args.throughput_warmup_steps # Generate images outputs = pipeline( @@ -284,6 +348,9 @@ def main(): noise_aug_strength=args.noise_aug_strength, decode_chunk_size=args.decode_chunk_size, output_type=args.output_type, + profiling_warmup_steps=args.profiling_warmup_steps, + profiling_steps=args.profiling_steps, + **kwargs_call, ) # Save the pipeline in the specified directory if not None @@ -297,7 +364,11 @@ def main(): video_save_dir.mkdir(parents=True, exist_ok=True) logger.info(f"Saving video frames in {video_save_dir.resolve()}...") for i, frames in enumerate(outputs.frames): - export_to_video(frames, args.video_save_dir + "/gen_video_" + str(i).zfill(2) + ".mp4", fps=7) + if args.gif: + export_to_gif(frames, args.video_save_dir + "/gen_video_" + str(i).zfill(2) + ".gif") + else: + export_to_video(frames, args.video_save_dir + "/gen_video_" + str(i).zfill(2) + ".mp4", fps=7) + if args.save_frames_as_images: for j, frame in enumerate(frames): frame.save( diff --git a/examples/stable-diffusion/quantization/flux/measure_config.json 
b/examples/stable-diffusion/quantization/flux/measure_config.json index 865078d99f..f90605dba8 100644 --- a/examples/stable-diffusion/quantization/flux/measure_config.json +++ b/examples/stable-diffusion/quantization/flux/measure_config.json @@ -1,5 +1,5 @@ { "method": "HOOKS", "mode": "MEASURE", - "dump_stats_path": "quantization/flux/measure_all/fp8" + "dump_stats_path": "quantization/flux/measure/fp8" } diff --git a/examples/stable-diffusion/quantization/flux/quantize_config.json b/examples/stable-diffusion/quantization/flux/quantize_config.json index 8fdb21fccf..e601db4ba4 100644 --- a/examples/stable-diffusion/quantization/flux/quantize_config.json +++ b/examples/stable-diffusion/quantization/flux/quantize_config.json @@ -2,5 +2,5 @@ "method": "HOOKS", "mode": "QUANTIZE", "scale_method": "maxabs_hw_opt_weight", - "dump_stats_path": "quantization/flux/measure_all/fp8" + "dump_stats_path": "quantization/flux/measure/fp8" } diff --git a/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json deleted file mode 100644 index 91a74c633c..0000000000 --- a/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.json +++ /dev/null @@ -1,18871 +0,0 @@ -{ - "GlobalRank": null, - "LocalRank": null, - "Mode": "DynamicRange", - "Nodes": { - "conv_in": { - "inputs": [ - [ - [ - 4.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.4765625 - ] - ] - } - }, - "time_embedding.linear_1": { - "inputs": [ - [ - [ - 1.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.171875 - ] - ] - } - }, - "time_embedding.linear_2": { - "inputs": [ - [ - [ - 3.671875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1318359375 - ] - ] - } - }, - "add_embedding.linear_1": { - "inputs": [ - [ - [ - 7.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 1.390625 - ] - ] - } - }, - "add_embedding.linear_2": { - "inputs": [ - [ - [ - 4.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.484375 - ] - ] - } - }, - "down_blocks.0.resnets.0.conv1": { - "inputs": [ - [ - [ - 9.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.57421875 - ] - ] - } - }, - "down_blocks.0.resnets.0.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.57421875 - ] - ] - } - }, - "down_blocks.0.resnets.0.conv2": { - "inputs": [ - [ - [ - 7.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.486328125 - ] - ] - } - }, - "down_blocks.0.resnets.1.conv1": { - "inputs": [ - [ - [ - 9.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.65234375 - ] - ] - } - }, - "down_blocks.0.resnets.1.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.294921875 - ] - ] - } - }, - "down_blocks.0.resnets.1.conv2": { - "inputs": [ - [ - [ - 8.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.41796875 - ] - ] - } - }, - "down_blocks.0.downsamplers.0.conv": { - "inputs": [ - [ - [ - 6.9375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.234375 - ] - ] - } - }, - "down_blocks.1.attentions.0.proj_in": { - "inputs": [ - [ - [ - 7.9375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.177734375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 9.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2265625 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 9.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.19921875 - ] - ] - } - }, - 
"down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 9.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1484375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.59375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2314453125 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 9.1875 - ] - ], - [ - [ - 9.5625 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 5.75 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 314.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 11.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1611328125 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.287109375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1279296875 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10302734375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 107.5 - ] - ], - [ - [ - 5.6875 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 20.125 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 856.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 7.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2255859375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 32.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.3125 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 10.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1865234375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 10.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1826171875 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 10.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1826171875 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.3359375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.8125 - ] - ], - [ - [ - 9.5 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.98828125 - ] - ], - [ - [ - 6.21875 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 376.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.98828125 - ] - ] - 
] - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 14.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1494140625 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.33203125 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.154296875 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 13.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2412109375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 97.0 - ] - ], - [ - [ - 4.65625 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 17.375 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2576.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 9.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.240234375 - ] - ] - } - }, - "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 70.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.291015625 - ] - ] - } - }, - "down_blocks.1.attentions.0.proj_out": { - "inputs": [ - [ - [ - 20.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1083984375 - ] - ] - } - }, - "down_blocks.1.attentions.1.proj_in": { - "inputs": [ - [ - [ - 11.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1748046875 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 7.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.193359375 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 7.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1923828125 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 7.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1494140625 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.248046875 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.96875 - ] - ], - [ - [ - 7.90625 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 6.40625 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 199.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 12.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1337890625 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1533203125 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0986328125 - ] - ] - } - }, 
- "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1884765625 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 135.0 - ] - ], - [ - [ - 4.125 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.953125 - ] - ], - [ - [ - 15.125 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1864.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.953125 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1953125 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 19.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2578125 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 9.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.193359375 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 9.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.201171875 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 9.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1591796875 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.71875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.35546875 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 8.875 - ] - ], - [ - [ - 8.75 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 6.03125 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 173.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 13.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1201171875 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15234375 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.09716796875 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 8.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1943359375 - ] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 152.0 - ] - ], - [ - [ - 4.46875 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 19.625 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1792.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 5.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.201171875 - 
] - ] - } - }, - "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 33.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.22265625 - ] - ] - } - }, - "down_blocks.1.attentions.1.proj_out": { - "inputs": [ - [ - [ - 22.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.14453125 - ] - ] - } - }, - "down_blocks.1.resnets.0.conv1": { - "inputs": [ - [ - [ - 15.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.8203125 - ] - ] - } - }, - "down_blocks.1.resnets.0.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.625 - ] - ] - } - }, - "down_blocks.1.resnets.0.conv2": { - "inputs": [ - [ - [ - 7.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.271484375 - ] - ] - } - }, - "down_blocks.1.resnets.0.conv_shortcut": { - "inputs": [ - [ - [ - 19.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.251953125 - ] - ] - } - }, - "down_blocks.1.resnets.1.conv1": { - "inputs": [ - [ - [ - 7.46875 - ] - ] - ], - "params": { - "weight": [ - [ - 1.1953125 - ] - ] - } - }, - "down_blocks.1.resnets.1.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.326171875 - ] - ] - } - }, - "down_blocks.1.resnets.1.conv2": { - "inputs": [ - [ - [ - 7.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.322265625 - ] - ] - } - }, - "down_blocks.1.downsamplers.0.conv": { - "inputs": [ - [ - [ - 27.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.25390625 - ] - ] - } - }, - "down_blocks.2.attentions.0.proj_in": { - "inputs": [ - [ - [ - 9.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.3359375 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 3.359375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1455078125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 3.359375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.146484375 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 3.359375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.14453125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 8.1875 - ] - ], - [ - [ - 8.1875 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 6.8125 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 282.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 9.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.09521484375 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2333984375 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.119140625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": { - "inputs": [ - [ - [ - 12.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08203125 - ] - ] - } - }, - 
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 104.0 - ] - ], - [ - [ - 7.71875 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 21.125 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1904.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 3.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2158203125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 23.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.171875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 4.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.134765625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 4.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1298828125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 4.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1279296875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1689453125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 5.96875 - ] - ], - [ - [ - 6.28125 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.98828125 - ] - ], - [ - [ - 8.6875 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 155.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.98828125 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 11.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10400390625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2353515625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1298828125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 5.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.05322265625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 95.0 - ] - ], - [ - [ - 5.40625 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 21.75 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1192.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 2.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 18.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1748046875 - ] - ] - 
} - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q": { - "inputs": [ - [ - [ - 7.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k": { - "inputs": [ - [ - [ - 7.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1357421875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v": { - "inputs": [ - [ - [ - 7.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.142578125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1533203125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.375 - ] - ], - [ - [ - 7.65625 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 6.625 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 194.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q": { - "inputs": [ - [ - [ - 14.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.09912109375 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1630859375 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12353515625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0439453125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 126.0 - ] - ], - [ - [ - 4.78125 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 20.5 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 980.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj": { - "inputs": [ - [ - [ - 3.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2119140625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2": { - "inputs": [ - [ - [ - 22.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q": { - "inputs": [ - [ - [ - 8.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1435546875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k": { - "inputs": [ - [ - [ - 8.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.14453125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v": { - "inputs": [ - [ - [ - 8.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.53125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1513671875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.5625 - ] - ], - [ - [ - 7.71875 - ] - 
] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 7.96875 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 188.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q": { - "inputs": [ - [ - [ - 14.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10888671875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.169921875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.11767578125 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.53125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.049072265625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 122.0 - ] - ], - [ - [ - 6.28125 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 25.875 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2528.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj": { - "inputs": [ - [ - [ - 3.671875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.193359375 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2": { - "inputs": [ - [ - [ - 26.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.13671875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q": { - "inputs": [ - [ - [ - 6.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.123046875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k": { - "inputs": [ - [ - [ - 6.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1279296875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v": { - "inputs": [ - [ - [ - 6.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.119140625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1337890625 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.0 - ] - ], - [ - [ - 7.15625 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 6.96875 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 185.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q": { - "inputs": [ - [ - [ - 16.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0810546875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1357421875 - ] - ] - } - }, - "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ 
[collapsed machine-generated measurement data elided: the diff lines in this span carry per-module maxabs calibration entries, each recording the observed "inputs" range (and "outputs" for softmax) plus the "params"/"weight" range, for the attn1/attn2 projections (to_q, to_k, to_v, to_out.0), the attention_module bmm1/bmm2/softmax ops, ff.net.0.proj/ff.net.2, proj_in/proj_out, and the resnet conv1/conv2/time_emb_proj/conv_shortcut layers of down_blocks.2.attentions.0-1, down_blocks.2.resnets.0-1, and up_blocks.0.attentions.0-1 of a UNet]
"up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9765625 - ] - ], - [ - [ - 6.75 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 241.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9765625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 13.9375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1591796875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1982421875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.06884765625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 65.0 - ] - ], - [ - [ - 6.75 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 25.125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 684.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 7.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2890625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 23.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.248046875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q": { - "inputs": [ - [ - [ - 8.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.154296875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k": { - "inputs": [ - [ - [ - 8.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15234375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v": { - "inputs": [ - [ - [ - 8.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.138671875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.185546875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.03125 - ] - ], - [ - [ - 7.75 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.98046875 - ] - ], - [ - [ - 7.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 184.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.98046875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q": { - "inputs": [ - [ - [ - 15.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1630859375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.203125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.181640625 - ] - ] - } - }, - 
"up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0": { - "inputs": [ - [ - [ - 5.53125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0703125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 126.0 - ] - ], - [ - [ - 6.34375 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 24.375 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 904.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.71875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2236328125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2": { - "inputs": [ - [ - [ - 45.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.20703125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q": { - "inputs": [ - [ - [ - 8.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1669921875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k": { - "inputs": [ - [ - [ - 8.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1474609375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v": { - "inputs": [ - [ - [ - 8.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.16796875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.90625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2216796875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.9375 - ] - ], - [ - [ - 9.0625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 7.3125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 182.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q": { - "inputs": [ - [ - [ - 15.9375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.140625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.201171875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1689453125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0": { - "inputs": [ - [ - [ - 7.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.06787109375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 154.0 - ] - ], - [ - [ - 6.65625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 18.25 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1360.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.275390625 - ] - ] - } - }, - 
"up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2": { - "inputs": [ - [ - [ - 86.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2431640625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q": { - "inputs": [ - [ - [ - 8.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1494140625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k": { - "inputs": [ - [ - [ - 8.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1572265625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v": { - "inputs": [ - [ - [ - 8.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.146484375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2353515625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.3125 - ] - ], - [ - [ - 8.375 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 7.625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 173.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q": { - "inputs": [ - [ - [ - 17.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1259765625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.193359375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0": { - "inputs": [ - [ - [ - 5.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.06396484375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 118.5 - ] - ], - [ - [ - 5.40625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 19.375 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1072.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2099609375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2": { - "inputs": [ - [ - [ - 39.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1982421875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q": { - "inputs": [ - [ - [ - 8.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1455078125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k": { - "inputs": [ - [ - [ - 8.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1474609375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v": { - "inputs": [ - [ - [ - 8.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15234375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2490234375 - ] - ] - } - }, - 
"up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.3125 - ] - ], - [ - [ - 7.3125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9765625 - ] - ], - [ - [ - 7.65625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 194.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9765625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q": { - "inputs": [ - [ - [ - 17.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1669921875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0": { - "inputs": [ - [ - [ - 7.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0732421875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 117.5 - ] - ], - [ - [ - 4.6875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 23.5 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1816.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.26171875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2": { - "inputs": [ - [ - [ - 43.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1923828125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q": { - "inputs": [ - [ - [ - 10.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.162109375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k": { - "inputs": [ - [ - [ - 10.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.150390625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v": { - "inputs": [ - [ - [ - 10.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0": { - "inputs": [ - [ - [ - 7.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.345703125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.09375 - ] - ], - [ - [ - 8.5625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 8.5625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 204.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q": { - "inputs": [ - [ - [ - 17.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1220703125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1337890625 - ] - ] - } - }, - 
"up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.142578125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.09375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0634765625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 199.0 - ] - ], - [ - [ - 5.40625 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 22.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2320.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.71875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.19140625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2": { - "inputs": [ - [ - [ - 53.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.17578125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q": { - "inputs": [ - [ - [ - 9.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15234375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k": { - "inputs": [ - [ - [ - 9.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.162109375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v": { - "inputs": [ - [ - [ - 9.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1484375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.267578125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.875 - ] - ], - [ - [ - 8.1875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 7.6875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 211.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q": { - "inputs": [ - [ - [ - 16.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1005859375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1494140625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1279296875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0": { - "inputs": [ - [ - [ - 5.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.059326171875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 144.0 - ] - ], - [ - [ - 5.28125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 25.75 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2640.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - 
"up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.59375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2275390625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2": { - "inputs": [ - [ - [ - 56.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.19140625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q": { - "inputs": [ - [ - [ - 10.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1484375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k": { - "inputs": [ - [ - [ - 10.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1611328125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v": { - "inputs": [ - [ - [ - 10.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1611328125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0": { - "inputs": [ - [ - [ - 7.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2099609375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.9375 - ] - ], - [ - [ - 7.875 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 9.125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 201.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q": { - "inputs": [ - [ - [ - 16.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.09423828125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.11572265625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0": { - "inputs": [ - [ - [ - 6.03125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.11474609375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 123.5 - ] - ], - [ - [ - 4.5 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 28.25 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2944.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj": { - "inputs": [ - [ - [ - 6.09375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.228515625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2": { - "inputs": [ - [ - [ - 55.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.27734375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q": { - "inputs": [ - [ - [ - 11.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.146484375 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k": { - "inputs": [ - [ - [ - 11.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1572265625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v": { - "inputs": [ - [ - [ - 11.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - 
"up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.25 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.625 - ] - ], - [ - [ - 7.125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 9.125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 136.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q": { - "inputs": [ - [ - [ - 17.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1220703125 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1494140625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.103515625 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0": { - "inputs": [ - [ - [ - 5.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.076171875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 97.5 - ] - ], - [ - [ - 4.78125 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 34.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2336.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj": { - "inputs": [ - [ - [ - 5.09375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1875 - ] - ] - } - }, - "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2": { - "inputs": [ - [ - [ - 43.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.37890625 - ] - ] - } - }, - "up_blocks.0.attentions.1.proj_out": { - "inputs": [ - [ - [ - 120.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.11865234375 - ] - ] - } - }, - "up_blocks.0.attentions.2.proj_in": { - "inputs": [ - [ - [ - 12.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.251953125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 4.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1630859375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 4.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1982421875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 4.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.130859375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.5390625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.59375 - ] - ], - [ - [ - 8.875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 5.0625 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - 
[ - 208.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 10.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10009765625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.341796875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.259765625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0": { - "inputs": [ - [ - [ - 2.421875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2392578125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 116.5 - ] - ], - [ - [ - 4.75 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 31.125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1080.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 3.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.19140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 16.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.365234375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 4.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 4.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.171875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 4.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.126953125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.26171875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.34375 - ] - ], - [ - [ - 8.4375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 5.90625 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 227.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 10.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1689453125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.55078125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.640625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 2.71875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07373046875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": 
[ - [ - [ - 80.5 - ] - ], - [ - [ - 6.34375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 42.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1056.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.185546875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 23.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.244140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q": { - "inputs": [ - [ - [ - 5.28125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1611328125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k": { - "inputs": [ - [ - [ - 5.28125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1591796875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v": { - "inputs": [ - [ - [ - 5.28125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1328125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.263671875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.84375 - ] - ], - [ - [ - 8.9375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 6.78125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 227.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q": { - "inputs": [ - [ - [ - 11.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.5 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.412109375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0": { - "inputs": [ - [ - [ - 3.703125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08056640625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 98.0 - ] - ], - [ - [ - 4.9375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 49.5 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 676.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.173828125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2": { - "inputs": [ - [ - [ - 35.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2392578125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q": { - "inputs": [ - [ - [ - 5.75 - ] - ] - ], - "params": { - "weight": [ - [ - 
0.1650390625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k": { - "inputs": [ - [ - [ - 5.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1796875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v": { - "inputs": [ - [ - [ - 5.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12890625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.34765625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.625 - ] - ], - [ - [ - 8.25 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 5.90625 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 189.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q": { - "inputs": [ - [ - [ - 12.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1513671875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.515625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.4921875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0": { - "inputs": [ - [ - [ - 3.203125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.06494140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 105.5 - ] - ], - [ - [ - 4.75 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 19.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 924.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1982421875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2": { - "inputs": [ - [ - [ - 37.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2578125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q": { - "inputs": [ - [ - [ - 6.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.162109375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k": { - "inputs": [ - [ - [ - 6.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.169921875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v": { - "inputs": [ - [ - [ - 6.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.494140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.09375 - ] - ], - [ - [ - 7.59375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 6.1875 - ] - ] - ] - }, - 
"up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 163.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q": { - "inputs": [ - [ - [ - 13.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10791015625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1728515625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.18359375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0": { - "inputs": [ - [ - [ - 3.109375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07958984375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 76.0 - ] - ], - [ - [ - 4.71875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 17.25 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 648.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.19921875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2": { - "inputs": [ - [ - [ - 52.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.28125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q": { - "inputs": [ - [ - [ - 6.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1650390625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k": { - "inputs": [ - [ - [ - 6.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v": { - "inputs": [ - [ - [ - 6.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1298828125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.337890625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.71875 - ] - ], - [ - [ - 7.6875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.98828125 - ] - ], - [ - [ - 5.6875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 187.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.98828125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q": { - "inputs": [ - [ - [ - 14.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0947265625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.134765625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10205078125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0": { - "inputs": [ - [ - [ - 3.234375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.05810546875 - ] - ] - } - }, - 
"up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 79.0 - ] - ], - [ - [ - 4.21875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 25.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 828.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1826171875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2": { - "inputs": [ - [ - [ - 28.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.19140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q": { - "inputs": [ - [ - [ - 6.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1826171875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k": { - "inputs": [ - [ - [ - 6.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.18359375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v": { - "inputs": [ - [ - [ - 6.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1376953125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.30859375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.3125 - ] - ], - [ - [ - 6.625 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 5.625 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 154.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q": { - "inputs": [ - [ - [ - 14.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0810546875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.11865234375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08154296875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0": { - "inputs": [ - [ - [ - 2.453125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.04638671875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 60.25 - ] - ], - [ - [ - 4.125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 34.25 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1480.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.84375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.18359375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2": { - "inputs": [ - [ - [ - 50.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.220703125 - ] - ] - } - }, - 
"up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q": { - "inputs": [ - [ - [ - 7.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.16796875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k": { - "inputs": [ - [ - [ - 7.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.19140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v": { - "inputs": [ - [ - [ - 7.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1513671875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.03125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.32421875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.09375 - ] - ], - [ - [ - 8.25 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 9.0625 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 177.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q": { - "inputs": [ - [ - [ - 14.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0869140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1318359375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08642578125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0": { - "inputs": [ - [ - [ - 2.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0419921875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 61.75 - ] - ], - [ - [ - 3.59375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 20.5 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1536.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj": { - "inputs": [ - [ - [ - 5.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1669921875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2": { - "inputs": [ - [ - [ - 43.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.38671875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q": { - "inputs": [ - [ - [ - 8.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1806640625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k": { - "inputs": [ - [ - [ - 8.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.208984375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v": { - "inputs": [ - [ - [ - 8.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1337890625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.65625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2119140625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.90625 - ] - ], - [ - [ - 8.4375 - ] - ] - ] - }, - 
"up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.98828125 - ] - ], - [ - [ - 10.6875 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 211.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.98828125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q": { - "inputs": [ - [ - [ - 17.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08447265625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12890625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0751953125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.28125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.043701171875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 70.5 - ] - ], - [ - [ - 3.8125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.984375 - ] - ], - [ - [ - 38.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1952.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.984375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj": { - "inputs": [ - [ - [ - 5.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1669921875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2": { - "inputs": [ - [ - [ - 42.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.298828125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q": { - "inputs": [ - [ - [ - 7.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1640625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k": { - "inputs": [ - [ - [ - 7.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.251953125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v": { - "inputs": [ - [ - [ - 7.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.138671875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2470703125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.96875 - ] - ], - [ - [ - 7.125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 5.78125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 188.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q": { - "inputs": [ - [ - [ - 19.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1787109375 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.134765625 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07421875 - ] - ] - } - }, 
- "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07763671875 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 80.0 - ] - ], - [ - [ - 4.28125 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 46.0 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 3088.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj": { - "inputs": [ - [ - [ - 5.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1845703125 - ] - ] - } - }, - "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2": { - "inputs": [ - [ - [ - 43.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.5625 - ] - ] - } - }, - "up_blocks.0.attentions.2.proj_out": { - "inputs": [ - [ - [ - 174.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12109375 - ] - ] - } - }, - "up_blocks.0.resnets.0.conv1": { - "inputs": [ - [ - [ - 10.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.435546875 - ] - ] - } - }, - "up_blocks.0.resnets.0.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.62109375 - ] - ] - } - }, - "up_blocks.0.resnets.0.conv2": { - "inputs": [ - [ - [ - 13.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.384765625 - ] - ] - } - }, - "up_blocks.0.resnets.0.conv_shortcut": { - "inputs": [ - [ - [ - 59.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.09228515625 - ] - ] - } - }, - "up_blocks.0.resnets.1.conv1": { - "inputs": [ - [ - [ - 11.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.53515625 - ] - ] - } - }, - "up_blocks.0.resnets.1.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.76171875 - ] - ] - } - }, - "up_blocks.0.resnets.1.conv2": { - "inputs": [ - [ - [ - 11.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.50390625 - ] - ] - } - }, - "up_blocks.0.resnets.1.conv_shortcut": { - "inputs": [ - [ - [ - 94.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.162109375 - ] - ] - } - }, - "up_blocks.0.resnets.2.conv1": { - "inputs": [ - [ - [ - 9.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.91015625 - ] - ] - } - }, - "up_blocks.0.resnets.2.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.82421875 - ] - ] - } - }, - "up_blocks.0.resnets.2.conv2": { - "inputs": [ - [ - [ - 8.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.26953125 - ] - ] - } - }, - "up_blocks.0.resnets.2.conv_shortcut": { - "inputs": [ - [ - [ - 94.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1396484375 - ] - ] - } - }, - "up_blocks.0.upsamplers.0.conv": { - "inputs": [ - [ - [ - 67.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.400390625 - ] - ] - } - }, - "up_blocks.1.attentions.0.proj_in": { - "inputs": [ - [ - [ - 6.9375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.26171875 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 7.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.20703125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 7.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.197265625 - ] - ] - } - }, - 
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 7.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1923828125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.34765625 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 9.125 - ] - ], - [ - [ - 10.125 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.98046875 - ] - ], - [ - [ - 7.25 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 268.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.98046875 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 14.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1376953125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2236328125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0751953125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": { - "inputs": [ - [ - [ - 3.59375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1650390625 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 91.5 - ] - ], - [ - [ - 4.1875 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 22.875 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 836.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 7.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1982421875 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 29.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.380859375 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 12.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.201171875 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 12.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2578125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 12.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1953125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.380859375 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 9.875 - ] - ], - [ - [ - 11.5625 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 7.25 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 412.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - 
"up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 16.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1357421875 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.244140625 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10498046875 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 17.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.314453125 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 159.0 - ] - ], - [ - [ - 4.59375 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 27.5 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1336.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 9.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1962890625 - ] - ] - } - }, - "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 33.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.201171875 - ] - ] - } - }, - "up_blocks.1.attentions.0.proj_out": { - "inputs": [ - [ - [ - 34.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.23828125 - ] - ] - } - }, - "up_blocks.1.attentions.1.proj_in": { - "inputs": [ - [ - [ - 7.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.205078125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 8.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.23046875 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 8.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.228515625 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 8.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.173828125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.90625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.328125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 9.125 - ] - ], - [ - [ - 9.3125 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 5.625 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 272.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 13.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1357421875 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1982421875 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0751953125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": { 
- "inputs": [ - [ - [ - 9.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 108.0 - ] - ], - [ - [ - 4.9375 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 21.625 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 984.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 7.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.251953125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 23.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2578125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 9.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2099609375 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 9.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2138671875 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 9.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2099609375 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.28125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.326171875 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 9.5 - ] - ], - [ - [ - 10.5 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 6.625 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 508.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 13.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1328125 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.22265625 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12451171875 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 10.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.291015625 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 141.0 - ] - ], - [ - [ - 4.65625 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 22.875 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1376.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 7.90625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.248046875 - ] - ] - } - }, - "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 68.5 - ] - ] - ], - "params": { 
- "weight": [ - [ - 0.2421875 - ] - ] - } - }, - "up_blocks.1.attentions.1.proj_out": { - "inputs": [ - [ - [ - 21.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.16796875 - ] - ] - } - }, - "up_blocks.1.attentions.2.proj_in": { - "inputs": [ - [ - [ - 9.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2255859375 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 10.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.23046875 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 10.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2314453125 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 10.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1630859375 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.298828125 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.875 - ] - ], - [ - [ - 9.5625 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 6.90625 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 243.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 14.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1318359375 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.255859375 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12353515625 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": { - "inputs": [ - [ - [ - 14.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.203125 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 170.0 - ] - ], - [ - [ - 3.4375 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 24.375 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 968.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 8.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.396484375 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 15.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.318359375 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 9.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2236328125 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 9.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.263671875 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 9.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1748046875 - ] - ] - } - }, - 
"up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.427734375 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 8.125 - ] - ], - [ - [ - 10.375 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.98046875 - ] - ], - [ - [ - 6.53125 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 314.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.98046875 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 14.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1298828125 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.29296875 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1591796875 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 16.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.298828125 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 117.0 - ] - ], - [ - [ - 3.671875 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 37.5 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1448.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 7.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.23828125 - ] - ] - } - }, - "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 29.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.30859375 - ] - ] - } - }, - "up_blocks.1.attentions.2.proj_out": { - "inputs": [ - [ - [ - 22.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1591796875 - ] - ] - } - }, - "up_blocks.1.resnets.0.conv1": { - "inputs": [ - [ - [ - 12.625 - ] - ] - ], - "params": { - "weight": [ - [ - 1.0078125 - ] - ] - } - }, - "up_blocks.1.resnets.0.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1552734375 - ] - ] - } - }, - "up_blocks.1.resnets.0.conv2": { - "inputs": [ - [ - [ - 11.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.59375 - ] - ] - } - }, - "up_blocks.1.resnets.0.conv_shortcut": { - "inputs": [ - [ - [ - 146.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2236328125 - ] - ] - } - }, - "up_blocks.1.resnets.1.conv1": { - "inputs": [ - [ - [ - 7.59375 - ] - ] - ], - "params": { - "weight": [ - [ - 1.015625 - ] - ] - } - }, - "up_blocks.1.resnets.1.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.470703125 - ] - ] - } - }, - "up_blocks.1.resnets.1.conv2": { - "inputs": [ - [ - [ - 7.90625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.5546875 - ] - ] - } - }, - "up_blocks.1.resnets.1.conv_shortcut": { - "inputs": [ - [ - [ - 42.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.24609375 - ] - ] - } - }, - "up_blocks.1.resnets.2.conv1": { - "inputs": [ - [ - [ - 8.5 - ] - ] - ], - 
"params": { - "weight": [ - [ - 0.64453125 - ] - ] - } - }, - "up_blocks.1.resnets.2.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.7578125 - ] - ] - } - }, - "up_blocks.1.resnets.2.conv2": { - "inputs": [ - [ - [ - 8.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.263671875 - ] - ] - } - }, - "up_blocks.1.resnets.2.conv_shortcut": { - "inputs": [ - [ - [ - 31.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1259765625 - ] - ] - } - }, - "up_blocks.1.upsamplers.0.conv": { - "inputs": [ - [ - [ - 20.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1923828125 - ] - ] - } - }, - "up_blocks.2.resnets.0.conv1": { - "inputs": [ - [ - [ - 9.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.87890625 - ] - ] - } - }, - "up_blocks.2.resnets.0.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.296875 - ] - ] - } - }, - "up_blocks.2.resnets.0.conv2": { - "inputs": [ - [ - [ - 12.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.90625 - ] - ] - } - }, - "up_blocks.2.resnets.0.conv_shortcut": { - "inputs": [ - [ - [ - 54.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.3515625 - ] - ] - } - }, - "up_blocks.2.resnets.1.conv1": { - "inputs": [ - [ - [ - 9.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.703125 - ] - ] - } - }, - "up_blocks.2.resnets.1.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1572265625 - ] - ] - } - }, - "up_blocks.2.resnets.1.conv2": { - "inputs": [ - [ - [ - 12.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.66015625 - ] - ] - } - }, - "up_blocks.2.resnets.1.conv_shortcut": { - "inputs": [ - [ - [ - 25.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.2314453125 - ] - ] - } - }, - "up_blocks.2.resnets.2.conv1": { - "inputs": [ - [ - [ - 21.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.78515625 - ] - ] - } - }, - "up_blocks.2.resnets.2.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1845703125 - ] - ] - } - }, - "up_blocks.2.resnets.2.conv2": { - "inputs": [ - [ - [ - 9.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.9140625 - ] - ] - } - }, - "up_blocks.2.resnets.2.conv_shortcut": { - "inputs": [ - [ - [ - 13.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.25 - ] - ] - } - }, - "mid_block.attentions.0.proj_in": { - "inputs": [ - [ - [ - 8.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.298828125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn1.to_q": { - "inputs": [ - [ - [ - 3.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.140625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn1.to_k": { - "inputs": [ - [ - [ - 3.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.14453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn1.to_v": { - "inputs": [ - [ - [ - 3.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1328125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": { - "inputs": [ - [ - [ - 5.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1923828125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 8.0625 - ] - ], - [ - [ - 8.375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 6.4375 - ] - ] - ] - }, - 
"mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 274.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.0.attn2.to_q": { - "inputs": [ - [ - [ - 11.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07470703125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12109375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1064453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": { - "inputs": [ - [ - [ - 3.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.046875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 83.0 - ] - ], - [ - [ - 4.59375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 22.625 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1216.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": { - "inputs": [ - [ - [ - 2.640625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1806640625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.0.ff.net.2": { - "inputs": [ - [ - [ - 20.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1630859375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn1.to_q": { - "inputs": [ - [ - [ - 6.53125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn1.to_k": { - "inputs": [ - [ - [ - 6.53125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1533203125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn1.to_v": { - "inputs": [ - [ - [ - 6.53125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1435546875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1494140625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.6875 - ] - ], - [ - [ - 9.6875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 7.59375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 215.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.1.attn2.to_q": { - "inputs": [ - [ - [ - 12.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07958984375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1259765625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1064453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0": { - "inputs": [ - [ - [ - 4.78125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.042236328125 - ] - ] - } - }, - 
"mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 83.5 - ] - ], - [ - [ - 5.21875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 23.75 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1120.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj": { - "inputs": [ - [ - [ - 3.484375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.16015625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.1.ff.net.2": { - "inputs": [ - [ - [ - 22.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.142578125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn1.to_q": { - "inputs": [ - [ - [ - 8.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.140625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn1.to_k": { - "inputs": [ - [ - [ - 8.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1767578125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn1.to_v": { - "inputs": [ - [ - [ - 8.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15234375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.09375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1435546875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.90625 - ] - ], - [ - [ - 9.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.984375 - ] - ], - [ - [ - 7.03125 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 233.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.984375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.2.attn2.to_q": { - "inputs": [ - [ - [ - 13.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.083984375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1220703125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10546875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0": { - "inputs": [ - [ - [ - 5.59375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0439453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 72.0 - ] - ], - [ - [ - 4.03125 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 24.75 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 864.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj": { - "inputs": [ - [ - [ - 3.546875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1611328125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.2.ff.net.2": { - "inputs": [ - [ - [ - 28.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.14453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn1.to_q": { - "inputs": [ - [ - [ - 8.9375 - ] - 
] - ], - "params": { - "weight": [ - [ - 0.1376953125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn1.to_k": { - "inputs": [ - [ - [ - 8.9375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1513671875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn1.to_v": { - "inputs": [ - [ - [ - 8.9375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1435546875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1337890625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 8.4375 - ] - ], - [ - [ - 8.875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 7.75 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 219.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.3.attn2.to_q": { - "inputs": [ - [ - [ - 12.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.06298828125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.10791015625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07958984375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0": { - "inputs": [ - [ - [ - 13.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.03515625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 62.0 - ] - ], - [ - [ - 3.921875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 24.25 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1184.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.34375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.154296875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.3.ff.net.2": { - "inputs": [ - [ - [ - 29.875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.138671875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn1.to_q": { - "inputs": [ - [ - [ - 8.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1416015625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn1.to_k": { - "inputs": [ - [ - [ - 8.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1455078125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn1.to_v": { - "inputs": [ - [ - [ - 8.1875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.15234375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0": { - "inputs": [ - [ - [ - 7.4375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1708984375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 8.375 - ] - ], - [ - [ - 9.125 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 8.4375 - ] - ] - ] - }, - 
"mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 212.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.4.attn2.to_q": { - "inputs": [ - [ - [ - 11.8125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0576171875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.09619140625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0693359375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0": { - "inputs": [ - [ - [ - 2.359375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0322265625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 60.5 - ] - ], - [ - [ - 3.640625 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 32.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1080.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj": { - "inputs": [ - [ - [ - 3.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.158203125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.4.ff.net.2": { - "inputs": [ - [ - [ - 37.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.146484375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn1.to_q": { - "inputs": [ - [ - [ - 7.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.126953125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn1.to_k": { - "inputs": [ - [ - [ - 7.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1484375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn1.to_v": { - "inputs": [ - [ - [ - 7.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.138671875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0": { - "inputs": [ - [ - [ - 6.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.150390625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 6.96875 - ] - ], - [ - [ - 8.4375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 7.375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 189.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.5.attn2.to_q": { - "inputs": [ - [ - [ - 9.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0556640625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08642578125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.06591796875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0": { - "inputs": [ - [ - [ - 2.546875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.033935546875 - ] - ] - } - }, - 
"mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 47.5 - ] - ], - [ - [ - 3.46875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 27.25 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1072.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.34375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.16796875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.5.ff.net.2": { - "inputs": [ - [ - [ - 34.25 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1552734375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn1.to_q": { - "inputs": [ - [ - [ - 6.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1357421875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn1.to_k": { - "inputs": [ - [ - [ - 6.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1318359375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn1.to_v": { - "inputs": [ - [ - [ - 6.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.123046875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1240234375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.0 - ] - ], - [ - [ - 8.125 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 7.03125 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 175.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.6.attn2.to_q": { - "inputs": [ - [ - [ - 8.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.059814453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08349609375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.055908203125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0": { - "inputs": [ - [ - [ - 6.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.030517578125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 61.75 - ] - ], - [ - [ - 3.90625 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.984375 - ] - ], - [ - [ - 31.5 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1528.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.984375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.46875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1435546875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.6.ff.net.2": { - "inputs": [ - [ - [ - 44.5 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1396484375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn1.to_q": { - 
"inputs": [ - [ - [ - 6.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.12890625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn1.to_k": { - "inputs": [ - [ - [ - 6.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.126953125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn1.to_v": { - "inputs": [ - [ - [ - 6.15625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1416015625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0": { - "inputs": [ - [ - [ - 7.21875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1298828125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.03125 - ] - ], - [ - [ - 7.6875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.99609375 - ] - ], - [ - [ - 8.5 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 153.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.7.attn2.to_q": { - "inputs": [ - [ - [ - 6.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.054443359375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.07958984375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.05029296875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0": { - "inputs": [ - [ - [ - 10.375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.031982421875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 57.25 - ] - ], - [ - [ - 3.40625 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 30.25 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 1080.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1455078125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.7.ff.net.2": { - "inputs": [ - [ - [ - 42.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.14453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn1.to_q": { - "inputs": [ - [ - [ - 5.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.134765625 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn1.to_k": { - "inputs": [ - [ - [ - 5.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1318359375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn1.to_v": { - "inputs": [ - [ - [ - 5.625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1435546875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0": { - "inputs": [ - [ - [ - 8.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1484375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.84375 - ] - ], - [ - [ - 8.625 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 
0.99609375 - ] - ], - [ - [ - 11.375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 179.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.99609375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.8.attn2.to_q": { - "inputs": [ - [ - [ - 5.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.05517578125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0830078125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.052734375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0": { - "inputs": [ - [ - [ - 1.7109375 - ] - ] - ], - "params": { - "weight": [ - [ - 0.0296630859375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 61.0 - ] - ], - [ - [ - 3.59375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.9921875 - ] - ], - [ - [ - 26.75 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2016.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.9921875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.90625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.17578125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.8.ff.net.2": { - "inputs": [ - [ - [ - 56.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1435546875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn1.to_q": { - "inputs": [ - [ - [ - 4.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1298828125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn1.to_k": { - "inputs": [ - [ - [ - 4.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1240234375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn1.to_v": { - "inputs": [ - [ - [ - 4.96875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1396484375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0": { - "inputs": [ - [ - [ - 4.90625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.14453125 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": { - "inputs": [ - [ - [ - 7.15625 - ] - ], - [ - [ - 7.71875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": { - "inputs": [ - [ - [ - 1.0 - ] - ], - [ - [ - 8.25 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": { - "inputs": [ - [ - [ - 153.0 - ] - ] - ], - "outputs": [ - [ - [ - 1.0 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.9.attn2.to_q": { - "inputs": [ - [ - [ - 5.0625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.046630859375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn2.to_k": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.08740234375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn2.to_v": { - "inputs": [ - [ - [ - 852.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.04248046875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0": { - "inputs": [ - [ - [ - 1.0078125 - ] - ] - ], - "params": { - 
"weight": [ - [ - 0.026123046875 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": { - "inputs": [ - [ - [ - 63.0 - ] - ], - [ - [ - 3.671875 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": { - "inputs": [ - [ - [ - 0.90234375 - ] - ], - [ - [ - 28.625 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": { - "inputs": [ - [ - [ - 2224.0 - ] - ] - ], - "outputs": [ - [ - [ - 0.90234375 - ] - ] - ] - }, - "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj": { - "inputs": [ - [ - [ - 4.40625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1552734375 - ] - ] - } - }, - "mid_block.attentions.0.transformer_blocks.9.ff.net.2": { - "inputs": [ - [ - [ - 51.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.146484375 - ] - ] - } - }, - "mid_block.attentions.0.proj_out": { - "inputs": [ - [ - [ - 26.0 - ] - ] - ], - "params": { - "weight": [ - [ - 0.1396484375 - ] - ] - } - }, - "mid_block.resnets.0.conv1": { - "inputs": [ - [ - [ - 9.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.41015625 - ] - ] - } - }, - "mid_block.resnets.0.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.53515625 - ] - ] - } - }, - "mid_block.resnets.0.conv2": { - "inputs": [ - [ - [ - 11.125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.71484375 - ] - ] - } - }, - "mid_block.resnets.1.conv1": { - "inputs": [ - [ - [ - 8.5625 - ] - ] - ], - "params": { - "weight": [ - [ - 0.482421875 - ] - ] - } - }, - "mid_block.resnets.1.time_emb_proj": { - "inputs": [ - [ - [ - 7.6875 - ] - ] - ], - "params": { - "weight": [ - [ - 0.640625 - ] - ] - } - }, - "mid_block.resnets.1.conv2": { - "inputs": [ - [ - [ - 13.3125 - ] - ] - ], - "params": { - "weight": [ - [ - 0.61328125 - ] - ] - } - }, - "conv_out": { - "inputs": [ - [ - [ - 11.75 - ] - ] - ], - "params": { - "weight": [ - [ - 0.21875 - ] - ] - } - } - } -} \ No newline at end of file diff --git a/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz b/examples/stable-diffusion/quantization/measure/fp8_hooks_maxabs.npz deleted file mode 100644 index 2e6ad5c1968290f8a28d99c9d33aa52e4bcc3111..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 263025 zcmb@P1)Ln!()JTP!QI^wB#;Gy?q)U#8VJz1hIm4BD$SyeySux)ySux)ySvNxf4b%r zdwG}KyF1@~fA3ROQ|CNg&(J+*PiCrnqyCdmF+qQ%x$M3dDm(uAJui-P|sF7_wRH-b+W#l2UjOQs5(Vfow8g1 z&fRN+hUU9Y7}m8q)s~YXaKlxqQy+5hLDgxpYL{;PC*QI&ukBIoI$*$n_5R|2CG_u| zSEn6($adA~it6;;w(QD~wa1JfJZMZdXx!-P3%~ZjU6?F;Uk7sXWnv3UK&7lVs)0HI%~HrrzS>B%fM0NsbxUe#_$!Z^GU}1S#^OVV~y&9|0$zW zUR`MLA$=+CluC8syt>HXLFJJq46;FECsr3Ns*9EJ&cjBH7@1cWPlsJXhg~wOc8kMy zAGtuu^LoP@9GM9b;ZGhMpDm8Bcq63nz3?LT_t9$TAQ(aQ+2hxy85Jx zXsE7HR2$18no<$HMOn$J%|Y3xMmf4t?VDHoc@b;=UBp_Nv36EnCuXc$n=y4$b-ldW zUq#HpJ27bJ(6-))^^58TWeo#T4I7GaqpZ4dFlIHzFDlhd^6I8u!)AZiu(@VzkyW>h z8C%t6T-&?4bza?Ok~K6`w=JsMl{IXiY8WWS9kS}6U>sayRv3w63HgVjrT%Jz%p4Bl$b!=J6 zxKzq`5l_gf)gZPS@hr{NiFq~mQg-^glorj{IjimxGj^@bn6-Cxx4gQ$N?CjorKFQ~ z-~=1LV|9i%BR0e_ctpk^GDRS%9Chx}dA zp?USN|K;6js2*Nak0?tyGL>?ah>y;y#{}`QHR9hZ)#LK&@m|Uaf0uHiW}K8&PmUR< z)MotBR6RAXo;K-Hde;6!onBPWC`&mrm2#Gd&(5mn1o62w;(3~@=jGM&y_5_7F6Bba zxG1Y$95XJd%~-N`_0qh0*`)tN_3ZJV|Di4~s#laHU71R{N(Z<)t6mcaxVClx{zF}t zSFiVyZuq;T8#Uvmta@|IxTQAZq^9bvdG)sc<=tth-dZ~->g)h%d5|OIWPQO&WoDyQdWIAX1r3HF`=pYYF>R! 
zEKPl2(44B2)6iOa8WBx>VBNZSupcxu^?{|Xvv88Gv*lCJIar$dG$iRdPpQy#J`qiQ zpzFF&+0b>7B4{qg($ohA&813(uFHsM>I3W6#e@Bzp`i~fbzOy%bX_fC%pOD|I4 z+gKj#r&Ln+u2Nnhw-9}ghzI+Dq3#0>VD5)l`uD(?`>|3!_f;P3M@0V~*s<07U)816 z`!lHN=>yC2^b4GHo_;BxBEQ1Y)2AUx;5SNnk@NFlKO%biz*xP%*MQpU{eygXe#Fw$ z2L{j2O2r%X3lU9yV8^y{F1EIn`wbeJ`oL1yA2_L?Kjl-*801U5IGv1MB(FtZn6{g@&d+u+%j@PSQ1l zd!%u32j!&E1asY^&#THeqelD_R|37u8HNreqhX9TPdGA zSF^1{#Do37dKQDW)$w3IczLj&j)=PTv7v4Qy-0-vusqmLsibZrrMyD6avKxzU_UU_ zZK46p-4sjz9vE{sSIXy}#e@Bb=-&hDSq$1xmllJqpr)Y@EYH(zaMF3Yt$d2y4ogFy zh9rRlmGUC5>SsF;@v|i`7K6bWP+JU!$cJYrmX1C!c!nz#Z`24PI{LtlZRHyNv6UMI z6K#E9scke)s%eaTY8s2BtxrRew(&}NO=Ebl9}#VRpkITgY+QpA6+x3@Y3l=nrbQ{I zS%S7cBHH>uU4!l4%7ru@><0-=ePF3-cbueZ5BZc+U}@^pkfdrarM#R~`qKB52OX($ohA&4o&Zu8W9h z>I3W6#e@Bzp`i~fbzO#&bX_i=g08^Q(5E3u*Hub|uB(Y?=mTBXwaSLB>l8tAJ(h+( zFlcU6%4v9x`AtML^nudV{;gcA^U$|)w?IV?A6RO;4JT>4T|PzKfu)B}Lz1?;l=7ku zq=%1)9zM{u-K%V9yH61`_hWgm9~d+bD&;h6Qy(JY!G55$wSOy@w6(pJdjwt{?588L zzCDJG_3d%J`2Sct%K*8mrdtPsySuwX28aMN%uEto8`oe7gpdR(P0+^O-QC?icz6jA z+}+*XfX=~nea^_ z2m7fN>fTn%C*%vj-=X4QKQPt3rv+U5J`w*OnAd)&mS4+1tNMrv{~lQG={-($@$~)# zYCL^l@jm^GB;Kc=D`&_rhL)5pePFq*oQth*<$i$%Qy*9e`;8<9`dv8#{XxXkrzIimFSUFi9qdPisSorI zV7D3R<_B;BRnSaG#MB2S%|vQB&15Y$F%_mhu#uZ)eJeL9G?@CpLf7OZp=%1|3^XMX zQ=gWEuBp}Xf!NATLxrgibY0V_o4Te~15sIoMCF z5ZbJkPdH6qYoX#`KQM2Xty;jfZA1?C1M}LxYWcO;e)>^yupijS!=Q6@9P9@#2m9$2 zQMU#$)vc)?GT~Z84)#+k)UBhIPpE_as5saUOm*vP0oQIo#J>mTwH3Ae+KV~Zj|%@D z*vP}66Ls-0*aT_}ePHoE-HasOr<*Hh$SsH%`m`hjZl#tFIhKR{s4(<_`7qd43+fMp z0m|Xoj)2p- zKGVS*>_>&I5A@IAFm>}YI9wGpyA!eXfk|U(ISv1-Xap6uK2V>*u5aa18VCD9g4Yi$ zRP99)s`ggSIQtOs`e{k1+D|PXXA8W3RCxVBS9O58sp>#g&>Td>>jx&yA!<3zVjS#8 zh1U-(_cU0c<5uo)Xz&1ng{~t>Lf28s8R%#t9zZP#UB{~B194pRaa4E!LDzMHx~c0# zRnVM7!~+N>%_(X*4O_WWsqg@T4eR1yKWOj(f`zU#NkZ3I${FZvA|60330>!^mAcNO z!UG7pt_##nT^Fi?<{}~l{!bsZHRK+tvFpl<5AQ57^d5%B32}KGd-s>_>$U5Oi(#tDD*$PzB9{M0|i?(mbq| z)6B`iepL7XL22vyRxY%4zLonAyqE&@iag)O5cB!=n10BFj}tKksue=Vs^t^%&j_ES z!W0PR^X+LZ;M!-19P9_?wa=;L*YYKs&r@-*A6V|`JwA0D><2Fg`{@-?_cAfny`mp7 z;j2Ur_ERg=y{?u|xHkv;QE{*znCjlr0_^4Heqgz$_c+zX z)B6LcIoJ;@-lrdt#QXGPi~5cVV<1>=E9YYCTe%;g!2}2v!hRx&fqqubK)(<%0cuGI`%Nt$i0_^G zoeC2m=pVp8)y)s!U#g(_n}`PxOqy;pCC$$q>_>$M5Nzb8S>MWahXxNISm>IVBy>%p zoPj1K;sMl>&^5VQu8XbQ6jXQsLDw~vx~XewRnSaB!~+N>&2(xx&6X`TJry26uwh*s z><0}VK(Np?GfC*0ML7e_O2h-GC829}wNlp{RCoYE*EN^AscUXk(9A=`0|+L~d}=uj zTeiX-4Ot>16gZ)@@vO% zupbo%`+4dA3z6e^CRdsN-~yH0KfV0D?(#fm%+(pGq#I!UG64tc!#FpuqzO7P>Aa z30;>dXQ0c8cmTB|bX}=d>bi;w4xg&&!KArCEvNa1gZ-%R0D{uh z^{rfo_n~j)ZiWgUAXsR-l_a#?rktT}C*lLtlF)XiT0YdF9PCGh4-j;1_o$oNMyZ14 zULrm~Flp{r%V`$I2S`=52SI7;`c^Kqb-tB*2wqHqdPSaZqlx)^dqh8E!v7F41*#Q7 z$Ef8K-i;}c3R57M&$lPEfNRGRIoJ=(YoAifuYHJv{irzD4=nfe9-lf6_Jfy${q%~c zd!Cr;UeFJj@I@jA`>7S`URKK|JdlI^s5saUOm(km0oT4x2V<4D3U#gYAsIRCn27=|baxS*MmHP%7On_h^>^qVe=zHZ1^aBwSpq7NN zpVab!26M0<6(&H?KY+ign;*d6R6+AQ5f31kG=Hk)G<@&!UsQMi!A5SH^{w1L(A2Ek z%ynH8kc6%Yl`~LxA|60330)Jb<0}VK(P4lH9bk_nn5`O%}B%ps3oCmX0=k+EL3;^LDw~#x~Xe+RnW{q z!~+N>&0K0Z%~W^*sqg@TjXVrGY~|*Ks%GZZ2ERY9`^5g#C!G)t-FG>3Aq9~C}8u#tyBC)gH!~XI0SbLc|6LCe5yDISqe*HG~Qq zAgIq^*SB&hjf4Fl!2t*ss)mzmcO}bTAPQpq7NLL)G$u zbg&;49zf7_9ieXOI#LxhM-lM=f=P3XT28~)o*YYs2M}yn7YF-6g9i{Sbe%{Nx=vEg zKqnLN0BT9-I#sRIbs7~OK+tuap>FCrQx!C45%BSiK7KrIPvx2oks9mm0bRQLcv*LH`x zsqIcx(A-7D2M8w3J!&}(f1f{!3LhXSZC&5Wg|^POa`(ZDDNwJ-^X&m*KHnbH51H^G zBBnsKLg;9I7 z2Q+v9!9v$xB%$kXv0~k3=`hd)6!n4F%1uax2M}~!6RDfJCRPQ_Bt$%bVCtGo zE!V|~p_5bL0R$V?#le2i-~j{+T~m{Uu4$Ar(6mH6fLap&y{1>o2SV2jRCoYE*EN&6 zscU9c(9A-_0|+L~Y-%~pryT4@g$EF9o-p_yEB|+uS6fZ64(eH7~JZ^Jz(F zn_n#->f<(BfC?WV=-L)iH?=LS3YtZT_yEDASxhab*^Pt!sPF-TjXVrG*$xz2xh3Jn z6sT9^cDXb$ZMB zjf4HDIM@$t#{TOXN#L 
zwIl?tua*zly}xZh#g~YJ`7o$xLH%K{k#cx8CSnW(lV?-4@)xxk6~;iY+*YpVzqWE) zz=RJFEVOM!5;JYBoSC*E;sex@&^ACVpXshv+l~q!An2dL9n{Uw;Et-G*@=h^5KNju zYB|mP9PCGh4G`34uDc zL04t!rm7LDps5mZ0D?&~QZ1+9jElXfZ~%hko(8=-ZsqoY1`i-u=-Q7YbnUO4foeoN zfLao|4phqr`n}Z-qQV0Rx~@aiOqNCu*GW`(072Jvin^)mR8`QNM#KXMCe0aYISpIXGpXsqg@T($)2?T!;6eZ{@Cr3LhX?XuFmqv|Xp1p{^(51JshxcB5K8)D;}; zM}-d%bZxh&o7!$w1Y za!>E^spDWjcsbZluZX%Qh^cO@e#nGR68Q#EwL;y~YWajCIoOYiZx97j-E&&Nwa*hd z*bmHWUsTJleVK#(s5saUEcf&tr@DB0zXCM}`+>##^fi)rpT4e~A>SbKC8Amq0^d^0 zhvc6`y-mfJh=Td_epd_XPw)4X!}C57V<4D3AF7qVsE?>H27=|baxS*MmHPx5On_h^ z>@$)W=yT-^^aT+Upq7NNuhjB^=H_5ODolW&e*nK#H$Q;ise) zpbDA^iFg3Pq?t%9r<0}VK(Np?IZ5c6LOBCXNyG!FC829-wNlqK zRCoYE*EOBGscU*w(9A%@0|+L~Olmm||G#o(Dm;K-BM*ZPTe(@G!UqTz+GZySZF4AR zs5y!F0JS8v&8?OXb$Of3Lxm3zbZzsgo7(1A1 zHB<2oqF}09RSUSbmB_(A=@)xxc6~;iY z+*YpFzqWFlz=RJFEVOM#5;JYCoSC*D;sex@(6*IYKGP@;_M^fF2>NGmTXpj@I6xIN z+Yzw=f=RQ3T28Yd2m4WB0|fOM?D|$NrE#zyBsc)ULeQ_cZ9;aVxhcGVfxc!^mAcNO!UG7pt_##n zT^Fi?<{}~sz_d*7;WM zc6c!b>J@pu-AT;n+gf-7B0@O8o5iH)PFOkIi^kwA? z`3jLQ5!I3q_?lWi&9`bf%@fV`9Tgrxu#uZ)eJl3^G%#R_dC93J)OYx~5V$bxo}bnrVo50Kuf0PA#Y5AH+^ig$EF9oxl z_yEB|+sq`PZ5HJWH7gMxpq7NT+12u)_y)^4sPF-Tu5B)LQ`_9CpqYn=4-ib6`P6cn zYO~Exg%1#Hm;%)bp^K~K6VBS#mY~8E z2YA(x-kL41S47Y&W-za9QOmF8TjN)ys#+`9$itu$b@4E0 zgSuwzVDUcfOA_zXe##lLKanpH)shgnx>`QuA^mL)D!xP%%!k2RT2Ox&tgRfLb%+=P z!Q@#_t^7r;PlYiMEVq^G{jaUuhA`m+1Pg5&k;F_JD`%!ni1+}tB(!a&me0i3+iy;V z4-oXv;Fjvb_1_&n2wrV-eQY|)s3L7A(&tUH^AEuqsnrwSWZ~%gZsvSu} z)lSM8XCM&=pq7NHoz<$VYPVgeZ~%g?YFBkr)eu$C>_)@^2qw)iwVY;d9Dr0f0Ksxk zgFYR%a(h672M{cDjUWkKRpkt{ClL>zmV~ao)bfG8l$JQLxTqpEOZ@861omk&OnC~@c?Q`=sHrZ)O8dU9zf7_ z9iwjQI#v}l#}V-Wf=P3NT23PmAQc`!uwh*z?PO^10D^_CQ%OSCY04SsbRr%=EeTy` zs+GFVqQV0Rx~_B7OO#9*rHxYRke#i>FWAcuEYD#w{jOlg%1!c zv|UOP+AdShP?r<&0cuHTyHYJ5>hV^)iV7bf=-RGPH?>`>3YzPP_yEDAxj`+b`5PY~ z6+S>v+Pc1#3vHcm zJzBuEqlgu|7tCw#Q_HXA|GnH#Rka7ea!>E^scW(a;jP(2dPUSdOiXp7^+P6ngvd9D zsuk)URm&&j={<&uZx97j-Q!xowNDT$HWtilpHwS=F|ICEvP@eUsDdx>qLx!VDh}F zR{o;iqQV#mmfOm?*!ouP9cXIyE?5YAk0b_qUpWJPK*R*7B_Zr1wS1tzTJ2*hOn{(& z06$eXKY*X9g64A~9zZZ@zEsO;u5GrjsPF)SjodWrTe)wb!2<{uy1pX`UEeEbpdW~M z0JS7^{iK!;#8&QSDm;Lo>-tsQ)b*PxXnrT+0R)rgPqm!pr)K+$3J)OIu&$Bz4>WiH zXRqs;fFyKHsGNbi6Y&6QN$8qbt<*IM6&^s)bxo#j>Y7{?G*b}q0D?&~m0C`7SF=q` zg$EF9p4v_yED;zuEL8p=}1`3^gMWAE1_mwwcxPp^j{`S*Y*MZYq3$U?UHMPPPNZR&HK+F$L-sxn0gr%-iJx`XLi8NW>JV zRtQ~KEuZk~c3XrBQy`eP%f+;SYZoV0YzZ*0T~aN-7Sr`oR8?CVY~*3kxw2Hu)2 zt5-zba>P{ELqBA~CL-S;s#d65K`ozUwDb*Y+k>tPhyiuB4V8d30K5bRbkZnZ1L{v*cU|+R-$U*(B9~ECB3g*LL zH7%$=3|3bT&l*IGfnf5irB?o;)~3Q32$tK*_4(IUZe5u00fL3L^+{r;4U{v}hD3aT zS`ykeQp;z0u+=uE!UqWYXK+(>^E0@aDrhz*Vgm$|W=pl4h9knaqQV9U>ND88%UijW z)@0j2f&&mNR1F{rRof|Nob8D?0JS7k?WmTIvu(TWM1=zobX9}YO;tOqf@T*Y4nQzz zc2&!1I9F~66%Ig9sumo-s@-k)t^@d!vJGWDW<`+-TbuUbxXezWaIh1n03fVuxK0XA?1N6wGn%=sOM z4;$ITYOKW12Nt6rND`wTq@340n24WGOJejx)$-TuhM$iLKOg9$AE9oJexxdBjw0gc z1C!<$wVY<=Rqa?R{CuEBUwAyDR|gLrIAGw=9R}#XfE~{jSogp}!igjy;Uwj}_Q^!7 zds-3_PF2fa`)7{tqr$oex`Z>-O$leJg61qD);%z3&QZ&07F*TMrNX)g{=fY8o9uj6 z;?4t$(Jv&4(JxZYYhFymou?%+`lV|5Yku8omr>!)1AX)>)XmYaR0YjdMBI5`(p;mK z)11*_*HYom1OK<*zTP+kM(i@&syp)Dzn&{F;emyO8%aXKP0D%gn~9k4v?L_ls+Pa@ z)$Mj06(&5;CETHIO1M)MGOnBhKm<-F{7#EN~dB{BUEYWd5~+-5&gRqZFx zr~g^qocX#b@zPD1?) zlA8Su7OMUs302+Z@Np&}R%}8r$LX$?k8@P3O+;0-i9uI2iMpw3QdOv$j99VB!K9f& zEvLb?J|$JvrUDyOHN>Wdq-N8Ag{o;uLe+H28E1N8#b(fw_-{3%TB&L#s;bQlx~f^! 
zO;xk1f@U^i#byVSW)8KS23yLUR8^Y`Y*5umn;Vjv%>x#y<|PSL^C@SX`H2-8#C82D2wR|N0VZ{nm{L@CzmGx9NmGx2uO>ZK*6)TTKhNc6DOK)&TR`HP!NK=WMgJsH(O$sGZpVPi;-M4y-j>SFeY*^@yo$ef^O6 zHX!m(8r2GI6}5amzMg9%D*j0$nA$ec0dn@t(f6169>_1aptMlNQwXm;;r= zGl*ERox$YUMXmfb4W_EvuAugq?AD-RVS;Js!pM*+Nq$cI!)bFb-F5O z&LCFoOfYHAQp;&JZ?Utfs&)=o{`TMDe>9#8QO(W+3uWh%gt7~iGtz~`ie027q3mL{ ze58w7?Gmc0T?)Fg%hXL}m#c#23Sz~s1e4|}wd%^6?P{v3T?3ZC{de-es*3;3crC0o zyH2miE%JI|-Xd?%51H>qV#RJ!D`eiRmd|%hyWK)nwOhfwMc$?bTzfl_zpn!G+B?QDn+r7lpcAtL8eD@P8_JCTU?LoDCKK>`;LsV6J z7)))WwSa3MA@a9XU|##ET7K=s?KXyrzpVnx-~Kz%7PtS$A?Cn4uy}`#C5d}@RxW$&ov6ScM4yHr(s5A@I8 z`|9Rr?*moPd`PU=M_|%?td`R>wb&VhsfiVvMoU7~v}&cQ>8PqU zJ?N@tP&fZu&8P~RnTQpe8BCg4)N-00EjBAv)n)@5R5j9Ohook6fQ70#NkY|J${A;F zV#Vgsl2A3TTB&M2s;bQox~c`#O;ro3f@UFN#TEvWW)ZcVX5JQCl&Wfrfl}4gty+h7 zpl{U{hp1*tfQ7OpNkZ9D${A^CBL8$zOG4SQYWYZf1=n&^{L@9yl{KlG%9d9J%?iYd ztq3MfPqmzeqX>IZRjoHDWnJCQgtE@JYJFg>*-Cmno@Xl)^Le(4e#m^yL{2kMD`c*! zmd`g)o3&DLngN*4vvw`u+P=h!^#k+T{%ZNP%e2{QR8?CYEc2`$pSC7j1J;_Ysn zTEx`0wtmQb>k#=Ti)w|o_0;nDYOC7%RQ!`gFtu%{1zcMpR%|0MuiaQJzn0^cH=(N9 zreK+8^*FV~vwAa#YqmL9yhFDjiFfFh${BGhV#T)Bl90EJT0Y`=eQjH+sto}1S-qVW z)SuPcD~D$XV#Rg@lV>Nj^4Bzws%nG4GFvs5THmVe3`xy)0Sj4!Nn)H`l{3x|V#RjT zl8`l2Egxs=HXBA&wc((D@OD=>KX`kng2spy8v!OwRV}B%WwR$$)kcElZ< zY;UkowGT_@EF{#p{MYHIm7Y}F2+s@j2|t2#*CRCTZ_XbvG(>`*Xi4pYl% z*s2{)Rkb6)233u;BO$5TQDC9!Xp&HMjB>^~mRPalv?Np=uU4u$fvRdJg0AW$byL;J zs-QWASg}*Vq&ZD3r@@_fI#t!q0L$P0J8ad?gs5g`frYZONkZ8<${FcgV#UtWl2CTO zT0YYCt#$!b)h+~G*+uH6vWrzga|y9xmx4)inOaU`&2~9e)vf@`-~K!KBY&|~yAsx# zU8UFK7I`%>Z;{vNhs<{^v0~S$6*8|^%jdhP-EN?&+Kph|B5%?HuDzMa-;aTL?X7D0 zwQukxl~nxw7+C)H-?_FXy93sm-Kp0@+g-%ecDH`WeD@G5HcG9~cCT7KA6vEisH%29 znA#rD0;tN*eF*Ba*VV0B3Tv{Dps3l$V4>*~lF;<2a)$YgSh3HwBs6`Y zmJf4Bn|(=DwXZBQ3N5f~MPCqL~0pnh7cD zG>f%ZcdDvQ1U9H@h)oPh%_ad0Rg;p0s>zfy&g8_3O`#>BYD%?I)l^hfn;LXg)2N%O zrd0*abi|5H4<^kFYB|l~EjA-n)n)=4R5j9OhNNb*fQ71ANkY|Z${A;NV#Vgrl2A3L zTB&L-s;bQmx~h59O;z)%f@VHq#pVZ-W&yRFW|kIPkg946fl{?f7yk!Hhj*ZF)fR@R zW{ZG@vPDTk*<#8WX>lU|G*L@J*^+AcNZ5duqT-(>g05^CbyL~0s-RhpSg{^p(ln{% zG`qIg@>Eq@0hF??Zq-6r=UcTEVXawDy&liAUc`K!_0|uWuMd&a3e*ajE34)6ea2}8 zRGd}-=JTvY3%GVwV#Qj)ytYj(zm{!YJ5|;Cf@Pl7 zTZ70yNmMJet)-UF$2TRfO~pS+1XJ6(TEMmI5i7PnnAdKgmS4-CLpG$US_LfgtRAPf zcvf!&am_Xci+AWIB=HX2R5>GVMy%N8S`zZMP|HW$t*>oKRkf|ad{%F*1@&k3Hp=1I zmRPX?VDfCIR{ol{r>fcxV41C&ORaC!c7&v6JAs9)fg~}`Amxm+GqGa3Xi3N#td@^+ zXq)XyRkb0YfADrwH$QkoRY5b1Sh3+?((JC5)9@8kdr(!)!16cB`c`cOBsHsog{nPC zLe)s+jI$T9VtZ>zsM<#@ABU~lzEoA)4|G-gtDCB7s-QW5Sg`}aq&Y|}r{SNj9ZXfV zL%;@AjkH4{so7y*q3UpwP<4cI#yOH$v7@vkR2{8Wsyc?MYR7`E>Ns^%)$yvJIe}QQ z6Tzf8NiC6Jatpq`Kq9~fLO5$!KArJEvGrE*)FE4+9hE5+kf#18#t^wc=)jY`bQO) z!dkP-^m^PPFDK?L@(TTs`K}~Z>?*ZF=GAKXe6P0KHB?o*7R+1Zby~o+*Aw{?N-(dz zQ7ymrL%xKPiZ7uA%isPx*VbgWz*@6g^?GQ#jhNbQ*AJQR4r0abR4cUIrIydfR_$)8 zs@(&owozKZwf7P^uMW&>?^nyOy`;?^pyIqbu>9@66K!$(e+Xg@tOJX8=xCC7hd!d5 z5&uKvz&b4nd1KV_5i9-dF)9wM1M~L(gcj6q|6`TI^CXckp#+oXX|?j#^b8eWLJ5}H zsx|#}4$pWv{5^6Af;)SE;J@8t9+B*VWC>-W#f* zd6QVNx4@)%TP>$qt;OD_ddk>15y$=?eJ|GEAA1Y^PO{_^AoXRKWj;-`b8}t=hasGm8xpLfv)OzbyL+Js-XFkSh2srr1@Jd zr{QmE{-LT`H~tBKgQ|wu1d!BhLa#CwRx$kHXkTet8{Uz*5MuKTebNi zs@Vcyp=?2tP_~eAMp~Fyu|>2blr5^3k2Jc~7Ne@#;-D*ALfurhq$+5ZB35i^Flm-i z%V~CKv1O^Mwj3yBUEQjMvd*_^Jz%X_lU|SK+497Eo~@uCGT(|sP9sn&WcE_a=i^x5 z-c+1M0Os>-B`x6Em5CKw1j z7SHPSA+Fg5VDS#!kR;xr73GY$5wT($Ye~r4L@gh2=f1WnRn;~F^I0v!Qt)h{9G)$S z7266-o~_l&U(+^JRofOUvsH7c^{v_fNNTnnSjgI*B*xi6Ipgd|tk_Ok60!!W<>Q>z zW`n4zwlnA-yj|4I58hx^(CkXA*bp#jc2mn~_&3!}Q`J7IpxKvLvHifL*1z2%d>_;q zR8>0@bY*9$o662s1@u}N=H+Vne1Esw6;xHb63koVRa(HcR}=Y?NieUyRxQ7lFN?p9 z3jZ2d{`TLwwkEp))|%a@*F)P)#ME}Pe#m^c5G!`8TA}SWwR}FdYPVBW?G7-t-Khm! 
zdlwNa9GKVMqn2OGO=uJqRyeTy?Y|Rkar?gyVw`he@eX}}B;KJ9Drdxph&bo8B;<`& z%SYU~pFKi_a}Lbg|D#$^->QvK4$osmtaD)UJfT+pn#NLLode5k)t3L)R_!TxFw}vC zvS&zQqGy#e(Q`x$by^b2UQo*?+Pl?Wq{2`K`e*NDb@Q|LiYjPcC1R)plje1`oQ8jZ z_68M(I#8dzu5Q&*7*0DVaN2=|rgum})4R$U<~<@#J1q%KAE@QSEZ1%yQsJ}%UDL-Bk69DrkNs;=luw=6AK6hHt3;g9-;8*r2K*_7@~L@W4XV zKO~{5+dMwb1VkKoV2;yWtyDD;6%IVmRZXI9s+v?4swN}izyp(J3bmYuP0f^4IPky* zRgJW%A;EzM7OJKt302c6XPoJYIPkP2{#(tcR;rqb3I`tOs%B9)Rn4jjn%Rgr@W7;* zLoKJ769*m@4m?n*y1G^C@DB8?+T0M~!UGFs^OA(J`IIx#{6t)MS`x|@RLe)YqtzCo z!i5LAvPINQWs9nUW-%fzJTPgNP|Im9#f3+O3lEgCu5Q&rS?61|rD4UAr`O|owk$E9 zXUpk_%-4g6B~Pu8xx89F-!EA5sIcUL`8?~X1zg*USh3z>+FFRIZB_k{`C5sbHK0~#YgfzX!>iVpin9j5)Ye}MxOO#S#a0LN z+BMYjYd>nWHL0q$7Fgz4Jx*=$tX>=9nymvC@6dHg;vKr4azy*z&;S*<7vsHEltKEe|ZSRdcEJt=d+QV8H_mS=*4r zINK^`oB>2Ecv=#&wpYu?IlIkvpu&O&`Uh_(b@PKaP!%+Th&b@Tq}fF+r}?GX22sz%Ukl?@r3spl&Le((kj5C~w15Zmr)gEg3IBeC73I`tOs;cUysy$UfGm?k{ z4@{c9)pDApTWlXH9C%=Zsz%y=kl?@r3sp6eP<4QE#yOCP15Zmr)xm0|sza!7;DN5{ zFm+Sa;i{lHf`|hTOq!$AavDz1JDLgy9$5bN-(jnEEJV2Qz(U#aB%$mC<&1P95f`48 zgtC*>@{z7=wNt2Y;eoE~G<8$i>8hYPgNO?cOq#RQa+=ec?QAMscwqV4e@?hqmj8sqK3Gkoj&PR_sQ#LfcJh`Fw2EZl~(dkmcp9sH7M}kfrX|wNJ7(_${FS@BK|uq2~F>)<-=^= zZtqgzzXM&<`|75q4^%<(Arb!_m^2@&IYTO{7A%s2PVzWYB|kJIPj=&;DHUQ z8e+def&&jMRQ*8`s{T~YIDZjw;Au&y`bVu))ooslGXdzTCM5eGRozuVGZ7I79+)(f zsO2>C;J~B8fd@9IYNSmL2@X84P&Fk$3r_M@+0}qs{u5Q&jyaRo!HakSP@W4XZoFt)aF6E3gHxU<} zmV~l-)$)-zm}WjITzH@>TR`1ZwxB9#79!%p1CwSEwVdWmTzFKt@IWc+>Q*h3b-qLzd`lCt`w>eX6_z|OpJz>4z_rU0E4Bid*RH6R zU;9I=^`xp=FR;wBdVJcNtT(JR>!a60+e*aLwz7W6e5(*SOF*sA)}ofr$5w4sD$WuB zQ(K!BaBVxWVtv89wx3%0d+ASAwbj5f&+2h%i)Z!f5Z7!Cuy}{ANfPhSwUjgB+Qf>j zqa`74UA26~b^6+RR8?Ca%xComT2OyhZ>Sue3K3f#m^>S+mA|G|<29t!UU6nJ=5F!pdEeTaa)$(!Jstu#Ufd{&( z-PKK1d#HlOh&b@Tq^YXqG%Mi1qr!m)HmGW(?F9)AJg`u;4@s!nS2^SCN5p}rC84UO zR;oIH3I`tOst!^&RUNDfnnQ><@W7-wOf9GBjsuSh2Oe1d_TOQvb|gf&@W4XZ(Ilbl z80Cy~ED;x;mV~n7)$)-}Yqb-oaN&Wj>?Czl*~zM)IfaM|4@{cV)N-0*o9%QeTzFvl z+kYp2BG1teneSX8mOQmW=J{&*d=ItT1yoq_z`R9Xqy=1i zF_EvM1oPTU)yiMdWmJ3}C0PFU-?_FXy8_mlU8&bY+f~HWcC~)UeAf^wcCA{W?K-u5 zKDKJtQ&sH-Fty#N1zdX*k@Muhy!IBg{My^v>{cqylLO1&{yWhYxBuHA=0G{Hc!%Ce z67SHvlr!SpL=KeGl8`q_Eg$iees(Vv2g-qY`@dfc>bL&~l*98N5nCRZJP)gtzoyYt z*z&+KTeTJcwN?8MJhzGLt!{qy zo>2wOvqVgIVA4FVmeaJh*b7vc@IZa`y1G?MVNLcD6!`DJLencGq3KoS4D%Wh|DBeE zrZ?2`VK!^GH>vR7fv)LobyL$js-St7i2n{un)lUm8va)811kJ?V43@W&yHKQk08N; z2NtS6AqiEVDrcO}h&b@HBvgH&mXE`i(tSyV0}pgnU#pv{zEK6uw?rIxVA6cAmeXKX z_<;%s9@wC&A@&m_IPkzi)h{HW>R08A^BWNdo|c5FKh#Q9e^TMV16|eM>ZYoHR6)~i zKG947Ce4HtbsD}br8^Z4Jg`AkBW+?xaNvQ3s!2&g)nv*UXL2GAJS_=TQ>vA!rlP`u z2fC_h)J;{>s)A-ZA`Uz-X=YH%Y52Rs8L4pKfl}4gty+h7pl{V?h6ooPSSXv7B$Umj zoRMZH;=NMPE0#RH9?!EyiTONROh074#fez*)C!qPs^#;&izSZ=OCFfdvt_h^YnLTf zY&kHm?V(m)+eB5h<-sz~>hWo7vK3&h*@}8SwDlyWwqE)n^Yte3?VDXytY*>zn1eu+o-D64wiXVk5gMbtNTJ+vwmRl4((48@6gqh zGvew*zJOCpLf)Ec`G~9awY8}D0!}cW)$3?M{aL-Pa(LDwV#@=QX9KnJ*R&xOwmh)R zR?Vf>w`vzcp5~>EP<>Rnb z+m#9j9_XreQ#Vx&RRzs3A`Uz-X?9o3X?o(oqr!m)HmGW(jerCP9$2W_lO$A)RL(ei z5pm#YNvPUKtyHxy6%IVmRqd~Cs;a4i<^UoNJTPevQp;)nY_@}`aNvREZ~q;(YKKCE z3lA)m9ZnL;j!@1>M-p-2X-OzMS}h-GXsaDVg$oaKWyh(T%8pkB%?U(Ycwo|;q?Xgv zn(bsNTzFvl+kYp2YWcN~wb_kSoEHa{zx{WjEpGoe zL(GA3VDS#Ul_cJww<%}D+lhPurT& z1C!@|wer{W02Q`8u*_Di=fAdU55a>A4=j|8CW(n2QO-pFA>zW*l2A5AEuZMjR(p&J z7ar)Jy(iSo&)!&7&^$@Rga;l{{rC|Dol8wK6^d8b?uAoQ&^Kd2L=8+u+a1Z zNoaaeIm5g}#DAwHq3IR1e3%{E?NuuLcc5!}UES35hAL>@B;vmVljd!;oMv16cU1WA zz%uv$ULCh;??HkC4=hxDKoY7xRL(db5pm#YNvQfnEgxt0Hv5zc2Oj9EK36wYeW41P zFNrwtz@+(FEvMmY?Z2VIfd@9IYKVOY2@X84Q1t^zsQOVkKC{{&cs9< zcv=#wCRHm{O-6+S4|G*isGF*$R0YjcL>zcv(oCb4)9|-y(^BEU1NGl(m2O@8RCRa< z`c`dvh;ZS7g|Zn*LfK5p8EIxBE<7y>WwWZ~Bi+(!vr*x~16|o1>ZYhq 
zH1nwCG#B8)qr!y;O4*tvm36*Vn;%vzd3rsbXA2VZdA5*#$b1VEvE->0G8a|L=lc{( z9u<~6FrQ~jXaUzQNvzmXU|zelT7K>Kt+otR)s_XzJgeisT4&mtY&lqK)dk`=(ld)#m zmBX_Z5nCRZJnN{HzovDmu;qbewrVc5zExWv5-fONA!|dD7^k9~aW*1i!PAnEwTW6j z&Sh=3DHRqx&_8&atD7IZEmT3XB@qW6m^53fZYo}s-W4GhyxEyn%&fL8vak+P%0dF zV1ueg+Hgp4;DLpzJxD^8DQBD!L>zcp5~}u8D^-o8!hr|6s=d`sRr{!dW?v!>JTPhY zSIcRBZ?+m04m_~@?Z3lT?Ldfd;emy+gGoZ!A<7x)P$DioEeU0ZtK}nY)@Dag;lcx5 z*-`4IvZGZ&a|{s|9+))8spT}AH{0=4xbVR8xBpK5$X{&LPJ|Upo?eez!Ix;VrsisKV-g3h-?VDg~yHX3d_9`ML&w+XE zHEQ{_KegGlRGd5qmcRXXqAhO!*F($!bzt!hy^$o|p*JaK#G8qH0jHLPyj#`s5#Q-& zw^8v0oM7Jm@6dw!?f*{Y@Z3ejmIo%!J!<8zX%rQ!hr|6s*lx8RiCJW=2IdLJTPfKSIcRp$AL$M0}pIa)e!p%5*&D7q3RowQ1z{H z#`%tj15Zmr)emZ=svoIv;DN5{XLVE6FRGyVm52imOq$=-avJ{r=?^L#cwmF7M%rJH z;J^b5RsWELs%{JTI1>OXw5iUHiP&O?|D4R|>BTY}lg{LL)-)ly-e5BV}Z6+#Qc%Unr zMcq_3t14(_BjUmXlV%RJoaQ=QcvQIXKq>3$RxOluww>u=bHj=yPp`-GY+hnM&*sw) znQwj~mOQmW=7MVZd@o?hqr#F0=JRY3E#TTki4|K6%xf1{%df@HwFFhwmITW@tH-CU z$(Dk(W=re!(6$UQwJob3GT(AUzI{`z(AK1u&&O76c`Ckr6HIL@Y5~{wBvz~!nAi4J z%deff&H7MPZ6&bGvwED`;#s{i#5G$5EZ(8bB=HVyQO<~~68Qp7EeUyTYWaxM_O*5@ zzJL?VXLUaMp0}EB#kc6sjl{3x&A`Uz)302#x<>Rnb+kpxP9_XreQa4o%R0Yi-A`Uz- zX?9V|X@0|jM}-3qY*5um8v+RqJg`tTlq6IQQ_eWUi8%1IBvkF8R;n^89C)Css;Zl+ z_EZJUNFokAFlqKy%V{3w3_L0vcwqV4e}}Eweh}fp0}Ev}l2CSlaz;9khzn0kLfOG; z`AB$p523<^2fDJu)J`)$;kK=xe7?VaWsY7I~T$aP8^Dik$)GwP&j3*WTD_ zXHiw{Y_R<8zjJL(b`GpHJ6ErVw)2Ro?R@=^`7R*x?VDMv*+lbimz~s3@t^76JNrf#BEVEVX{jaUs z-SFVT0}ExNNMfRUl{3+OL|k}U63QM>%O~Q%z6Ysr;er0ydsyB4?2T3h%_Bricwo{z zs+QB7(PCq$FyVpv>~(dkmcp9saVYTLfrX~AB%$d^+EeTcssFkX^EvRuO0A1CDWdEb8yDDfV zBI3XUlV%dNoMuKGcvLv>K&k5LR;|N3(6?%nLxc+tER;=263V7h&PY=eap7r6D4SL- zABivPpNXBhop04hWo7vc+Jn+2VRVv@JnQZARnb+m;Fk9_XsJ zQ#Vy@uL_zSh&b@Tq}fR=r}+p69u*Eeut8NLZD&Yu;DLpz!6cz-SLKW|gop!AOG4F9 zwNlkEDjayAtJ+=NRJDgHXpD#h4@{b>T26CCv+YTR0}m{J`|q$-+Y2IGcwnJyACgeE zuX0A(kBAFTOF~&qEg$KoRy%+S7ar)!4pKLj9jpqPLx{NWz@#}$EvLDx*$$_|g$I_u z{de+5{$i_kB&=BS^m^PPk0$0V@)-S)`Hm%G$x|z29Gk!|nwPB3r(H)uiq_J5;tcy1zM%L9|=7Pa!% zbSo9MJh04GtZ|@n0c{@n2QW>%Kn_mO z-lW262m1JLtDED$qY9dLiCFExqk zO#cZAz6RUv>g4byQgDK%f3=b#wY}R6+AC5lbDIG~cV`G^b#x zqry@LYWm*eo4#t>4;wLPxD6d>|Mj0I54D2?OC4CK`h_G^{i>XCej{S3(~?m2hgv?) 
zx))VW|UM)imm+s%ce0GaV629hfvTsO2=AlrSR|mO8LORUB#u z33fWLP&F$_sG3bV8P;Nfl@V|eR7Xw#{UlVee%K(;h+NxWs8!8vc;4$(&9uMbXpS1mQ>3};{5TYsBqAM zu51}~Q`xerpjnQ{U#o#h)1;QuT*9GtRQ$CXC}rc>Cy!TI-T?pWRHGGPL=Lr6E3~bpme0pY32Revs2!Nv*3|;8 zU5|)+4$NyeP|L67Yrr?8!aWE6FMm+#nzs0Z(nb*Dr~`|4=q4ob4&78aBW^~-QKuy# zZws}2#GN?QjtWN|n14{(S_|rbP})X0Jlhhn)Pc#fom%;8+MWta9r(ZOlgA^qzE9o} z5-fFKA!{H>j5A0%pehcvg9JMrSg1OjBvc)toN~x^3I!@hGb-XHQP9S2Z1C!kT1d?~CPYNyxZ7I`@_Z;@B%hs<{+kwfj&3Yk}{|XpY6s>m@;WWx z+Utq@of??e-l&#edn$+8QSoLJBS==r&egY zOD&&||4n)~6^Gh^scn=NaP7TB+;d=Fd%s$KEpFupsBq7Ln~svngz&QC-vby^ateo@QEd4fajsIb(5uIhJnQ`H};p!t)C zr4CG*ztwUY{_^i1DlByiH>`?7?I6KY2NtTjlZ2{?lrzr6L@ae$5~?OuD^*QKg{2O3 zRa2;&s-{#0%~V7zbzss=qn6WPf1H*IOC8wYe=82Pg9JMrSg4wjBvj3$oN;C*VyDxR zP&KPsscJSV>~x^3nnT@GHK!_Q<|1OJ1CwSRwVY;V>~vJv=|HLK>Q=48JJ7dk^FxG# z4lI-{ND|5xQqD*V6LHXKNhn)XEguPE@nTds=s;Jtgu1D0NmbA+MdYv5z@%A5EvMn@ zdY7f*uhl>)>*`i5ly$yU>j5i=+UfOpo-I$z=h+JSA@i+BRt~k(>!GcknA-a4hs@WH z$f0&>g|^ky^7;1VP&+CPwF6Vznp(iMYY}nJfqCsZYWcOCthp`~?m4i`vwED`;#s{u z#5n4};vKpnNxVZV${BGZB91yO33;2S|X_>cD(fZ>|OPXZ04!;n|Xir4CG< zt<}n3(>7FC>cBEvHJ4i7stteyOC4Cq+MXoF*+Dtu>`25?rzIh4pjtl801maI!cqtN z2X7a3^Mf~76*RjNvDAS{vzuB@!$CPisj$?6F)gj6m=TIVcIxPuRhpUyUj-bL$2fC`G)J;`KtAgejB6d13X^vCNX|O*YPlcTh zEPwm&uvI$|A{=yJq3mRmP9Tg5b(3PE~ZYn!l6*T7% z`D-;WY0gv2X^!VmJ1YKK4J?2A@8pmC#a8V?SUJ>Aug5L&Vq)GRFVPQ~?@}U%+Nl*X zFIUUwJA^~+s5sOP%v zXuFA++HTelneP@NhuWzX+HOQo=J217~sRdkn7ZLXynAhH;mS6ighuTr$ zo&(F@{yWhYxBvSf#!d$o@6ZQG;vM>+az=cJh@DPLLf&Y#e8eR=)Q$=}9hkTON421S z`yZnmp2vvz>A>W9LaqEYjitg*2bS5Y_4wCcX+8xHraG`t_6$i(^sI6wdX9*xPD?`B z3u^g9e9zpARQydG=%2lp)y>b|E2^M*m58YhOq$o#a+=dH)lp%p1NGVK>Q*g> zSn9w+(>o-g>0RXv^BxgPotA{A57hEu?!r<>g{2O3O&_b9nm$nl&8I{xbzstbu9njr zfTfNKOC4C|{$IQ!<8Rfzf&@z)Sg87jBvgH?oN>M*VyV-TQ1ydaKF-8#_9GRRI?z@9 ztZu6MMHMu^60y{QN%Ol}PQ#f3e^6nm0~=Jup>~j9sRIjD|B!^LZj1Oh6A-b~fjLfh zwNlkYR9NajS2c;cscKSHsG5w3r4CG*Db#Wr{x{u}R9Nc3232vW9VFQ4z(UouB%x|L z<%}~u5j&lh#DA+9)k;+}QDLV8UDYh=rm9(0K{Fc>I~|xbbExGse5cEtRM_c2sp{%h zt;0Lew`y}kgo6$&l+8;L%H~tfNb?hM&}m61TTm?@iGKpI5ETwO(3LHsZYo<;6*P+x zIn)kJnkCe7nidYVqvB9IP|CWxRSRXEZ`GEDl|$|HdOXjTCFb*NIsK6NdJs9(POXr+ zyjnirdmL&<#i4d!KF@k;0oV2-a;P1c*Y;7%ull1O*X-UZ2P%R&^H;39$;iv=iS-r6q)SuOx zD2HcLB9=NZc{W!oe@$CZVW|VlY}H(9eXF(=Bv|UeLe@4UG0wKi8D{_yOP!X4tnJnE zacUfDM}?&h^bg)n>gES;pekqv5wX;PNwbSuPQ#a94W`0U2bRB4*0*XyAi+`x7OIAl zgsNf68D}^VOP!X4sy)>5aoDOE6_z^CRaMnZReP#}W+V|y9hfwGtK~EsaHt&>mO8LO zRUB#u33fWLP*o!dRR<_%oCAs2>9izN9jsQWI)n;49q6hKQ#Vx|t_qqXh}h}Cq&Z40 zr&)u_Gu~oa7ibL(d)OM>DaP4hG z+;d=Fdxu(n?Pw0QqryE0mcRXXqAhO!cSDSw4lLfGqe$W%darUuypM>TPD?`G18Vt* zJ8-BS6?Qr>Z~qT#LH+hWS~)zA5b@K2$@8dM`D+?Og`W;AvsG*Q*H-Oucrew0g|e|E zG0~ICndm7ZraCPNWzVSP6XA$?mI_lH=%2mk)y>b|3#y=bk%*}dOq!R~a+-md>ZmZ) zf%@!qb*q-bIMfabEOlU^=?#+5^rmu#d5egpPD?`5J8JnbD{!bC6_z^CHNCHHYWhGG zG#?VN)PYI!v06^EF_tZYn6R6+A25lbDIG(W55G)rQsqry@LHmHh2?I6KY2NtUSAPH4} zDrcO(h*;{hBvk#QR;uc@sK%K9bX60Q{g0~ds-T&Oh@}opnn~1hnpv^bQDLbA8&t)i zc93AF0}EACl7yN0nrm zb*VPfJFqO(Uab%X9qgGc&Z*3nh_9t3Sqi!gsmzv6ss-kyQC`3 zHIpj+{oA2-B8S?+_B@-A0j^z}b*LR|*RGpXx%L%9-FhO2+QC7d)wAN}P&-u}YM1-r zwh`O7ZJZbN_j=Z$c1cxklaea^+0$hckwfiZ<2IB5uARbC&%t)>W=WN6|1`PVTtqzw z2YFV%Ah-Ig-cn(TI@rHMx8l@y=+^PI_(qnZE<-B4ZIUW2uIErY5k(zr&+4~iz{s}l=DsqMTozINWllGJ5LrL|L1 zrJchZY9}J8gXM$wo@CnxZ`VZ1?8cJR!B*zINtH4SOzNhINb2C=6XnQK?R|=n)WM$B zo}9{RulU;8nv>R@5Df3l6$0g069Sdu!}$_yt}%53XUI}u48 z949M>+9^U#2YXfrb1JJt;%n!_EID0hNS6t0NLAb0kYn2V0q= zlPYEGX?~1|oDL2?{m-;iJ5C`AI@mKifm4~C7+*^#u@rO}Qkk8SRB6fQ|5OnL9W2aF zOSUmPJ&`gWV;yP-TbWNJRm$X0JCQ@};Na8$D8KUe->RLdszdE^KU3t{Y?~rKl^6AQ z4(m|6q$Nz<0^gjx>I{kl1VRAayzeBI*)OY9&@wNCBmYgm_D!m(%DlLA_p>`s2I@qTF 
zZ)Cv8^nX))b#7+q>0qn#t)$wI=@t<^9UNq-w)XRuYTr?Zs1EkbZspXDZi}y@@3BO6 z8B&?uo>b}RUWeL=i0WYZ?ENU&_Sw55kupDKiRxf0^V6hCnNx`BL_~EkpS`gz)fzE} z+9^S42YXI;b1J8M;%nv?ETvtBR8IFMRhqfVp>`rlJ6JgVI@!kQ{zS?=z*5@5R_3=! zl`;oA)J{Zc2M3w|*O_^#_IpJr@LrWaD46jk)^=Pkjm=|6nQbGNiJ4 zCaK2iSrG*uEUf;WY-9CYB4uVKYC!)ZEh1JWGZLH=`q|5>=1s-f=7EG#?$)R>43OpEAV_mAv z^bRabwS^U;!h=1tML3n&qVcu#YL*HwLn^bylPWFU>`*%q6&@_imP)oUTRM?4%dk{< zu$5UZsZ!;F_Hh#YDM2YFV{ikn02RCTCb?uXlYY~!|m zUewN|9c_*&eOb*Np2RC-$_Ra#u#p>`sN+QIg$ep3dFJgc{fugNOz%Y%a~)e6;-rP}t2kl?|d)()K7&fDT^=j|*BUWQa!J0?}yIn|+d zA`(1UK6vj=wtet+N~FxrECn8HW!{riDKq<|ZdVZn9vpn499gREt_TGl>{(6YR91V$ z*UtM`3cL)dtoBT*v}38Zmxuxn7FHifwz1kLkuv+T6nLkI9WN= zP7w+`*t0r_Q&~-qubmIF6nGg@Ssk2IV|9p#0uL5eA4#^cnvqDELs<$u*vcH9R4KCz z1)hik4-P*4&$LuKN+Bvd*fTqZQ<)tbUrWcaRCpOunVpbSY3VBtwG&a{!NTn1WE-kON9qpnbVRgWsY*Fornq#4nF;l@+*J8RQtH9Bzd`?De?@qO_86>i~2j0CCN*w z(mXq<(%+vQY9}JegKdgDHv?RI9_vs$*si@GsdDXA4z&|G)D8|l{g2Mgp?0b|)GqhK z?GmEiTXQ}pCkwfiZ<90;`xb{lcp?0uc`}w5GwdXk0PUKKK zIQaBG3b#7_e?eh~+QI%EdM&5EL$8ak#V@fAwabu7@A{-li^p!(-5_$P9cIqI|^<;eQ{Eel+%aF?IsiYdK zr$rQau(0}PvW?XWA zER|F#a~Kt#hzbve*;ZrRvZpc|eW|vrsw8D*aii ztt2AJgY9{?N(Q)gRo0<)uwA=)QtjF`L=LrsgFLHe#m%91syfsz_rq-h+qkWr7xlLe z>rlI-D!28LD*e%_tuJz@9c{;!~sjPO3ubtgl z3cL)dtfnPZ+ObsILqvfG3#<1h+gR0;qW(V4lH?^-X`Yc( z>F*JT+KEW=V4EV($^h4%%{tT$wrkHxYJ{6Z?L-c>gM&~1qjPhpovIGC%l&Y>kZs&9 z%8UB@H0w~iq$;;dk}CaKs$D8_s2yzFF3SMdUd}qy4z_DQmsGjd9>7$ZbmTK!hZ>jcmb*S)Q&+H~n?dazCI{GF{g_j|f*)2(xj(n^3 zZ4nh7ET6saCfh!Hw`r7JebekSeI&zm_zN9pudAXr#m>6 z(~sk8<|i!uU4~RncP3SuS;L`rBKkX6IQ=}?#_8@v%G|@!-@#Vqmr0c}o73Nk=4a6tR9H3o!_t&co|YzJ(yH!XGID;5d|JBtp1Q}WA#uXWgccJ z@L((Rr=&_5y9z!cqQHaWWaUshMJVuK&+0KwW%YP`?fef*ftMkb)e}iIR!@p3@L*x} z_hcKZrxGdiG)sX8TbX|*RmvMSItmbAZ@L((R(xgfmy7GBN6nHSK#=2CS=^a>>YA;iW3J>9&k{$9u0eKV=b zZ9-C|zZ_~Ovil}%+}6zi*RIDp)DE_5H%O{++fd|CJ2=R*`USbwXZ6MkJJb&L@6d^y z`VO5GUyGZtHsH*VN^dBs(&C#OYA3P*Cv4B^%`#x*S-p9Db+%y1@?fj;hNRk$X)6&~ z9voz;R;Z3F)!wKG2_EceZNsVUY#U!YZ)QpGGNjVlE~(PaM;&S>BEf^@gZI{C+Xrul zM9RF4rND!&%sY}QWt?ffqlf|z4n9$iEY;qn2n8POS?$EBtagsCon2T8ybP(Vc1^0Z zW2v^Ahyo85R_{%=v6_}hnLStvJlM*-KdDk?9tu1W1s)tHD~H-CLV*W+R{L-&t9|2Z zXFrw#FGDJ;1Cna2IuQjPEUbo;ZLAJTq|9`d0uQz_A4;l}S&#xxM1cnfpZ;fBs(n}? zDm>UTo588f4vnv+!&oZ345`eHNUF4Sltb-ARCus3J386M?3hH#9LrMS!B*z@q)M41 z9cm|{!h?fP|D*iM-!IipQk5hx_cKMF!nP^$)V!#_kFq3rNmZJsCsq2}(xG-Dl04X^ z$WLT|YtLXEY6si3XC_sy^{v`jB8S?+!KeSxxjEEMRfpQ;ez={>Hg4zTMg5)6+I=&r z%I(6WN`IDW7m4h?2^+VIGr+Z%unx6@?b^>IRj$2esJl$$P&+vI^gjx>I{klEVTanm z{vCP+r@lk4jIYJ3SQ~I=NTqjmQl-ULIMhyL15Vhc|1W01$n<}0e08p4$?{;U^W~)4 zkLh|5Ssol@skYwpmTF&7hYAn&%x>h=j=mOOM_*^D@G_(_yD6#C5v|(IA}TysK6~Fv zwte<)NuZs$}^KZviHAF}j! 
z8B#gjkyL5stq!#l(ci(s>8HsyPIo3!<}Q~04z@BsPpXtzhyG4Pe+LJd|JR>+srCy+ zDDYs<>RwJ|bzgk#{EDT(%aF?I{-jDf3pmtHM1cnjtKTNuSUs3XncuM#c(9fELsF%T zZ)zSAQQ*OGvT~@MA{2PAXY~lDvU)VWcK*y#;AKc<^;lAk)#D-xJXl!$HQC1MiA2gg z$x`6KR_5+z+=^*v4(uyr{p`Si5f~Rk^K^ROv5= z+KKGG2^+VyGQhPHSclrdcI`Szm23a*P&<)B?cgBK>KEizpVjLt>`*({ze6|V)OYAc z@wK=yYXi;`r0aKiSio}2+A&+4K0>P%tD@?fj8SyJuCw7G~Z4-T?a zD^y38YFjEof(LtATXAYTTgTVV8(9*(45_rXNvgDSghTB_BzUlV@ZOSa``~StNSUcD z1s-f=-kMY?^P`E~4k8LXIQT?4vQ&G!A{2PAXSE}zvU+EH?YxVnz{`-zYNw=1JC?LtDYF|(fd^Ze_a;@!_^bYDA__b>PF4=JQ-lH!_N?~gR91V%*UsK7 z1zv_!R{JE?SnVsKz=MU={>e602P9IaV=3@pD>Ix_DKiHJo`?bu4nF>#}$P@FT{!U^^@{+1FPf4oux4@>|sUng**rv$S zGQhQ`vktX`?b?qgRj&Q4L+wNkwS$9C|D$trsGX_~wafi*JBw}H&d!Va`xI;U&7>-~ zbCW9lS*o2Uvil}%+%Cuf*Ivjv)DE_5Kb=&$_D2r26FJll4nF;l!mUpKmn!T~JJ`QN zFXPmA=;iUX_*vEloEcK-U6E92@wE=M6WM?hw(0-#889;aUmahaYgn>8*y?;SsrF;K zRz#Ku2U)7E|GcHzm(-!cgFUnBIklr3;_K)uEEQgcRAx6ORXV!Up>`rFJXk(^-$=H7 z_HIg~%*`wj9&Ba4l~gISr$g;TM0hZty|FIU8Zn33DM5b+drr4f!j>`6Ek#mm!taBS|$@kBTVpU}5!_WE-o;5-IaI zOMwSlnZG7g$~af;2@wSz94D*k-QN_Uz=J)jr#O|>)A6UTn~PJK%^hD$ z^RQHS8B&?en^a-;&&l0q74xG0R$}eGnN;PrN>Zgi z&+1i0cHe}J+v*wM+BMjz-I}mn`?{pcwF?e)Yl()t32=~S^$T*V&+4@mp3$uX`*-NN zoca!3FTNJnXKlclA(h^SNtG72+O*q9WCKpvp4G3$ZbmTHCS$Wm=nMM&^qPiu2dZD))4+S!sN!OM_JYpbM6J4X(6TZ>5WVEN#^DcSbH z+a{4R+p-jRu$6gBQl-o{CwAM3DDdFm6XnQKZF@y1@Lc+lSr9;SqePZ%Iu$1Df4G%;E5>k;Na8$ zOiQ%`6{5m}J+p&2mD%+8TKXVMg_j|f*}+MbmcBW;J48f<2Me>0B-@zHNTkf6EEOJX zWe!iOlzC)gcZ7%v4-P*4kMb*jzf?O)Rg%2i&lGtK+os54^P>KaV@dLosx(hXs`U5N zl`h*2M1ZIZScIM+85NJ!h=1tYdN)}>*DL^ODq*$hE!(P zCsjJSWpa0ehzbvu&)!#)ZJ)gx6DjjGmIx2FGT%t5l-X-icaw++59YHs)}>k_Hlh2b z67+Yl=X49Fa{6|B&3uQYzsr!y>DHu5Gw+_#-6o>HgN4)glWm-CPo&HbSo%BI%KRv) zQsz|pI}!aI9Ay6AaOS1jPZXiRgFUM|IhEC2@wM|amI5zBDyzGbD(!4E)ZHVZz=MU= zFOzMo?oFi3eJlkYY-N6(R4HStiu*+rcyOGo_U?Y82n8POSv|<9tbP|?JHKZs@G_*b zdMK&J>R}ND9xSZ>lx$=5NFrq(WhwArEAyA6N*UXVJSL*RgX3g1z55?UDDYs<>IqI| z^<;eQ{Eel+%aF?IsiYdKr$rQau(0}PvW?XZz5&p zW2x|9EA#TCN}0o{@I+L2F#ld-U8+@Pqc7E7p(;sU?&o>75Zj(-ugr`3TbL!uORCab zB&pKhT_kxTl04X+XNzTkYZqsyc1yr^?UG5AYaO4xlxVnH8V>TTo)xzV-7>1q=$6g> za9fUT+?LOa`g;v)_syg#w-u5q{rOgHMUmY%VdJ)P2Do+=c51gOY}c-qRJqntZFSLb zw+0;KS^a|C>a%)Hg=ciHgZ(>nElz!hPKd9?wOJc*W=N&CZc?Sixi{_B6WM?hwrBMQ z88Gs!-Y~v88?j`0u+@2eQtihyQACyp2U)5Wsv}FaO%x%)gFUSwPHksOeC=$?lHg@X zrL}odr5)RfY#}1SgXM$whGg3ZZ>vPgY|T>O!B*x?NtH7G0CF1<1s)uHq8wSOy;%_o zJlM0^j#F7pjjx^USqi)isjPNLsq$^GXq?E z7CW^&8@6jdl~lQQ?kU|lqT%jbIQaBG3b#7_pQrGQ?tIw4LoeXecj$%jwRjP01I`So z^e#@Sw7B5r-6bL$aKbkIeUTyP8uwx+cDkzQ9u9Wk_XqZBnJ9izj#2iKy^k`Rsi;+4kAHK9Mpvuta#UmHBE? 
zrOf0>-Hjq5JebekSeI&z*o5xuO3>fIp3_a7%IW6#n)xP6f0rSZ(=ADrW@y#EEuz1J zh0}MFZJcgRq|9wB{T*y&zMoVn^DX*25&azur)6iQr`f;TZ?9eaKHUA#Xi_`aw|@tx zw*TY!?)wv#)GkA6`*$W)?)x~Yoru&9miB+1Y-|7SM9SR5lG?#m=9fv8GEYqG?iG>R z!L+~33%5UEjlTUo_w9aVFi9QkyT6}PyMG|Q+x~_nsmqYs{ewxB+ipoxCnBkXrTaf5 z+q!=!kundnBz3Tr`BPG*%n>AYB9c0o?l+iK_ru+;`yRN*e%(IPx?Ohrz<#^$eV{Yq z{TAO2wNr$o4)&}b<5X6U$JfsPuq1UEQdvEbRB7j!$=#D8k~&ye{XN;n>ZwG^Jk65S z!B*yJF~MSb+ENFM^cT|oFbAsSXjLz z*~V(FL|DzulGMRg=A}uMGWOq@S42_=$H~f}c8ZYG!JgIpoXTo}_}X~|OHP*|_1|it zq#CPNipc3;VfCtH8>>YUDYGa`P6u0=#gZyzUP4YMBBz64HH%-A{5CYpcVJm3FR2g( z9qgGc&8f_miLa$)Sqi!gsmzv7sPv(eYdtE=izyWGz&6Kk^Vmx`aMovIGC%l&Yh#5Qi5!zbK83Tm41p zjS5rL!Tue(4X3_Cw~epGH?tIV8B*zOmsDx-Ee^F4QPjcqi_%*&VB{C29pbC=HkPCg zwmR=fs{NRD6p_@y|FTY=1y!m2b~&JXmm(x}u&1>Xr?#_meC_PQlGJ5LrL}8PrJYqB zY9}J8gXM$w-elVcZ(1T{_FzfsU@P0r-l2B)$*G`@BYW69|C-4$m!tV(|`YORi^)w6r!MmJ+o6dmD#EBwe(Szf-XZUv(u9*EiLX)I}rsPEX+QU zY-4stB4s|w`dSUPGG`@K${aMQJ6q&yH8}Y6KgxQmUnifVszdE^KU3s+Y?~s_&x`uI zfOV){QkCXKNtOQo=ukV6L+xOjA}`4R*Ivr{P7SteFH5RidzM4(M7~plgHQjXb91Pj zst&cw{cyXIZQQQPi~9RK>rlI-Dz|HrD*e%ke?jC>JJ`5gn*px9j-{T1?bra~Dfg2V0q+CsoS$OU=7Q zM0GHqy;-c2XHCqZc1n=c!JgB-oXY9G_?r0@OH!91mDBx6m1bxv9uSe#!NTdc$u>?8 zCQ{~iEJ+<~W&V&fkt8In+)Ok~-M4dWKV3 zJsV#;|6)n%GNiJ4E~&<9wl&kv?69zU5$}Il&5=l%Ia!iA*vh;lsZ!>3By}Q^Iyg>N z4z*K+oDTM^=H*mY^TpTB%UE){45_T=6{4VnJ+s9*mDv*UwX`HlL6;$w+0sdsmd>G|6H(B? z!fd%@8?)sTDf1fE*J`kpSs|%X#vV{BihQjG!)&ZewaRStrP|7>I@B)r^E_LXZO^mS z@}mA$XB}#nRHeCQQl-B~9BL$aiXRkZ1L* zxH;5LRfpQ;ezZ2N6jf9Av3hsP?}oAJDyB5t2IC)7p_!+j(bv z?YxU6smqW`Yp0}2JC+9^U#2YXfrb1JJt;%n!_EID0hNS6 zt0NLAb0kYn2V0q=lPYCivq^W1h@1`%KK=LqR%QA>P9X|9*fTqUQ<uWXG%6uZJQszvD+KGIv1_z)1NBNb%U#gv{ zszdE^KU3t{Y?~rKl^6AQ4(m|6q$aBkazETIXB)TA=0*K|j&-PAQkC14NtOQmPu*1_huXo$ z?dlA0?KLd*9BkKqF{yIxN>jRPMbvX}@acaPZgu+qlEUP4uz!bM&#CXw8{%v6D=ax( zhE#etCRJKI+M#wLayrFHpr^R1-XkLeZ>JslimskYYhzS8`T zIz)A_XLc*6c63{O9es}_s>_hd?DnKeM;~*joya$FuzdD@lx+L#-H}L{AG1Vtu$B2~ zQl-o!qB;>#9n5EMtV^{<%%OHlkkrAR)7_lP>7MwS`2|Z-mm!tYy-Ag3zCuzbBB_Ig z)31|lobFGg%mXY*9c*QOn^Y-t2uYoYqz(=;|M&06`mNgU6(OmEJ*$T}mDR)Xwev@o zq%K1$t4ES5?a&H8Dk7t*IJ`p(`EUaFhY-2TlB4rj}$?0G# zvtUxC%-rO3B62zyR%2bN&GZf|&+3I0qM(C4vqdQ-vw&VE+!?oKxSSTg2DmmMleGhE#f6B~@CqRNGoa zQ3u+9^U#2YXfraVo3n@wM|omYgm_DyxH&YOD?sk<-D#>LbZE zRx=VQb0|wr2V0rLlPYEIbf}$(oDL2?{m-;iJ4zu6I@mKihEtgx8(&Muu@rO}Qkk8Q zRB34+huVoK=wM-Xa`sN+QGr6|Nav;ZQtQN_uF@u zefE3rK_8g5M!!`1xT+4d%l%A|XRvLG{A6C#-+?NoKBUG9h5C2Zq%Xb}2Qth)MhuXo$?TQR=?UgL`9BkKqKB;o;7E`*bMbvX}@acaPZgu+qg2Lo*8ziODs8EhE#gjCskTp*`anKayre09FY($m3K=Nn12AJa`D zdOA4BQfY-j=mjVN8e$I>N2DR`|5PEO@?SA5OQU?pCUnbi)-J3|6`&g1X*vkAmsZz$b^!JNM>fj*r|JpNutM(g3Na|qE z>OoFr^}G1m`8`Wgmm!taLrIl(ZkXIXEF!6ch1H*uZLA(iq|Bo%NgZru{*qKFvocAY zh@=jVla)j56d|dDJ*y`;mDQ8+wevTYq%K1$tEZA`tezH;)WO2)pUE~>&m>ajS(c;@ zwle=ts+8G)q)tRqx7N5>In+)OayrhEUaFd zY-2TVB4y@d$?0G#^YWxhnK{YnMC5cZ|5jsNs?GEcEK9XlC`3UAdu9u9DzjI{*V4i) z1zmQKAf&+}|~wmr{YlNa^(TGpXtFtvrQU_a|Hzn16OxuV^>fj(twL*1d zsrF_?Na|ouYdcPDXKH-yY|oO^Wk{v9LsF%kH#*c#L{bOK2k#xpwh!KpiIjOKOHv10 znRh2u$}CP&CnBkXgHQh>OSN4TA*q8st6e#j)o$^%vpY*tmm!taw4_QqmTG&5Na|o= z_5NfVt34AbvlmNJ2V0pBBvs1Hxk48@G${qW(V3I@B(y%I%V* zN`IDWmx>%}2OGD`GQhQ$v($62UHiGDMz}fDPDDKi2cP~&;Z~>rs}v@ugZ(@7YEFHJ zUK3x7Utr1UGNjVGHmTC$`y6T~BBz6G`u}nUj7jcmb%^R<&+H~n?dazCI{GF{RF@%@*)2(xjwn#SEh4Ie<+JzQWZP%&)q)M5i9cm|{w1b1p|Le@WRQr`86nL;_bw8)F zdLX`be#27WWk_Z9U{a->+a`Cv6H(y7!s-vnHdYTMQs!Zn0uQz_e@d#9$)R>43OqPY zRt~jOgaQxttRCZ3R*%Qm&i}9!co|YzJ&{yn^`wXb4;EH`PqwjoDv>fzvlMu+mHB5< zrOX->cp?fsI8Ig$wNr!w5B98{<5X6&O(^Zm&QjpP*3KMBHCA(qDDYrm^^#;8tGN>YA;iW3J>i{Hg0R?Mg6^wb*NoZmD_}*N`L+Ua&3`A?O@}!ZU(q^ zJ=URiuwA=BQsvt3PVP1oIn)jg@~nPAZuMEcvBD0ugZ(>nBB#DXC&ky|ChXL1a)wlT 
zLrIkumv^Y0$f0(yJ*zj%fRShQ=JD0pf+fp?tgYCoo}L}YnzkfmCoIRr?#_geC@oMCBe&(N^85MN;~g%sGW!e50($!Ta#@cyd4rL^EQ?O54JMzNUD_i zl|$`B6nJp(iE?DA_AW&z@Lb=P} zR?`wGvj6&hsd-Uc z=PlK~q7D@v?3vxjsU3YSzK*`mQsHGtWp-0ir6bytn?+Q3uzdEum2CU$-I7R|Z?i;r zu$B34Ql-qB9BL;b!h`wjjdiKkh&j|w3Hm$ObGn^VIsG8MW`4-h-(^VUbVpL9ne7~E zC!)WDh0{-yZJh2*q|99`{T*y&ex6h*a~l1fi2e=^GXJkT^HS{>icsLep4Gja%Id!O z+W8erftMkb)%{78cJ7(nJs_gMgN4;^lWnXXOr*^3SPDGY%KRazQpUGx4~Z!7;5b=1 z)J_o!JlL~(gi~2P8ecnqW-0J8q_TP}smAJY5d|JBtp1v8WA#KLWu9ay@L((R_oPZ0 z8&y6fqQHaWWaUshMJVuK&*~XYW%X=)?fi?Sz{`-z>bayEtJ&60JF~;W>P5W&Wi>}4 zW#(il@L((RlB7zR7gOMgDDYrdjdiIu(>t&%)#g!%3J>0;BZp=E=E<-B4 ziAj|fH+HC<$f0(yJ*y{Yz{sTH%&`!Q`UBFlq=EY%9tk)_&}ijd&J zp4L{J+RoPTwev=n1TRA>t!2pDFSLwoQ>I=0*LT#FFGCRcW4*RO#>hDcz|el04X^$kQ^wwWqTVwS(>2 zk0({GJb*NoZmD{;VmHsT%&J#J* z4mNHVWPocgWF2Y;+qIugs$6@UL+wNkwS$9C|D$lL)BmLkJJb&L@6gLQ^&NV7d@X*K zb*Np2RC-q=Ra$)WX5Ez{huXn5{eM0KMyCI(9+Wq`5sGumm!tY?ManpemB(p zKtz8B3#T6?+c@2kNSPnA^mnk8`Ds$6Ob)dZ(ci&A=Ku9(UaI{}5ehumv$~s8S=|#~ zJHKEl@G_*bx;LrP&JQPd_lYR*U}5#^WE-pd6DjimOMwSlncpVW{-quiQQ*OGvT~@M zA{2PAXY~-LvU)hacK*mx;AKc<^+-~U)uSQ`JXl!$CE3R6u|&!|&QjpPR_3osl`{SV z@(B?I9vmkthuSGZfd_k5PjM=%r{inqA1no4hE!J1B-L0wE26-Ih1I{4ZLFS4q|9vV zq|EHFm3fh1q|D1G@I(}NFs#P9RGaA?Se9xpR)`7@_RQwuRAzI>*U~&J6<&r^X7eUh zn4RoUI}sHgEX-b>Y-2WmB4rj}sqkPcvtUxC%=uJ!A}Tx>W@BBdRc50v)fQHjBro^# zJX?fq&$C7IqW)gZlH?^-X)c~r>F@g_c_NZL*q&!gWq@m!W~X+`z;^AjNtJ7zvap=U zp>}YPXZ5VOIn+*7huYeKV=bZIz@-f1cH=itN4#8@JUnz_n|z z4z+{r+SesjuDxz@x0cAEc5skq^$T*V&+4@mcBmcf-=XVr>N|A3_*z__wE<^_RC*gG zRa#ukp>`r0aKiSietiavJgXz(~yWP4-T?aD^y38YMUxTf(LtA zn{#SATg2DSmMjTghE!TxB~{wl-Jy0O5k;1lJ@Qf+%hDDYsc+lSr9;SqePZ%Iu$1DRY@a?L-uKaPaAWrls0}3Q^&~p4ma1%4~XkEq#!s z!po4#?BJwIOG`S`PDF(V3$u?T+nCKrq|Bi#6&`G54o|9-ndDGA5fvUBeEJ{dSN?vf zc9g0ldAXk{@))*Fk;mpm{T;`W+KC)$2M3@2N9X2HJ5?QOm;2%N3AS-NBQNUjldRo0ld9a#N~-i{`afG__f6Qi zos$8sJ(qQ;9cFHKVa$aU@P;Zq)M6H=)xC+7xsRp5gRRW3lPYCaqrekU;K6aSa;Tjm z6nL;_^&qFR`dxhO{GO%2%aF?Ip`;qCheZ^4u(0}5vW?XviIjPirND!&%wLi!Wh|>7 z6H(y7ak6r#ogx%?uxIrIr?Pr7zIOh`Qs8AsW%X22jn&g43Ora?{WICd>X}5!Jj+tx z!B*zqNtH5$CC`b5yV=$)tj4-jo9P``mTI#rM1==?W^-^VvpM5y>BTG+UWQa=b0t+; zaschzA}Tysn7uUF#%$h1%FM@7;lWnsUvcD83J@G_*b+99dZj-}e$L=_f>mW(Oxy<`9+&54JKNNvf1t&7pQ8Dm*y&^gqh4 z{QXkxFjYzNaz9h#5p0_xkIakuJBlUAORCa5CaKcj2OVlBBFTeoiab68TzdlRP&?SJ zJt?Vj?H&%b6FJll4nF;l&ds5Asyfsz_rvWpwsAW>FY51Otlc-0s@y)2RO!$3e}>5J zo3L>^GXq?E7VA(u*slFlQsvr9hPrb^4z+`WPyeHEtJD8^3Om#e_V3UOIQ1QRVSFuK z#M*!}Ln^(ClPWFVyjgdN$OfFSP5+Bt{6T_>W#gXOdLYN-(^VUbW2jDnbjO> zC!)WDh0}MFZJcgRq|9wB{T*y&zMoVn^IiHo5&az;Wd7e^=B3&X6`{a`J*zu7mDP{q zYv(5{1zv_!R(B><+L>*ryGulY2Mep8C)-%vok*E`SPDGY%KS2^Qf5O6JP`#R94D*2 zyI(0nfd_k5_j4+%2jXkzH!KBShE!G$Ce>K|PDFtR3#&gQ+gLr6NSTLO3Ov}#{3)qY z=4}*sA__b>PFB;qKPy6k2YXhJaVo3F<7?-CSPHxhsjQwzsv zdAXnG+5BvKo-L3U_4f*vBrmD@54ccLrN3X1Aa}FWmvm!CRMpDmsIJ`A3!cIvil}% z++Ldju3dqh+N}uNwJRl6uC-Sfd>blC`XoRZ&ZW=5B99K;Z#=J#@Ei9Sqi)isjRk3svW?ZWM9S>JQsBW>=KV>PGM{n=o`?bu4nF4eE$C!)fGh1qbjjoCqol$p*_;lWnsLrIk~YftPB z7E$5B!KeRGe&z3%Y9CgWBro?fMb2Q`6nSV~)Zbz3)b8-4D$OI3D*c@@r8`nYk_X!q zd2|N2_84|*cPwnz9+ykIrpEccQ8@x|4E0+)id2w^Q<>{!V4> zzL`|zc3M)UzmIIvoi4KbCT!e3o&m1?1Ut1m1GZ~FnN+!U(J9@TqT%i=IQaBG3b#7_ zpRMqW?o+UThn~Z!@6dDOYw0OXiY4OR;x(h`%;Dl}Z|8xe7O#c_hSLYIz zEDyFipGm6ym@X5M<-tLgY8yOnsrFfQsPJIV>ZD3XpP$@a zBcj5C<+Jz2WZP%&+C<7+#}eVeR_4n|l`@uU*NceoU_N`yYzOWCABRDgT~!ZU!_@JA z59@8jCUjqME&UzrIo-&qoW2%cGhb)v?=qxvx+$sB%*s=`n?>|@uyFcTvW?R%iIn*^ zOMeGjneQf5%A8GqC!)WDgUtUM&b(Cno+1=@uxE8Ur?UD%eC_;@rNGOO%Ic1!N;|)G z0-lHh4;EHGO}4SRGm$cPu@rc)mHBy6rOZkccp?fsI8Ii3cfU}C0uT1A?&VZg_r=%F zuUHDa45_T{PpYwcKtzED3#;EI+gLrANSWWU6nLep6ak84;{ZSDL 
zJlL~(gi~2P8ecnqW-0J8q_TP}smAJY5d|JBtp1v8WA#KLWu9ay@L((R_oPahms8-0 zDDYrdjdiIu(>t&%)&8Ln6&~!FJ;SNYo{g`if3Z|}8B&=&msDxVA571-ep;Fx7G^Kv z{V%gQ5-Bq$ON9qpnU^G0%FLj`6H(#8FdOSqtuh;ZsWy+QBzd`?=h?h$d!Egg7xnit zmLxB!N^}0CN`Lp0PO4n%>+e^IhPy@JAkXSqahuRB zs_KmH)wv&Ti?NN{;(1YjOR#p|OsaBQDyh<+ZAF$A4R_1H#%kV z_(VCfRNGP!3Ov}e+KN+IZ5>}bZ)7R(GNiKFCaKbnrP{V43Ora?y(QVkYP&?rOl2wX zU@P<1q)HiDwH-thcyOGorgv{wgaQxttajv7R_~0jop-Smco|Yz?UYnwwX=u<4;EJM zNw%@tHIXvAu@rc)m3ePcrOa6qyJ;c{JUICDKhsj}eF{F;1G^R1*xnFTiKZV}Pn!Ejo3R-AGo`7VAR?!IF* zsU7Uwzm-$lzb(G|evc)!%aGds?MapUuCZzN0}-hmEbad&+1CCYiIn*yHiAJ2h;xI&uhP%cA&%54|JOP?)&XKeXZ_i#!=J3zVW*`wefr6yW=ldYPt-m zjo+J8x#Kdv>U>aZk1sfmUbDwFu zOxtI-UHbFje{VeD9P9}^#Hj=xj_=NYWC`aoq!M@}sd8rkSpv&(_+->)0q-FyDU z#*@Iop1@q3N?`8z?mQ1m0+%6`z`RKn0{c$s<`a>?!9w8W$uCsJksmIMyAG7BbE z%6xrdw~&Yg4$kT)zt+fmeqrN?+F;-KBAnXzqVe7F)htn4hSbIvPpaJU^+VkfBBC}} z8eb~e*7(wilv#%LO&4rsmP@LXna4L>BHwhuS^ebKTH`<8^RG4D339L}up*}tSSh|c zugp3@E<-ATRg)@r_MTr&ZiF?3;9(iiW$*;H-c0|M{NZ-1r&Y7O*FeO`_p$8(0Wzn`|TS=0wW8g`L`M2V0q`NtH6T^xa-G+`Sdf>L)*8C`dRVa^K8}{AVX^F=Ok6`dEAulT#+>ZEUllPY-|03M9N&qS^~gU=F>@)GC!Qu zT`aN$fN6c1|7d-!{P%wSQsZ@N^uJZ^xo>x=!G6z&efO7fYWJ7NciYdhe$UU4+Wi$t zmD~Q>@A)FX=fl$d=aX&SU!6#qYgoVM!&c^tNtH6k`aNId_k5V{nVEjie_r>ad?oX` znZLgHlB@lu4|`VEb1JJF;%nzCtl#uAq_Vm(snX7KcG?j6O&=Ck-$=Hxx+#$|H?w}z zhpo)Fk}751YNrj6-}K=)S?%3@M-dAE*t5EoQ(4^>UpwDpEdVm4vbsH~#_9(m3jkPH z{V3VS>W)Op{Ft=>fUV3=lPYDlw%3Np0sxMa)%5OXidY@Mp4Hu)%Ico@+W7@*b&w&I z)xAkIR`-dl4q##R>tq|N`x7bi0BdysTbbV`RmyB`bs(}jfMNBYUtf&r9az4;_`O1w z4X|hS5T`PGIKGzt$XYgJNM-g&Ql%yTfBmS)vH=!me@V76dn}PMkF%Byu$B31Ql*Ro ztey~AHo!3Z&#y23|CzNJl&9>o8*Ul~&xpujs?nR>EZVvc=|Nm-y+$MB$syd^4aqfrP zOW4M3uDqzfxmkbZmsI8U(xeKv4{y@VEAm%>su>R05Ln^&jCskTJcGGS#kw5f< z?f)zJf2odqeX+bEGrHHnp4MwQ zwVf5>YiC8)Ui%qRX|0@8X=mC{w~EMK`>=fQR!g>h@K#Tx%o?l>IAAODx}-`OTf(d* zvH=Gie4-rr`eJQGY_B+gMFZ zq|7ANUK_BLnVi(f4`)(0B(m2A94D*k-KL7zYXkPIHs@4UTg2DSmaM%tGNiKFDyhb5 zYmvP+U}5#9WE-n(5-GDSYp)I1%Dg41QpR7JZ6~tV1{{3)pXuw1?G>`$2JD&bz^TmM z7GFzmXYIF z)Bh;{4c>o!vAe1^;K=<r|c59hm#!Hq16|2jxZmO=ta;Us9FZ zhmtD&5d|GA@>hPaarx(fE zo6udPgxxk^&*@@L<#b7W&0NabZ6iY}r^}Km&HQnwyIf?q4OlpRF4@NEibTp>$=YlK zwlbejs+759Qg^k;W*cyj`M+nqum5n|Z7hgMHV(qk%A(hqj zNtJds8|rQl*=Yk7R$ooFvAQvlGGAluv;kY0ZzNU9Y-^_tk)1Z+I9cu8eNz#8ZNQ$@ zEu6~g+wryY9oAkO8B$r@np9(To5)@pu(0}mvW?a4iIn*PYp)I1%KRv)QpRCpcZlq@ z0msQ|diN7W?6m=VR(EnLtGnWB=Vz?FHZr8Lx;v@H>K>83Heg}(%VZm?dlM;hA8W4- z*vkAmsZ!<*_Sz5)cMrg@8tYPRrgvajs{KYG`)$CU*@K+Q?050C^n2ER8yQlWJ(N^w z$=96^i|n@n3$s5Z+n7C)NSQ}j`)$Bh<}XQ=GCo_6iR`xl!)&ZewaRStrP}|fY6Fhk z&-3gFwmr|D%!~T_8*2lOq$ec~;Mg+k|emjlykq7;Z0O8@D;~qWVfo;#lx+Lpt(-`i zRahHvz*c6pq)M52Cv~ffY`_5rpD0I`YHKQDvklm@T8mRzO^C0ZwON~OWJqPTZc?Qk zOSSbxHrs%O)dtBnRvRW#W+T>K8?cpmeNv^&A(Og^B71GXak84;ZK8<1HekEj~jfs3W1_z)1N9Q)7+gH^Y-F~?rZu_&1 z+W~n|e;whPaarhwQD;Thebuz!ah#;Nbn!{cl52-Y9^Wk{uWR8pnI%Qowd7WqRz*rxwuGhk%; zKQ6vH$Fozr6JV=zVp8qLbdqSeI~fkLR9owLe`$7#Iy1UcVbAQNoZ8W8@pW`MYrFjn zsmwl}ROx83q3#nR+wH^h+52R&?X!1gB4y5E?ZE+CnNKBE%4{{MJ4a*>4w%p0SeI&z z*o5vpCG55VdrlW{DyIwMYvv->ZW|d=IbEDoX=ctT-6bNsZNS3mGs!kimnBl>a@J-W zu$B2-Ql*SV%@rb>ZNPB)&%ZS5pTy7mPB4FIc9qe#)PQ~aS95Cn*Ti?MPGn0BSla(`vaS8=6De~8YfBB-%6v7cQfAne8X{Y2z_dS$uQb?$U@P;>q)M3&*gQjI^9(pnR(p59QpDyNuxE8Y zr?Pq=zIJ}Y+B_peDys*RYOH=IvUvt9tp1Q}WA#uXWgcd2o&j5#KP6Sl>}B%|k`MjW__&&TbY+8Rmxm$^9+%%)nJ&-Vx2roW}~l@U#6ObH@NtOP5rTI#c%`;&8W#UyC;Mzr4->JcN?W>b2*ZM1{ z#YDbSga73hr7?4}d4{Ss&&d67TZ(Pmmd=a%TZZ)~g-KOz%OzF%vrb-KRS!DAJ_+Nfe8XLFzi_)qJ+dKpI@6gpb^&PrKd@ZiY+B_pe zD!sLmDlHyw^9+&AGhq8gX`Kuh`9*2n`0A|3+B^fcIvXU_eoPyRY@PxC%Q|@$R7cjy z8!KY-4A|3}$f@m2im#nbSes{LNToHDRB2}in`ek@o&n1TZ?k0E2XFI4%51^fJOj2e zZ%C??u}`h9Wl4fIX|7IF;4T@wKxHYx9f@ 
zsjPNQs%`@QO)BjBCQLc?Rs6 z?Zc_e_KmNl{aBl4WJqOpKvJcpC5F0AWb+JIm<=b}m>rZzndz*r)nF_0p`=Qg*&Jal z^0gWqeEJ_{y;U~PP}Sxcxt}R=2HU2{L-V5k4r8ZwhbL8O9+6b(?;e|Hh-{t#+Z1_p z2DtVZ)^}>KU3*+o<=XRYo+0v`8XSE3ADx@cGgP&CM(&5($!z0xN?z38sjSU2lB(QJ zORDr|oqW2;<{7YY`*;Sp_7kklGhn;+lS!3puNvyk6xloj4nF;l!mUpKXDe*;4A{Rz z&*9W}=(+K=cphu>j0~ytE=a1hIDND3LXpigV4MCwodF}$|HbjuxrDWO25faclT`aL zT_&=52K+DUS%XGQN(kVr`z0A(h$HNtKTNYV!<{%`;&6 z?0qrW_Sw5Okuuk@HqU^q%$JiYWo%w>y~ySnFrU3ytdnO=%;p(N*gOOFoNnY)PG5_! znXj`p&&ZI<>87MgGhZ9(ZWh@*0~Su-O15#jC6O}UW^JAUTbb`BRm%99>{gM@GvFZe zfBos*t5EwQ(65WzIJ}d+B_peDyut^D(x&e)csgw^9)#6{WRIe>dr*U z+{M~F1GX|hPpXu8yUjC1HqU_LWM%UVMQok{dsg>yDy#eAYv)(2%`-BjvbsO1#_9o) z%`;$O_1k0{s|OP)^E=k&8L*Z4LsF&8yKJ5zvUvs^Co7w0C}Q&r*t2?sQ&~M4Ups$h zZJv=KmDOWOHCB&{Y@PuNtG_1OSUr(QnI~DBXTVnG?@6_PtEWUZ&wyby)}`7^@4&KD z`-eg{&wxF%XE>GFv+=d`FV^N68B&=&msDx#dp6I|h0QZyVfG^4|1z5+kur0#zE*>+ z%uA9gWiGRMhRD}yFwDlfRIAKJU#iWcs?9TUKhLvy+4ek}FE8rvWvtCJlBzW4Ppb5{ z=uo$S$mSWaJREBKd4{Ss&&d67Ta0bo z7SD_NTY|NDMpBjAQc0EmEY+44**pU_Zp&tXYnNkfo&npnuSu$0dzZ~KL^jWWgFLHW zkXwCLuc)xiGhqJ?U71tgp{vB#;;O98Gcu&oTRo}L;=gU4A+mV}Y|rY~Wx&X@dad~C zOki!E0b8AQl4?JubwxJMfP*a63e}ON+W%|qJm93Lnk^2BIcG6rjwmZZ)LnKK%n92Z zQ4vvBM2mKfqhQXUm=k`YV*Cu46Xt|D=bUrS;r&n7+|S${M8-uHW_s-|wAt~1Qs z?(1yb?gI&%XMlyQzBs9zHRV&zT39yE(2yjnUMVkUCpOO@V)G2pzj%$x#*|W01Wgl` z%`?ED8K9KY%*f^$L~Nb`);>{ImTK!k!sZ!Zp=tx1q-sO?l(P|*%`-G4soF#-FGrha z5V3g%=&A-P8>%)}1Wkrz^9(R(wp7Y#c4zYpA~w$eJE)4yGazB}46snO9ZpiUy?n~q z0n6qY8j@7)q*SQdnTX9ZKv%VkvY~2MMbPYqW%CR$Xm(f1Y5r#O3?eqq0BfKATP@Xw zLd517V4-X;oTO}T`IOX*W%CRTNy>&P7nB22HM% z)12902N3bI8d&@E-^MM!Z=L}wn`h{JOpym;V~RXPFH+y3ST@g4D#<@L?h-wr2Y z^9(Si$dMYrv7@m3rUu5bM=4d&#^xDB{H6xhKK-|?jmQciOaEp74_E<~gODhR1|GPekt=sPfzSNwY3~(tMG@^vHol^)KV3>R*x1XofkE?~ zQclB9n%@)A9|v|&RkQsF3GHxTq3UOxr0N&>l=CZ=b~p`5s(x20RQ*9jI~?e${!%tn z{jCU^PW7Vc3y)D_E49U799Ev%H6#3qxA5V2DS=*kvTHk2){2%06Z{Hz8B&5}x0 zXc}xOB7Rl_rL3KGa=XgfUMDXDEBkrqeB4YdhmD(w<@F-#J1Kwx&{EAG?gLMU>mxU}&q?0FG_I(hdj4v5iXku{N}V5DKtg{USjgH6CzZ3ce9GAdOMje(Bw5=j<>l;C zvh9iJj|2USx1+N0#oI{{G&^JIj{}2d7p0trhn($7M1LGu`$So&YPP|U&<+O{s)pbs zReQ*%oS|6S;WQ+v+Dj=fhjsGaM6|1ciyACc9e|TmwaBNO1F<}dT0@emgOv(ZhY;~BYS2|3rfjGxD}v^5EYG3_ zgJz^sPV*5)L`w?fAMeS)BjNr@%U-5P<9MXQg*C-N;(e9@*^NRs(CF{@Yk@73<`&u$Ju%osTK< zOl(Y%XX!=iI~$wXIZ7p&=PKp(t=QkrBg*Z3Fs8^0G=O6-#PXXO7{`uN%8%W#(Jm(9 zH#M;K>A!7lb#@7?WgD;aq3u#^XuC`=Qs3p+%qA$6v|XW;*T;V*y^<)mtH989wFYqP zHCVdgz&Q55O8K!5l5h~Z{K7^&^&>q zPYw*4r<8J<$@Iw)(I*G$+tlg^9GhKIWTD6Qp#z@(nD%V4-SSoTRFYe9D;)%hRYeB&nJ~sZcc|5l^EA zUDeFWhN@W{7SGJI{p=@DA&@6)Ghcz&07E{V;UT?6)iTGg+ zl(K3UYe`w#i?wdBmTgI$k1N_z*tnuCtrw|p8Ej_DDwSj|rP05xg%54QOu4pT2 z0LQL`ix zy_NF%*ixqtQEqF1p{=h5aO|2`8sfk>c5S8n*k>E9o`{Axu$INzRJEmRbw7w{k^_rR zXbC5MLYw4MVt*`6avGB4t)rBecwv8Amxv}gFs{|>Ye3~%y@7mqHpJ2;2L{i^O2yl> z2@zd#U@eO^ms;E^?{AwyLWdk!$l4qym6OS*oGq|)$Z1HDwUts{4vV#|iRh36{foD) zvhl^+P7yTQW9g6sgJwsioMt*Yjzg+k=RPIM7w?scfj)OA$1CV|gMq7&QAR4ps!sAy}S94F=6&N;%C^4OS-N zY1Clt(|_@A<~2 zJ42}?^Gv0@zR#NMETY`b24jjmM*}$aTr9t;fpP5lO8K$PjdlSMzp8 zE!#Mq4{aA?L)(A!BK2K@&1}3L`r*LZr~fvzrRo1#h-sYzi%;lvIO!95y?jc%0ZZ$gh9r3tmGTmA9$+^S(K-jl z^nZ&6RHpx1<->CumfkrqccB$N(>O`fGx90sSu9O;8j>_Uuap<_IZbs$G}VEw=_O@D)60sWc?C;T z9T+sPDdjX5(Nsr7QynNx?fj@&`Bjh!(cWY<-E&}3{@XaI{CDJY-gmKd&uK_1|9z$W zyc0|I0TJDEpqKxVvQhrWilF%fOZOZYG@mKuG)(lL6VW{fs{Doj@n1mx`9y#(8ApR0 zSQP#>PAdEx`JC}vEDdrRk_!J`DL>>{AWeb{DP%H4h)*#lyaI; z{p@!l8stC~KIIbudUXHa!l&FR`cKBw2L~1c{>DiHI`#82cgE5O2gaGFRm#uIPSIV6 z=z{}Y!1T(7fEg4aU`8x`aA45Ptd!F%P9Gc*eQ@Bvzw>oA8{_C~1B=4vz)6MADW5aW zg{8AiL()4xk5Ya{_NAYfh|V_93!h)vD0~4$&@70}tScBa3n}F^3-+^xiE>*6Z2z5~ za;NA;8DF-=z(T;{I7z?~@;P%iY-USpND{D=Qhw&S``gk)xh(^_fMt~p0m~_ZW_fI8 
zD}X_>qEb%7e}`C!D7Tft_TKsQM1WNoSGMk8QTVDjsqoe0bH*On%<42G6~4Mse#Y+` zttU}#y+AL#x3W=qA4Sltf#tU@Flg3P%4v?|w=N=n>jK+*=lSFPDRxJ%XMEWjz(PPj zoFt%8K4&grGi%b2B%r@ier7)N1Bh~42Xq1JDjNdUQv}WW*vvKngJwgeoaWmG+lVN) zjlq9^=j&_}#+7YTuqb>poK*Ng`J8ccY-X8;q{6pQ%FoE3<7`Qk+g6|#zO}MZ_%@25 z*%q7Gc3{wKuawiw-p_U*%56um{da!K&-_k|FWb&wAz%$teGgceZcnK`Se79VT>!=zF<-K zaGX^5e)2iv{@Bb+LsH=*l=3sqShAcbw*x>gyhYh4{6Iy}9E4?m4lrmAQOaqS>1T%$ zu|Egc-aF4)V~WpwnelA90Tu#|z)1o|%ID0Zu>6Ujh9m(;DdlJ8Gk-J@eLV%H7u-{1K-kH$%bkCD$APs6gShK8iV z$13G#?8L4bMC_^odf{g&8-<^(2%2-S?5Y6<&3Q^W4gX>Dd?I$$0Na1(r~J%c$auEU z01E-*aFT$F<#Xo$VA(=LLy~~;O8J?8E7_$)Y@q?VfXkH)0TUEKa|M)&ScPQmF>}Y-`5laB@|9)udTHW2~z3*XnGmcw)uqb>IPAdFf z`JC}SEVuX?k_vx7DL*5B=kXvBxA>qJ{;;x9_#=v-c@)bnJ{UBQE9Er1_Or=E+~R}( z_e0aV@F~BpPcokSd9V=hG)@xmjC{`gESCFu4M_r?SIW=K*YyP=?&m=l@RG72;AKV7 zyn^L^9t@h-lyaJLxSuEDejfbycfQWvWE^+$U{UznIH~Y=_{;?uxKEZM)4+hO=N;%D*{p@oh?&QJt-}x!Ou3s{qJ9@AX@HI{n z@Qr-V{4JI{dJRbezE{f6yj_$1K*SwA=mLIHHU#{v2%2B8+|h$U^P5sma}!%$5OGHj zw)f7b+p<3y$DKS_6#h3(D!fyppRqHRJ9#k9IIU8C#(7KDg@`+O&ms14I@>uTV!Jt`DDW_SXpRGj1ojln7 zJ3r-Teig=ZM-LVPR>esIR+G<}dtkYv*O2tiudbAz`H7PCB;t-9bOF7U4FP==L9+&y zJ9;o^)>O)A_)E{Vh`6H%+k5BJZCO3zxRVEq!u#Q*!W-pt#uAo0c`(k{UnxK1)g>E1 z#GO3og|Dk@6uzD!Xx7JaCl3bAhDtfj2K{U!BJSkD_TKq_-j;2`c<$)ILcnG?Nx(q) zoOyFBck~*P-uW$*@-y?9-;#(sde8-It!xO`MiDgIV!5LSgJyfBoQ9`U?m)yHJ^1hM ze4XvYIPT=ZqVPdDsqkIobH-h<+{uG+#=%PY8M(slPQ;x&=!Ng0Y!p6J5j1;ZxswNj zW^bjOX77I1OvIf$*#0{|bLRcA+|g@Ddgn(d1kLeSZsx(DIZ-L6 z;n#zch`5;tReSGFQ?32q{)YY(D7eoD3r(YOlBO~8DdsdR_xT!<{yoMj<;CpRXlD>{ zpAWjGvy=@@XDfo{94zC*H1l%nPsFW1*da|Vb}baF3BW?r zbvQ}W_3|m^1}tj=4N3n_6O{^0HxaQW0A159%7&&}6+v?wmNfwwGv~w((DjHS zXdcC~WB`NaaiyGwwa#QBmJFbDt=Fj*U2Xq4#*=WetN`_!)zjGc&FUGwNOjL*Sym{O z{_&ny%B$Lm@}*q5=Hy#mIuuPWuoPG7Ruh;n-!)NfY*f6D6Y4LHm8 zrp|`4x3Hn?ZM{f!?_lW&QYtBXPbsgCKb?G^h>jpIlzpfH9QzSAvyZ_z_7kQ2*ryup zQ=;5H1Ep-kPX90eX6d)M&!H{b7hv(({1PX9HouZjfnQ@Y`$j{OxNnv60#E2~-x1~Z zJs7{m{h$Gr-{O9h56@57%zg%g=NF~oP5PB6x8Feh7T3E|^+JoE!S}b{p(xuQU?J*H zoK(zT@+szTEKkfTwJN4FL8X`;JTZ%iCuV{Eh3leheBq{31kLo=%w_;X(~L^FrgQ6U zCZgPC25X%h3r)>73lwFW6)ZH(hLbeSE}vrNz-Bh5hNOR}xs>u^_%n>TiE^66;8@hTaf~Gf??e)N*SwktOX{@)t zL~O4I);i(0@k4&`2N-L?$!>c(8`EMvHm1b}y-0Qau^j)Y)&=9(^_22s*(iH`qTDtBYn|}hR#s;l!dbSBbT*W2j16U*=tZjA6iW|~ zQc2lBrMx=+{9yAW~H4aS7On+8-S{K4|!*&UnN5HNW5 zP%7S}p+vdu32MUc+o`$RnB4~1KW(R;l)1s8q^lh*MiE=v#)OW6DZL;d@Ucwo3>Y-WD&;h^lO9Ku+woxa zGk*GO(Bd!TPk^FqCxV5hlW>xzljT#)DcH4Al&^Kp`<3*=MGg;-jDG$d&nr&MUV zn26RN&^2A6Y-k#<2%1Z=?4}0>&E-lt&7uu9fr#Dozz%6@u`8iqBR#OtbTv-Wbd7w9 zxfaVtdK!{6U8huNx}J!Q^g!2iqq3oCq9SN+!m^Pb7&Nyi4DNz-BPU8 zXV0(9w?o5bdSIdJPMoCcF8LI6H$+dr(Di^KXdc9} znI0H44=d#~UD-^Jh|Tms>8ftQl61Ab6nhL#_SVzcxPDE>#`Wt7y-0OWV%b|ysU-4g zrMx=+%il9Z?5zjJ_3Jqe;MnJ}nY{qUu`ep+$1Ys5mxywE8La-w+@7*Jdj-z2y{faJ z>@{pCdtEP5-5Xf?eUwVd-crh|V=4AF5&b@3D0^1}IQBhkX77V>><3EuvHWexheWx3 z1Xh1#o~p8RW&Rl2vV8&;pUqEk(r5EC`4sp$HnT4@B#HY{DKBuF{`M77ZeN3OW&TD3 zDp%%j<-_wGHnZ=+;Q2wRc$0o4%Iznx`lXl)tt`cUhN5i0fQ6`EaZ)kA$)}j#u{;q= zLz1XJmGWXrJQ0hCCt`vAh3nKrHokD36+tr%HnVBLpy{HN(~PgT>4MCuv$pKE*7I&1?}3NtzZ_Dl{!dl-uHxAEGDYhIm?4Jh~x>mqRx>l4=K`UYD^wE%{ zYZaxuAa<_kPDG~<=(<)@Hgxq+1Wg^5?eoB(>8X^{tWs~ih}b?4taZX~w7iuHk$ z-Sl)erp3P4m=@R6i&VE3mfiG}N+Ro(^6DluSpyNf>47mVHfjLJmazQJ1;(-cm8u-e zU#t=FI~Q2%gx|KZI$IacvaP4Hp=^C@DBD0UQr(7FI((E$$~IQYt79p)2@xGWU?|&6 z12}ddHnYvaI5tztkG-d4TM*^8C0Of(--fa@;co?P*|r9Y&*nBb>9e`5dvhkf8t_Yg_u$k=- z28}7@G)p(w2%_9_P~W-gmSQ2R&JKW}Y%O4+=s=vL=pgx&axgZtLo_5QI#eky<%W_S zMwD9_bVY|N8;Xul1kFfnW~0EMIZ`R7nY+P`BFgP(u=*Ll_$;P}${qtn*^UJZO~>IR zO~=cpm=my>ov0y6(@9EsG5q!0$waxG0=lMCl?_d!6+trw%O-kY(44N6)9|ad}f=3Fd|JsOfUov&1Ax`2qr9?&&iq-Y$wb*4)uu&dZXqteNG+iN|Vy?upQJ#h*O;;-wnyw*Yqdd?x 
z{a4x0be$q-uE(-b9vCz?D&;hbuu&cn8|8u0RNYdn)o0I_VmCv>W_e(t>sFkk>o)lm zbUT*K@-!srx>G4H=z&JNi-^tgK-YDTvY~5|B53Z#vRNJ&H1{jzG%PV6AY!vTP`avH ziX~lbFU1~$lP&dhHm+ZfVB`Ars9vPH$FOXvr&JO-St+lMKdXL%h%NQNxPCpQ0UY}@ zHnV5IIQChk{Mb25_8d`e&x6%pncGuVXD`56wik6al)Z!vWiRVRs(S^?-~TCl$+eWD>r+^0%;fqV70&xmsS9E>aT7aCBxGJh!_p0BW(eGLZBH%i5u z^es_t-+|RH#aw7*DfT@SW%~gvME!`9iup-C#r%xT>=zA5qJCA%i{Z~wek025chJ9Z ze<&MYxIYy^^A|R=zrmpC)IVshskhEVxlIGsIyqLBV$(uVwk}|yX*!&wX?pn-GXplW z88swnnn@|w#8PZ#qTFTyUDK?}hNjsRK{GoxvpK+^nNumJ`K{jOBFb%UutS4mIe!5%ittk%gU#q<*=D8uOUg-3QBoF6B}(sqTE&jUDwLW zhOSi#4Ib zE%w63wAfoOQe7WxW@{*wMD|t6s~gi~YZB$Q78ujw+8V&I_1MfBz&N&_Qhw~HMr$O> ztpwIO;kT`<&YIvXTYsGmWdpFGY#qHwb?ahzES6G9+4@R(bu7g;AmXuDU?|&212}eL zY-XE)aqOl_`LPd|Y%`+V279e_|d1Wrj+Rd` z$6$FLmWCuv$0_B-%u%xAiFh6s=$cMcHZ+~22%3|znVkX#&8bQ`&1?-enkct1V23m{ z+i6gg?R2ovG!`dmIzv9ioQcisEDcGT&Q>ZkokNt{xu9!0Pub9Pz9MKYz-D$K7&I3t zD8>n8aWbTgK{ z^E4#sx>YGJ=-x)VjflPTK-YDLvZ3ovMbO-ZW$QdJXzo$UX=ZA$NknX&2TE6UOR=P@ z?WNd#aI%e_&c^lY0c>2q9@L9e_YjtC^pr{>A5qGy5dK zjANfv%8#ABWKR+0_B2@imAO4-b@mLLWqVdDCNig(r9lI<@Od>{grvD%F>njZD`B(4p@9P-^EFv z&G+O};QQFjKG2XP?n9-#z#aPAM?|@O491oD6Ah?bnLm{e&u7@oJ_m#63#H;s`jRNO zufXb;VlK3@6#E*AvV8*__Jf8bQ9ml>#cW!#pNMk%8T2pQFUrOj z?pH<7{D#f!cQ9!FP|9gW)!Uy$x%~y!IyqLBVt+$XwoU^onmXeoP1DGym}#+@bxEv+F**D^|ZK?6&+EKzRDfv#(LWkc5r zilA8$o7qZW(5$SK)67|Ks}SYZ9jtZ2Z{xPUScx$x%B~KT3kZ|IJPe~vo*mub}gm+*h3p_ZKB-j!CEK$ww2Xc1Ds{+ zr?a7~5gW=%dXegyusrrksibUxQeGWPv2}=e>=hWw*3$rvT^~zB8W_iJsFWXjf5|o? zq9F~eb;55$S(@-SftJ2Cu=s3lhLb*<1LafT=2-gHG$e`JLMbosi~+VK5q)c5O!!-C zKxM+;Mm{{-Vrg6hgJ*lC;!WCth{iRr`lZ5}OF*c|mqOA?ocdojnSjeig zVGz*g1{R8j<0M7<$)}Y4vGloVNK!OHDKF(~`rL@wr@F{feaf76hp zX{=JA=?o(J-$2)Nma?JgY(>zVgQfos44U(la+>+*eR=@ z>0j%4t@m2abpyI8eH(TZ$!JZ7;>{hLeUkosH|)By3#2?$wJ_cORC9IHi)v z2bA*a==XY%h=w>Yu3ryp0LMOp&FoPyj(toiKX&etJx-L{WU%@xb9>6_>}kD7bg&Hbu7i6C*s*xU?_W013302Y-TTmaqKHf`LQfeUnR=z zHL&_C^Hi0kEA#8nmhBC&_-ww3lRleo$)~`#v6;Q2AxYf3N_l}>_P6(la(f?)EAs~$ zP`NUHC?B4Wur$Si!Sjhy@g{vrL{l7C{Zh<@R+eI)LqP)^Scv)(Cl&LRe2V!RO9PyS zBvIcg<;B#O>^mYF;6VSv{h(}o;eJ#E%}-eR-@u^xMJcD*r`~=gqW=x7b#kmM#eRo^ z{x`7D^e0Zz^p|{!`5R0B+d8d^=}b^5hNajvMD)LbuBnT%p=mlr&`gh|{|yXHGb-hp z9-{w^i2gUQLz-G_7AWX{0}D;F;UrD7%cqz*u=KxaNcwl0ODQh~O>+~`{|36Id6f-K z^C^O6ek}cOV9+e6l+&C_{~HngZ(ywxeygR}!qCtI2Nt>(#YwsrlTSg5W9fm@kff`d zQeMykjkY8aJ#e7wT3XrAwTvQYmc`No2L{dZN;ypzdfjf~r&JQzLn*J$nyik9hBz>$#hx0#vAwXF^#dXefHusj+|sidq?DX)&DSc!;7V}YTpzXovZ z0BmOKfN|`)O8K#!nruCy+|~zco$%XGmL~iSpe@^mVDZ`92q%3uHzN!BEiu1{RtQ#YvhDlTR^aEd6g9 zk~AHmlo#_sqm3k@{|$6aM=Bedj#328(OCN5z@Rx+DW{o+{x>4}-@p!OYPJ)gp#Kdl zG@XQ#G@UG;Vot%*|E3{H(`cna(-@ z26jkOi=7Jv{cm8Q>3p1|=>qu_b0L=gHw{Ue#wit=E+(S?4RlSHC>xr_D}v@yEd6g_ z&|I#R)9{}GCJ@p821-+POR-j;Jzt7l2@O4PV4>@3oTTd-`4n_5mL50_NxH66$_skD z(XJ<=2M%;yH!2&tCMtsFCM-R0V9?y6l+!Fk4;&FaaG-Qmw-igd+Fpv?4krz9Ivdxo zJF#*7x=Sxo-Q8Fk;*?4vCn@FCy-h~U-;o2(b9?g=cggh5J?!G~Z$Ae*=T&2c?{5 z_j>!0i2gUQ*2%H56#E$p`rp7p)2}#5({J)A=65XpZyJ&`{i&1}!?NWsBKqGz*VJiU zvZ1N7B50<;(*FhqO&6t{<_7xTi0FR%h9ph1 zDHWP#C!+ribWL+A8=B@)1kK!7`rp8ynO7;NIj-L3BclHetaZX~wG>+b8hYTsLRVLu zq-!Dh6tpmw9ykq2x)xQ+3tFnt79*ku4s=~hC>y%EDS~E6EIn{w&@8Q#)67B-91%Tm zV678=8@KhvQfxUmX^7L=m=;&S#YXG7T<*ihD2FH+r_SRQ<(R8qFK zQeGWPv3epNdw_|lpp(A$@&xJHUO-3!f!)an()_wwruNy#bKxM+;Og=mVu{6bj!ILQ!Z_*Y- zG{u3{FU5NOV=1;3EcC#Eg|2OIQa#(sr=IPw^uTFI(zSz9UJt*D?MOrq9O&Pm%yp)-mY#$=J-#}Niud<2PI3(-DfG8HuI;4Gfwim2#R{ z=zk-k{|)Srre-?^3i{u`Lep_LNz?K2Ddq$${cjqQG@YbWXgZmQ{x{GyovLhT8m$PL zF#wmj4Vl4e{V9;Eml+*lCZ{vyRe*>kdx}{jF&z>*EE`x?1IIz$)0VnCY zLOumuiKPclLz1qmmGXkdG}<*p^uU3x>%Yo|uIm&*b3K+GI522#RLW`os<(+m^uU4A 
zRozl7>1ul^b~Btb#OZ8Yzi!3G_3JjhNOiYkX^2xQiM&%Oua1XD-9reyaM<@Nwr{gt^rWp(x-oMn4RXG7V;*iiO}UZlE5vFu!_R8sc1 zQeGX~c1$K>=Snb?J*fd4`xG{_r@=V(8KwMKdc~e4%I!I@`YZEPm8C25^U#*<1+e&R zzKD}Pn=i?yz?ZRXUa28T+^b4?fvfhn*NE7>5{xVJ8yZl#GQTMwp0}_x#eu=|j#BX^ zy-P$>99aEQ%!O8#V(&vi0~}b0`Vc1-^O1at`4~$BoQ5P(pDN|WOjEMYh-iQV{R{Vn zvhjucQV}#?Vd;MZgXSBhoaTUf`<96QH?Y>pv9c8V9t!&3z(UiHI7!n_@+szLEd6g9 zk~ICQlo!KN>^CC%-$2*&hq9sRPesuDg{A)u44O{N6cw7Q>3<`l{|)SrrWTtP3i{u` zLeq3ONz?T5DP{&N{cjqQG|i+`XquUb{x{Gy&8lo@noSWjvt#Lh1A}HxrJQC6{cl9{ zzk#(*_^p;=^FTun99Za@4=3rGUp@sbfTagcLz1qpN_j!AG}=N$^uU3xYY}Bb*P@D` zSqw`L92hi9DCIQ2t!>?i=z#-ko$%YZtuK~hOTkG)oX*CyxC}O?#bxy()h&mmAx^0z zas{Qlx&xbRMIsvFz?c?S)&P!O1)EuSFpgbSDL?j*Mq7<2w;o`v6Moyu>Z}gVvaPPO zp{yr1l=ae!RM#8J&Xr0fWoszq)zKW>mx!G!!BDoA25{`!*v#s|IJQA4KlYE3^&`ry z5v+B>Z$nv{@JrB^tqCkXoBeUpXLEpj3S0-v=9LEff+Kh;%II#MqSnq!<#Wsh99yqYjwFOS9XG{6ivlW&eI1Nd< zwo%IK8Qo~QA)*Hk^zYpE%EotY2Sw2Ah@}M%44R#lavHi@1`*K$2kJXl-BK)M)!D8P z(ESD$iU#8(MZ3$Vlp$EU-!vpC8mg3+QrBdA64Ctzx}v?64Mok0pxFmY_Zt{A`zqx$ zbI|=pME4t5{fytI^-^qqDCmC!3r!<%lBQff#T^} zr&MS3;)*=2WGehF|(d6Vd+$c1Tl;odyN{Z(yNmEKbsN zhJ1=S6HEV_h9pgAD;1i~A)@~cbWP_e8=B5n1kD9l`rp8yxkxFe;SWj25z+q!N>g=9 zu~wfwUyA()8hYTsLf3elr0Y`o6m%Ju9ykq2x+W;)1%2FTR}j$y2fD7Slnq^1D}v@4 zEIn{w(EL{^rL)l{*z_E{GGn)*?u}>)F$MU4Y zCy8=<3atLhJXK}s%KS96WqSrJKAX?tq|fGa@+t6nESpzqND}v=QeI%!{`L|Pn^%Hy zWqw5iDp%%L<-_wDmZmr`c-~Md-lR8)Xo>@?Uy8ZV%2MoYC}@BK3sLXlq+;HaPciRf zX@Jv^BZASc?5bME@J;nto9>H2tawn%}VWzkxyX zhf+>MU)7&P^uK`}($r#qLqY%B`V~!`agwHKEHJ#b+0Z#5TA(lxhy3YrH?51fW1 zUGpjB1ua{$`HAR(16|jG%7(74ilA8tOAj0vG>a(ZG}F=pM??=CSnGt}#%+DE6k8ll z8sc;|rp0d9m=>4Ri&VE1mWDW`lE`J0^6G{(*|J15#DOs_F0TO`y8 z?Hg@nqTE&iYn|}hR#s=-;Vj##IvdJX!-ldRdXehtue>!IZ#ZD8@)?1z&+n~m}*u!Lpv zN)1Wk`YYuH@&}3oh}gUmj0t~T4X8}`>&b^_eJo9JVDN0HRJ=(W5z!O}R=*VM^N*$2 zCa};02Nt?E!%6iFlutdIW9fm@kfdu1rM#YVcy=2RJ#e6Z=eAZhzH{3sf@WJREpTAa zY_F8l(6O@v5iM|_zH`+r#X=Sjb%TKJH?UAN2q!7pMLwnMilzHaLz1GwN_i>E^LS<= zy5B%ow1={xXs9A+_QcZt1_sUEN;wVNb~F>w{RUP)h*C~7GyQKw^uK`}($s8a zDCmC!3r$DhBuyjbQ_LtV{cjqQG##Z>XgZpR{x{Gy9jk0;I!+NZ$7AV#1B2#7rJROe z#ZDrk{|)SrrWQK|3i{u`LeprRq-l(Nia8BS|C@#+O=Fb`O=l3%{|36Ivy=@@XDfo{ z94!5BV9=bWl+*AZHqIxa{|%I;>Xu@yK6}0tyAT?B;J`xHIGm*GV)+#GA1pm^8j^I4 zSIP^Tg~vY=(E|s%uFI7TT@w^Ra|MXXE;H9d`L7@_M~UbvIyXh*K(woT!vn_d5-7L^Q;KyQO1q(EyIU6`R>@U>tk9 zQhw~rCA)(tw>!b=ugvW!tFyb{EZf~W8_Mp%hO$X|k?QWnvU89eN<>o}Sp8DW zg;thguR}ot99W2Y6DJk(mVAnN8%qP6h9pt%D&@sYYP9!=Xn+I#3-^Jt@rC3;)*<`boyX199#l!*Q}u-3`3vK0Fq3i{u`LerNxNz+&IDduY|{cjqQG<~a-7sFEQ zJ0kkuK-ctxvZ3imMbP|&rT+~KnqQQ1nycu4BclHe?2x7w`yC4U-@roCpEya=U-BvD zZ!G<98?-8>Gr|9Anudt}H_$b8Q8qM9rwE$qvGl)zp=m~?T+_DnzY)>@2G%;^w_1wL z0u4QIV4-U^oTO`Z`4lt^^&HPw;;J~0+ zP${SRer@YYL=PNT>xAFNZGEv6TNq9n;&e8q#YM3(EiR@PscvyB4RJ~(k=>N?>hdOA zl8A;lFs8+&HGpH6!DhBB7{@NBlpniCqb*OA+X`T<6Moyu>TE?g%eIovhO(8hp==es zNOj$@>|Ci-Qns2>UL8xZ9z^V135K%OHGpG#Vl(Rn#<9JX@?&2rSs$X@)&Ogr@Y_(9 zCj7q8mTgV2_-w9)lRlek%csD4ESpzqND|jiDKGGk0oF*w=9OSf_)Qv6neh9|hi3qm zrZ_No)>SIrr1gksiUX@(immaFrPv0r&;th+x;DZ|^=vGkdN#q*1E(QL*Jet2Jr_3G zKq7kJK>yBV%EotY3q{auiKPV&44SQ#a+;nEwha+2aG<_()h)$BR-J7J0o`w4p=bx3 zq-aO^l(G|+?l%odiUuj=r3`MeU5NOv>YyvyP1#U1SP?Y4W9fbagJut4M zmi{*|Xj+tV8h({Hkcj>_utS=f?O-VAe*+6mhvFnnhsmdyGM4@~4N01gP%1QyB%=Qf zbWKMp8=8(%1kKS{`rp8yIaVpB;g8;qBclHe?2x7wI{^y%-@roCNjOQ<$?_@Y6fFI3 z8j>`PRw^`&A)@~cbWNu#8=A%{g60e?{cm8AFBZ1zm`x2Tnthu5n6vLCg{t6VU?)x~@x<4PE0EL31gV9yl;)E?3HF z_|F>?i0FX>rK`H7Skl$@QtV1NX^7L=xPD!YjqBGndXeg`#nKR`R1$feQeNGMG{h0n z5C_Kf>qZUW*ooN8ZUW=jo0aloe{Qr}h;q9Xtp3W}p0YZ-4bHONuCt-+4s0m9Q!i58 zU08OmR4OUEM=7t4)`3Yx>|6;%p z`4MQ#_9$3vk6cNIbN9+v($Flatd%4z!5+lNH-zk#(*j+Ld@$57D!1{Rt=#Yvhz 
zlTR_9W9fg>kfiBLrMwuHVqX!_{|36IZi=zjwXO~2wKO~1*fnBTGVziCL)^rupx=`SMs-$2*YiD#hyucpq5pqU0s{~H)I zU6gW~?)1MA(fBGJ#b*5YbKneYi9WrGz*pd-w8r9EcC!z-q zbX{{Q8~;{wDS~EhEIn{w(9Elp)4a5{%|}EJ99Zjw-^Oiyu@qYXP8#BLHm1d{*q9a< z(u-8LFqVcmrIN@+mGbHiYqG_NXov%2T3kW{IJO%$vn9beb}6O&*ycuCnkctrz*;B# zww2Y{vT&AdIh_q<%VR^?3VM<1R>ZP%rBX@R%1U{4L;Kk(MC@D%hO$*PfMZv~X4V6Y zW9yXiW7&axb)wvQg0)WgZ75327dXp0 zwl)!)SAsF&H)ud*!tW;^o<=N9abWN?DHU&0eo>) znq9DTzkxxsn^I20wjG0s=zasGXz{847ySKf_}+uK>$M?_rO^#6iXV!Tir-T{XWa`+ zqnn1L;+vK7vp!R@eTZmu1HJftm5t(uD}rV}ERAkp(3nzA!@c|nA{yO572kD=;%&%? zeTU>Dy06xK@bGu(YjdNGks%rTo17dD+QCw5@?&{;A4F`J)vCB>Ct`456jJrrlkxl z%D)vSm4BOj&U-tSmNE@V<=?54pO+8IT|~5$fnNSS%0~H<6hU(@mS3vCpt)Zur&*d` zs)+cd3jDWsz0MwD6i-b9i?ScVNo7AOpYuJ2<*8{JlFFW}l%H>_l08AhQ`0~%`zd9k z?57n$^9+{F7{Q=Cz>PXjGw-zXQnmiCTc^gYH*WrPnl7F6|1*7w~@oxg#|@ diff --git a/examples/stable-diffusion/quantization/measure_config.json b/examples/stable-diffusion/quantization/measure_config.json deleted file mode 100755 index 04576eeb46..0000000000 --- a/examples/stable-diffusion/quantization/measure_config.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "method": "HOOKS", - "mode": "MEASURE", - "observer": "maxabs", - "dump_stats_path": "./quantization/measure/fp8" -} diff --git a/examples/stable-diffusion/quantization/quant_config.json b/examples/stable-diffusion/quantization/quant_config.json deleted file mode 100755 index b372905d7f..0000000000 --- a/examples/stable-diffusion/quantization/quant_config.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "method": "HOOKS", - "mode": "QUANTIZE", - "observer": "maxabs", - "scale_method": "maxabs_hw", - "dump_stats_path": "./quantization/measure/fp8" -} \ No newline at end of file diff --git a/examples/stable-diffusion/text_to_image_generation.py b/examples/stable-diffusion/text_to_image_generation.py index c16c5b54e0..8ebe0f56e6 100755 --- a/examples/stable-diffusion/text_to_image_generation.py +++ b/examples/stable-diffusion/text_to_image_generation.py @@ -305,6 +305,12 @@ def main(): default=None, help="The file with prompts (for large number of images generation).", ) + parser.add_argument( + "--lora_scale", + type=float, + default=None, + help="A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.", + ) args = parser.parse_args() if args.optimize and not args.use_habana: @@ -380,6 +386,9 @@ def main(): if args.throughput_warmup_steps is not None: kwargs_call["throughput_warmup_steps"] = args.throughput_warmup_steps + if args.lora_scale is not None: + kwargs_call["lora_scale"] = args.lora_scale + negative_prompts = args.negative_prompts if args.distributed: distributed_state = PartialState() @@ -441,6 +450,7 @@ def main(): kwargs_call["quant_mode"] = args.quant_mode # Instantiate a Stable Diffusion pipeline class + quant_config_path = os.getenv("QUANT_CONFIG") if sdxl: # SDXL pipelines if controlnet: @@ -471,7 +481,6 @@ def main(): pipeline.unet.set_default_attn_processor(pipeline.unet) pipeline.to(torch.device("hpu")) - quant_config_path = os.getenv("QUANT_CONFIG") if quant_config_path: import habana_frameworks.torch.core as htcore from neural_compressor.torch.quantization import FP8Config, convert, prepare @@ -499,9 +508,6 @@ def main(): **kwargs, ) - if args.lora_id: - pipeline.load_lora_weights(args.lora_id) - elif sd3: # SD3 pipelines if controlnet: @@ -520,6 +526,7 @@ def main(): args.model_name_or_path, **kwargs, ) + elif flux: # Flux pipelines if controlnet: @@ -550,8 +557,6 @@ def main(): controlnet=controlnet, **kwargs, ) - if args.lora_id: - 
pipeline.load_lora_weights(args.lora_id) elif inpainting: # SD Inpainting pipeline @@ -595,6 +600,10 @@ def main(): **kwargs, ) + # Load LoRA weights if provided + if args.lora_id: + pipeline.load_lora_weights(args.lora_id) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/stable-diffusion/training/README.md b/examples/stable-diffusion/training/README.md index afa4a0a61f..4ea85c9e36 100644 --- a/examples/stable-diffusion/training/README.md +++ b/examples/stable-diffusion/training/README.md @@ -18,61 +18,71 @@ limitations under the License. This directory contains scripts that showcase how to perform training/fine-tuning of Stable Diffusion models on Habana Gaudi. - ## Textual Inversion [Textual Inversion](https://arxiv.org/abs/2208.01618) is a method to personalize text2image models like Stable Diffusion on your own images using just 3-5 examples. -The `textual_inversion.py` script shows how to implement the training procedure on Habana Gaudi. - -### Cat Toy Example +The `textual_inversion.py` script shows how to implement the training procedure on Habana Gaudi. In the examples below, we will use a set of cat images from the following dataset: [https://huggingface.co/datasets/diffusers/cat_toy_example](https://huggingface.co/datasets/diffusers/cat_toy_example) -Let's first download this dataset locally: - -```python -from huggingface_hub import snapshot_download -from pathlib import Path -import shutil - -local_dir = './cat' -snapshot_download( - 'diffusers/cat_toy_example', - local_dir=local_dir, - repo_type='dataset', - ignore_patterns='.gitattributes', -) -cache_dir = Path(local_dir, '.cache') -if cache_dir.is_dir(): - shutil.rmtree(cache_dir) +To download this and other example training datasets locally, run: +```bash +python download_train_datasets.py ``` -This will be our training data. Now we can launch the training using: ```bash python textual_inversion.py \ - --pretrained_model_name_or_path CompVis/stable-diffusion-v1-4 \ - --train_data_dir ./cat \ - --learnable_property object \ - --placeholder_token "" \ - --initializer_token toy \ - --resolution 512 \ - --train_batch_size 4 \ - --max_train_steps 3000 \ - --learning_rate 5.0e-04 \ - --scale_lr \ - --lr_scheduler constant \ - --lr_warmup_steps 0 \ - --output_dir /tmp/textual_inversion_cat \ - --save_as_full_pipeline \ - --gaudi_config_name Habana/stable-diffusion \ - --throughput_warmup_steps 3 + --pretrained_model_name_or_path CompVis/stable-diffusion-v1-4 \ + --train_data_dir ./cat \ + --learnable_property object \ + --placeholder_token "" \ + --initializer_token toy \ + --resolution 512 \ + --train_batch_size 4 \ + --max_train_steps 3000 \ + --learning_rate 5.0e-04 \ + --scale_lr \ + --lr_scheduler constant \ + --lr_warmup_steps 0 \ + --output_dir /tmp/textual_inversion_cat \ + --save_as_full_pipeline \ + --gaudi_config_name Habana/stable-diffusion \ + --throughput_warmup_steps 3 ``` -The following example shows how to run inference using the fine-tuned model: +> [!NOTE] +> Change `--resolution` to 768 if you are using the [stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2) 768x768 model. + +> [!NOTE] +> As described in [the official paper](https://arxiv.org/abs/2208.01618), only one embedding vector is used for the placeholder token, *e.g.* `""`. +> However, one can also add multiple embedding vectors for the placeholder token to increase the number of fine-tuneable parameters. 
+> This can help the model to learn more complex details. To use multiple embedding vectors, you can define `--num_vectors` to a number larger than one, +> *e.g.*: `--num_vectors 5`. The saved textual inversion vectors will then be larger in size compared to the default case. + +Once you have trained a model as described above, inference can be done using `GaudiStableDiffusionPipeline`. +Please make sure to include the `placeholder_token` in your prompt so that textual inversion guided inference can take effect. + +You can use `text_to_image_generation.py` sample to run inference with the fine-tuned model: + +```bash +python ../text_to_image_generation.py \ + --model_name_or_path /tmp/textual_inversion_cat \ + --prompts "A backpack" \ + --num_images_per_prompt 5 \ + --batch_size 1 \ + --image_save_dir /tmp/textual_inversion_cat_images \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 +``` + +Alternatively, you can run inference with the fine-tuned model using a simple Python script like this: ```python from optimum.habana.diffusers import GaudiStableDiffusionPipeline @@ -85,6 +95,7 @@ pipe = GaudiStableDiffusionPipeline.from_pretrained( use_habana=True, use_hpu_graphs=True, gaudi_config="Habana/stable-diffusion", + sdp_on_bf16=True, ) prompt = "A backpack" @@ -92,14 +103,6 @@ image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0] image.save(f"cat-backpack.png") ``` -> Change `--resolution` to 768 if you are using the [stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2) 768x768 model. - -> As described in [the official paper](https://arxiv.org/abs/2208.01618), only one embedding vector is used for the placeholder token, *e.g.* `""`. -> However, one can also add multiple embedding vectors for the placeholder token to increase the number of fine-tuneable parameters. -> This can help the model to learn more complex details. To use multiple embedding vectors, you can define `--num_vectors` to a number larger than one, -> *e.g.*: `--num_vectors 5`. The saved textual inversion vectors will then be larger in size compared to the default case. 
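+If you would rather keep the original base model and load only the learned embedding
+(instead of the full pipeline saved with `--save_as_full_pipeline`), the following is a
+minimal sketch. It assumes the Gaudi pipeline exposes diffusers' `load_textual_inversion`
+loader and that training saved a `learned_embeds.bin` file in the output directory; adjust
+the paths and the placeholder token to match your run:
+
+```python
+import torch
+
+from optimum.habana.diffusers import GaudiStableDiffusionPipeline
+
+
+# Start from the original base model rather than the fully saved pipeline.
+pipe = GaudiStableDiffusionPipeline.from_pretrained(
+    "CompVis/stable-diffusion-v1-4",
+    torch_dtype=torch.bfloat16,
+    use_habana=True,
+    use_hpu_graphs=True,
+    gaudi_config="Habana/stable-diffusion",
+    sdp_on_bf16=True,
+)
+
+# Load only the learned concept embedding; the file name follows the upstream
+# diffusers textual inversion script and may differ in your setup.
+pipe.load_textual_inversion("/tmp/textual_inversion_cat", weight_name="learned_embeds.bin")
+
+# Make sure the prompt contains your trained placeholder token.
+prompt = "A backpack"
+image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0]
+image.save("cat-backpack-embedding.png")
+```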
- - ## Textual Inversion XL The `textual_inversion_sdxl.py` script shows how to implement textual inversion fine-tuning on Gaudi for XL diffusion models @@ -109,32 +112,52 @@ Assuming the afforemenioned cat toy dataset has been obtained, we can launch tex ```bash python textual_inversion_sdxl.py \ - --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ - --train_data_dir ./cat \ - --learnable_property object \ - --placeholder_token "" \ - --initializer_token toy \ - --resolution 768 \ - --train_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --max_train_steps 500 \ - --learning_rate 5.0e-04 \ - --scale_lr \ - --lr_scheduler constant \ - --lr_warmup_steps 0 \ - --output_dir /tmp/textual_inversion_cat_sdxl \ - --save_as_full_pipeline \ - --gaudi_config_name Habana/stable-diffusion \ - --throughput_warmup_steps 3 + --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ + --train_data_dir ./cat \ + --learnable_property object \ + --placeholder_token "" \ + --initializer_token toy \ + --resolution 768 \ + --train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --max_train_steps 500 \ + --learning_rate 5.0e-04 \ + --scale_lr \ + --lr_scheduler constant \ + --lr_warmup_steps 0 \ + --output_dir /tmp/textual_inversion_cat_sdxl \ + --save_as_full_pipeline \ + --gaudi_config_name Habana/stable-diffusion \ + --throughput_warmup_steps 3 ``` -> As described in [the official paper](https://arxiv.org/abs/2208.01618), only one embedding vector is used for the placeholder token, *e.g.* `""`. -> However, one can also add multiple embedding vectors for the placeholder token to increase the number of fine-tuneable parameters. -> This can help the model to learn more complex details. To use multiple embedding vectors, you can define `--num_vectors` to a number larger than one, -> *e.g.*: `--num_vectors 5`. The saved textual inversion vectors will then be larger in size compared to the default case. +> [!NOTE] +> As described in [the official paper](https://arxiv.org/abs/2208.01618), only one embedding vector is used for the placeholder token, +> e.g. `""`. However, one can also add multiple embedding vectors for the placeholder token to increase the number of fine-tuneable +> parameters. This can help the model to learn more complex details. To use multiple embedding vectors, you can define `--num_vectors` to +> a number larger than one, e.g.: `--num_vectors 5`. The saved textual inversion vectors will then be larger in size compared to the default case. The script also supports training of both text encoders of SDXL, so inference can be executed by inserting a placeholder token into one or both prompts. -The following example shows how to run inference using the fine tuned-model with both text encoders, separately and in combination: + +For example, after training you can use `text_to_image_generation.py` sample to run inference with the fine-tuned model as follows: + +```bash +python ../text_to_image_generation.py \ + --model_name_or_path /tmp/textual_inversion_cat_sdxl \ + --prompts "A backpack" \ + --num_images_per_prompt 5 \ + --batch_size 1 \ + --image_save_dir /tmp/textual_inversion_cat_sdxl_images \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 +``` + +Alternatively, you can run inference with the fine-tuned model using a simple standalone Python script. 
+The following script can be used to run inference using the fine-tuned model with both text encoders, +separately and in combination: ```python from optimum.habana.diffusers import GaudiStableDiffusionXLPipeline @@ -147,6 +170,7 @@ pipe = GaudiStableDiffusionXLPipeline.from_pretrained( use_habana=True, use_hpu_graphs=True, gaudi_config="Habana/stable-diffusion", + sdp_on_bf16=True, ) prompt = "A backpack" @@ -161,73 +185,77 @@ image = pipe(prompt=prompt, prompt_2=prompt_2, num_inference_steps=50, guidance_ image.save(f"cat-backpack_p1and2.png") ``` -> [!NOTE] -> Change `--resolution` to 768 if you are using [stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2) 768x768 model. - -> [!NOTE] -> As described in [the official paper](https://arxiv.org/abs/2208.01618), only one embedding vector is used for the placeholder token, -> e.g. `""`. However, one can also add multiple embedding vectors for the placeholder token to increase the number of fine-tuneable -> parameters. This can help the model to learn more complex details. To use multiple embedding vectors, you can define `--num_vectors` to -> a number larger than one, e.g.: `--num_vectors 5`. The saved textual inversion vectors will then be larger in size compared to the default case. - - ## ControlNet Training ControlNet was introduced in [Adding Conditional Control to Text-to-Image Diffusion Models ](https://huggingface.co/papers/2302.05543) by Lvmin Zhang and Maneesh Agrawala. It is a type of model for controlling StableDiffusion by conditioning the model with an additional input image. This example is adapted from [controlnet example in the diffusers repository](https://github.com/huggingface/diffusers/tree/main/examples/controlnet#training). -First, download the conditioning images as shown below: - +To download the example conditioning images locally, run: ```bash -wget https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png -wget https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_2.png +python download_train_datasets.py ``` Then proceed to training with command: ```bash python train_controlnet.py \ - --pretrained_model_name_or_path=CompVis/stable-diffusion-v1-4\ - --output_dir=/tmp/stable_diffusion1_5 \ - --dataset_name=fusing/fill50k \ - --resolution=512 \ - --learning_rate=1e-5 \ - --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \ - --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \ - --train_batch_size=4 \ - --throughput_warmup_steps=3 \ - --use_hpu_graphs \ - --sdp_on_bf16 \ - --bf16 \ - --trust_remote_code + --pretrained_model_name_or_path=CompVis/stable-diffusion-v1-4\ + --output_dir=/tmp/stable_diffusion1_4 \ + --dataset_name=fusing/fill50k \ + --resolution=512 \ + --learning_rate=1e-5 \ + --validation_image "./cnet/conditioning_image_1.png" "./cnet/conditioning_image_2.png" \ + --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \ + --train_batch_size=4 \ + --throughput_warmup_steps=3 \ + --use_hpu_graphs \ + --sdp_on_bf16 \ + --bf16 \ + --trust_remote_code ``` -### Multi-card Run +### Multi-Card Training You can run these fine-tuning scripts in a distributed fashion as follows: ```bash python ../../gaudi_spawn.py --use_mpi --world_size 8 train_controlnet.py \ - --pretrained_model_name_or_path CompVis/stable-diffusion-v1-4 \ - 
--output_dir=/tmp/stable_diffusion1_5 \ - --dataset_name=fusing/fill50k \ - --resolution=512 \ - --learning_rate=1e-5 \ - --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \ - --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \ - --train_batch_size=4 \ - --throughput_warmup_steps 3 \ - --use_hpu_graphs \ - --sdp_on_bf16 \ - --bf16 \ - --trust_remote_code + --pretrained_model_name_or_path CompVis/stable-diffusion-v1-4 \ + --output_dir=/tmp/stable_diffusion1_4 \ + --dataset_name=fusing/fill50k \ + --resolution=512 \ + --learning_rate=1e-5 \ + --validation_image "./cnet/conditioning_image_1.png" "./cnet/conditioning_image_2.png" \ + --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \ + --train_batch_size=4 \ + --throughput_warmup_steps 3 \ + --use_hpu_graphs \ + --sdp_on_bf16 \ + --bf16 \ + --trust_remote_code ``` - ### Inference -Once you have trained a model as described right above, inference can be done simply using the `GaudiStableDiffusionPipeline`. -Make sure to include the `placeholder_token` in your prompt. +After training completes, you can use `text_to_image_generation.py` sample to run inference with the fine-tuned ControlNet model: + +```bash +python ../text_to_image_generation.py \ + --model_name_or_path CompVis/stable-diffusion-v1-4 \ + --controlnet_model_name_or_path /tmp/stable_diffusion1_4 \ + --prompts "pale golden rod circle with old lace background" \ + --control_image "./cnet/conditioning_image_1.png" \ + --num_images_per_prompt 5 \ + --batch_size 1 \ + --image_save_dir /tmp/controlnet_images \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 +``` + +Alternatively, you can run inference using a simple standalone Python script, as shown below: ```python from diffusers import ControlNetModel, UniPCMultistepScheduler @@ -236,7 +264,7 @@ import torch from optimum.habana.diffusers import GaudiStableDiffusionControlNetPipeline base_model_path = "CompVis/stable-diffusion-v1-4" -controlnet_path = "/tmp/stable_diffusion1_5" +controlnet_path = "/tmp/stable_diffusion1_4" controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.bfloat16) pipe = GaudiStableDiffusionControlNetPipeline.from_pretrained( @@ -246,12 +274,13 @@ pipe = GaudiStableDiffusionControlNetPipeline.from_pretrained( use_habana=True, use_hpu_graphs=True, gaudi_config="Habana/stable-diffusion", + sdp_on_bf16=True, ) # speed up diffusion process with faster scheduler and memory optimization pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config) -control_image = load_image("./conditioning_image_1.png") +control_image = load_image("./cnet/conditioning_image_1.png") prompt = "pale golden rod circle with old lace background" # generate image @@ -262,7 +291,6 @@ image = pipe( image.save("./output.png") ``` - ## Fine-Tuning for Stable Diffusion XL The `train_text_to_image_sdxl.py` script shows how to implement the fine-tuning of Stable Diffusion XL models on Gaudi. 
@@ -274,103 +302,102 @@ Install the requirements: pip install -r requirements.txt ``` -### Single-card Training +### Single Card Training To train Stable Diffusion XL on a single Gaudi card, use: ```bash python train_text_to_image_sdxl.py \ - --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ - --pretrained_vae_model_name_or_path madebyollin/sdxl-vae-fp16-fix \ - --dataset_name lambdalabs/naruto-blip-captions \ - --resolution 512 \ - --crop_resolution 512 \ - --center_crop \ - --random_flip \ - --proportion_empty_prompts=0.2 \ - --train_batch_size 16 \ - --max_train_steps 2500 \ - --learning_rate 1e-05 \ - --max_grad_norm 1 \ - --lr_scheduler constant \ - --lr_warmup_steps 0 \ - --output_dir sdxl_model_output \ - --gaudi_config_name Habana/stable-diffusion \ - --throughput_warmup_steps 3 \ - --dataloader_num_workers 8 \ - --sdp_on_bf16 \ - --bf16 \ - --use_hpu_graphs_for_training \ - --use_hpu_graphs_for_inference \ - --validation_prompt="a cute naruto creature" \ - --validation_epochs 48 \ - --checkpointing_steps 2500 \ - --logging_step 10 \ - --adjust_throughput + --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ + --pretrained_vae_model_name_or_path madebyollin/sdxl-vae-fp16-fix \ + --dataset_name lambdalabs/naruto-blip-captions \ + --resolution 512 \ + --crop_resolution 512 \ + --center_crop \ + --random_flip \ + --proportion_empty_prompts=0.2 \ + --train_batch_size 16 \ + --max_train_steps 2500 \ + --learning_rate 1e-05 \ + --max_grad_norm 1 \ + --lr_scheduler constant \ + --lr_warmup_steps 0 \ + --output_dir sdxl_model_output \ + --gaudi_config_name Habana/stable-diffusion \ + --throughput_warmup_steps 3 \ + --dataloader_num_workers 8 \ + --sdp_on_bf16 \ + --bf16 \ + --use_hpu_graphs_for_training \ + --use_hpu_graphs_for_inference \ + --validation_prompt="a cute naruto creature" \ + --validation_epochs 48 \ + --checkpointing_steps 2500 \ + --logging_step 10 \ + --adjust_throughput ``` - ### Multi-Card Training To train Stable Diffusion XL on a multi-card Gaudi system, use: ```bash PT_HPU_RECIPE_CACHE_CONFIG=/tmp/stdxl_recipe_cache,True,1024 \ python ../../gaudi_spawn.py --world_size 8 --use_mpi train_text_to_image_sdxl.py \ - --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ - --pretrained_vae_model_name_or_path madebyollin/sdxl-vae-fp16-fix \ - --dataset_name lambdalabs/naruto-blip-captions \ - --resolution 512 \ - --crop_resolution 512 \ - --center_crop \ - --random_flip \ - --proportion_empty_prompts=0.2 \ - --train_batch_size 16 \ - --max_train_steps 336 \ - --learning_rate 1e-05 \ - --max_grad_norm 1 \ - --lr_scheduler constant \ - --lr_warmup_steps 0 \ - --output_dir sdxl_model_output \ - --gaudi_config_name Habana/stable-diffusion \ - --throughput_warmup_steps 3 \ - --dataloader_num_workers 8 \ - --sdp_on_bf16 \ - --bf16 \ - --use_hpu_graphs_for_training \ - --use_hpu_graphs_for_inference \ - --validation_prompt="a cute naruto creature" \ - --validation_epochs 48 \ - --checkpointing_steps 336 \ - --mediapipe dataset_sdxl_mediapipe \ - --adjust_throughput + --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ + --pretrained_vae_model_name_or_path madebyollin/sdxl-vae-fp16-fix \ + --dataset_name lambdalabs/naruto-blip-captions \ + --resolution 512 \ + --crop_resolution 512 \ + --center_crop \ + --random_flip \ + --proportion_empty_prompts=0.2 \ + --train_batch_size 16 \ + --max_train_steps 336 \ + --learning_rate 1e-05 \ + --max_grad_norm 1 \ + --lr_scheduler constant \ + 
--lr_warmup_steps 0 \ + --output_dir sdxl_model_output \ + --gaudi_config_name Habana/stable-diffusion \ + --throughput_warmup_steps 3 \ + --dataloader_num_workers 8 \ + --sdp_on_bf16 \ + --bf16 \ + --use_hpu_graphs_for_training \ + --use_hpu_graphs_for_inference \ + --validation_prompt="a cute naruto creature" \ + --validation_epochs 48 \ + --checkpointing_steps 336 \ + --mediapipe dataset_sdxl_mediapipe \ + --adjust_throughput ``` -### Single-Card Training on Gaudi1 +### Single Card Training on Gaudi1 To train Stable Diffusion XL on a single Gaudi1 card, use: ```bash python train_text_to_image_sdxl.py \ - --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ - --pretrained_vae_model_name_or_path madebyollin/sdxl-vae-fp16-fix \ - --dataset_name lambdalabs/naruto-blip-captions \ - --resolution 256 \ - --center_crop \ - --random_flip \ - --proportion_empty_prompts=0.2 \ - --train_batch_size 1 \ - --gradient_accumulation_steps 4 \ - --max_train_steps 3000 \ - --learning_rate 1e-05 \ - --max_grad_norm 1 \ - --lr_scheduler constant \ - --lr_warmup_steps 0 \ - --output_dir sdxl_model_output \ - --gaudi_config_name Habana/stable-diffusion \ - --throughput_warmup_steps 3 \ - --use_hpu_graphs_for_training \ - --use_hpu_graphs_for_inference \ - --checkpointing_steps 3000 \ - --sdp_on_bf16 \ - --bf16 + --pretrained_model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ + --pretrained_vae_model_name_or_path madebyollin/sdxl-vae-fp16-fix \ + --dataset_name lambdalabs/naruto-blip-captions \ + --resolution 256 \ + --center_crop \ + --random_flip \ + --proportion_empty_prompts=0.2 \ + --train_batch_size 1 \ + --gradient_accumulation_steps 4 \ + --max_train_steps 3000 \ + --learning_rate 1e-05 \ + --max_grad_norm 1 \ + --lr_scheduler constant \ + --lr_warmup_steps 0 \ + --output_dir sdxl_model_output \ + --gaudi_config_name Habana/stable-diffusion \ + --throughput_warmup_steps 3 \ + --use_hpu_graphs_for_training \ + --use_hpu_graphs_for_inference \ + --checkpointing_steps 3000 \ + --sdp_on_bf16 \ + --bf16 ``` > [!NOTE] @@ -380,6 +407,24 @@ python train_text_to_image_sdxl.py \ > [!NOTE] > `--mediapipe` only works on Gaudi2. +### Inference + +After training is finished, you can run inference using `text_to_image_generation.py` script as follows: + +```bash +python ../text_to_image_generation.py \ + --model_name_or_path sdxl_model_output \ + --prompts "a cute naruto creature" \ + --num_images_per_prompt 5 \ + --batch_size 1 \ + --image_save_dir /tmp/stable_diffusion_xl_images \ + --scheduler euler_discrete \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 +``` ## DreamBooth @@ -387,28 +432,12 @@ DreamBooth is a technique for personalizing text-to-image models like Stable Dif of a specific subject. The `train_dreambooth.py` script demonstrates how to implement this training process and adapt it for Stable Diffusion. -### Dog Toy Example - For DreamBooth examples we will use a set of dog images from the following dataset: [https://huggingface.co/datasets/diffusers/dog-example](https://huggingface.co/datasets/diffusers/dog-example). 
-Let's first download this dataset locally: - -```python -from huggingface_hub import snapshot_download -from pathlib import Path -import shutil - -local_dir = './dog' -snapshot_download( - 'diffusers/dog-example', - local_dir=local_dir, - repo_type='dataset', - ignore_patterns='.gitattributes', -) -cache_dir = Path(local_dir, '.cache') -if cache_dir.is_dir(): - shutil.rmtree(cache_dir) +To download this and other example training datasets locally, run: +```bash +python download_train_datasets.py ``` ### Full Model Fine-Tuning @@ -416,26 +445,26 @@ if cache_dir.is_dir(): To launch the multi-card Stable Diffusion training, use: ```bash python ../../gaudi_spawn.py --world_size 8 --use_mpi train_dreambooth.py \ - --pretrained_model_name_or_path="CompVis/stable-diffusion-v1-4" \ - --instance_data_dir="dog" \ - --output_dir="dog_sd" \ - --class_data_dir="path-to-class-images" \ - --with_prior_preservation --prior_loss_weight=1.0 \ - --instance_prompt="a photo of sks dog" \ - --class_prompt="a photo of dog" \ - --resolution=512 \ - --train_batch_size=1 \ - --num_class_images=200 \ - --gradient_accumulation_steps=1 \ - --learning_rate=5e-6 \ - --lr_scheduler="constant" \ - --lr_warmup_steps=0 \ - --max_train_steps=800 \ - --mixed_precision=bf16 \ - --use_hpu_graphs_for_training \ - --use_hpu_graphs_for_inference \ - --gaudi_config_name Habana/stable-diffusion \ - full + --pretrained_model_name_or_path="CompVis/stable-diffusion-v1-4" \ + --instance_data_dir="dog" \ + --output_dir="dog_sd" \ + --class_data_dir="path-to-class-images" \ + --with_prior_preservation --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --num_class_images=200 \ + --gradient_accumulation_steps=1 \ + --learning_rate=5e-6 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --max_train_steps=800 \ + --mixed_precision=bf16 \ + --use_hpu_graphs_for_training \ + --use_hpu_graphs_for_inference \ + --gaudi_config_name Habana/stable-diffusion \ + full ``` Prior preservation is used to prevent overfitting and language drift. For more details, refer to the original paper. @@ -453,27 +482,27 @@ UNet or text encoder. 
To run the multi-card training, use: ```bash python ../../gaudi_spawn.py --world_size 8 --use_mpi train_dreambooth.py \ - --pretrained_model_name_or_path="CompVis/stable-diffusion-v1-4" \ - --instance_data_dir="dog" \ - --output_dir="dog_sd" \ - --class_data_dir="path-to-class-images" \ - --with_prior_preservation \ - --prior_loss_weight=1.0 \ - --instance_prompt="a photo of sks dog" \ - --class_prompt="a photo of dog" \ - --resolution=512 \ - --train_batch_size=1 \ - --num_class_images=200 \ - --gradient_accumulation_steps=1 \ - --learning_rate=1e-4 \ - --lr_scheduler="constant" \ - --lr_warmup_steps=0 \ - --max_train_steps=800 \ - --mixed_precision=bf16 \ - --use_hpu_graphs_for_training \ - --use_hpu_graphs_for_inference \ - --gaudi_config_name Habana/stable-diffusion \ - lora --unet_r 8 --unet_alpha 8 + --pretrained_model_name_or_path="CompVis/stable-diffusion-v1-4" \ + --instance_data_dir="dog" \ + --output_dir="dog_sd" \ + --class_data_dir="path-to-class-images" \ + --with_prior_preservation \ + --prior_loss_weight=1.0 \ + --instance_prompt="a photo of sks dog" \ + --class_prompt="a photo of dog" \ + --resolution=512 \ + --train_batch_size=1 \ + --num_class_images=200 \ + --gradient_accumulation_steps=1 \ + --learning_rate=1e-4 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --max_train_steps=800 \ + --mixed_precision=bf16 \ + --use_hpu_graphs_for_training \ + --use_hpu_graphs_for_inference \ + --gaudi_config_name Habana/stable-diffusion \ + lora --unet_r 8 --unet_alpha 8 ``` > [!NOTE] > When using PEFT method we can use a much higher learning rate compared to vanilla dreambooth. @@ -514,54 +543,70 @@ We can use the same `dog` dataset for the following examples. To launch Stable Diffusion XL LoRA training on a multi-card Gaudi system, use:" ```bash python train_dreambooth_lora_sdxl.py \ - --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \ - --instance_data_dir="dog" \ - --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \ - --output_dir="lora-trained-xl" \ - --mixed_precision="bf16" \ - --instance_prompt="a photo of sks dog" \ - --resolution=1024 \ - --train_batch_size=1 \ - --gradient_accumulation_steps=4 \ - --learning_rate=1e-4 \ - --lr_scheduler="constant" \ - --lr_warmup_steps=0 \ - --max_train_steps=500 \ - --validation_prompt="A photo of sks dog in a bucket" \ - --validation_epochs=25 \ - --seed=0 \ - --use_hpu_graphs_for_inference \ - --use_hpu_graphs_for_training \ - --gaudi_config_name Habana/stable-diffusion + --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \ + --instance_data_dir="dog" \ + --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \ + --output_dir="lora-trained-xl" \ + --mixed_precision="bf16" \ + --instance_prompt="a photo of sks dog" \ + --resolution=1024 \ + --train_batch_size=1 \ + --gradient_accumulation_steps=4 \ + --learning_rate=1e-4 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --max_train_steps=500 \ + --validation_prompt="A photo of sks dog in a bucket" \ + --validation_epochs=25 \ + --seed=0 \ + --use_hpu_graphs_for_inference \ + --use_hpu_graphs_for_training \ + --gaudi_config_name Habana/stable-diffusion ``` To launch Stable Diffusion XL LoRA training on a multi-card Gaudi system, use:" ```bash python ../../gaudi_spawn.py --world_size 8 --use_mpi train_dreambooth_lora_sdxl.py \ - --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \ - --instance_data_dir="dog" \ - 
--pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \ - --output_dir="lora-trained-xl" \ - --mixed_precision="bf16" \ - --instance_prompt="a photo of sks dog" \ - --resolution=1024 \ - --train_batch_size=1 \ - --gradient_accumulation_steps=4 \ - --learning_rate=1e-4 \ - --lr_scheduler="constant" \ - --lr_warmup_steps=0 \ - --max_train_steps=500 \ - --validation_prompt="A photo of sks dog in a bucket" \ - --validation_epochs=25 \ - --seed=0 \ - --use_hpu_graphs_for_inference \ - --use_hpu_graphs_for_training \ - --gaudi_config_name Habana/stable-diffusion + --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \ + --instance_data_dir="dog" \ + --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \ + --output_dir="lora-trained-xl" \ + --mixed_precision="bf16" \ + --instance_prompt="a photo of sks dog" \ + --resolution=1024 \ + --train_batch_size=1 \ + --gradient_accumulation_steps=4 \ + --learning_rate=1e-4 \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --max_train_steps=500 \ + --validation_prompt="A photo of sks dog in a bucket" \ + --validation_epochs=25 \ + --seed=0 \ + --use_hpu_graphs_for_inference \ + --use_hpu_graphs_for_training \ + --gaudi_config_name Habana/stable-diffusion ``` > [!NOTE] > To use DeepSpeed instead of MPI, replace `--use_mpi` with `--deepspeed` in the previous example -After training completes, you can run inference with a simple python script like this: +After training is completed, you can directly use `text_to_image_generation.py` sample for inference, as shown below: +```bash +python ../text_to_image_generation.py \ + --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ + --lora_id lora-trained-xl \ + --prompts "A picture of a sks dog in a bucket" \ + --num_images_per_prompt 5 \ + --batch_size 1 \ + --image_save_dir /tmp/stable_diffusion_xl_images \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 +``` + +Alternatively, you can run inference with a simple Python script such as this: ```python import torch from optimum.habana import GaudiConfig @@ -573,6 +618,7 @@ pipe = GaudiStableDiffusionXLPipeline.from_pretrained( use_hpu_graphs=True, use_habana=True, gaudi_config="Habana/stable-diffusion", + sdp_on_bf16=True, ) pipe.load_lora_weights("lora-trained-xl") @@ -588,21 +634,6 @@ image = pipe( image.save("sdxl-lora.png") ``` -Alternatively, you could directly use `text_to_image_generation.py` sample for inference as follows: -```bash -python ../text_to_image_generation.py \ - --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 \ - --lora_id lora-trained-xl \ - --prompts "A picture of a sks dog in a bucket" \ - --num_images_per_prompt 5 \ - --batch_size 1 \ - --image_save_dir /tmp/stable_diffusion_xl_images \ - --use_habana \ - --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion \ - --bf16 -``` - ### DreamBooth LoRA Fine-Tuning with FLUX.1-dev We can use the same `dog` dataset for the following examples. @@ -610,60 +641,76 @@ We can use the same `dog` dataset for the following examples. 
To launch FLUX.1-dev LoRA training on a single Gaudi card, use:" ```bash python train_dreambooth_lora_flux.py \ - --pretrained_model_name_or_path="black-forest-labs/FLUX.1-dev" \ - --dataset="dog" \ - --prompt="a photo of sks dog" \ - --output_dir="dog_lora_flux" \ - --mixed_precision="bf16" \ - --weighting_scheme="none" \ - --resolution=1024 \ - --train_batch_size=1 \ - --learning_rate=1e-4 \ - --guidance_scale=1 \ - --report_to="tensorboard" \ - --gradient_accumulation_steps=4 \ - --gradient_checkpointing \ - --lr_scheduler="constant" \ - --lr_warmup_steps=0 \ - --cache_latents \ - --rank=4 \ - --max_train_steps=500 \ - --seed="0" \ - --use_hpu_graphs_for_inference \ - --use_hpu_graphs_for_training \ - --gaudi_config_name="Habana/stable-diffusion" + --pretrained_model_name_or_path="black-forest-labs/FLUX.1-dev" \ + --dataset="dog" \ + --prompt="a photo of sks dog" \ + --output_dir="dog_lora_flux" \ + --mixed_precision="bf16" \ + --weighting_scheme="none" \ + --resolution=1024 \ + --train_batch_size=1 \ + --learning_rate=1e-4 \ + --guidance_scale=1 \ + --report_to="tensorboard" \ + --gradient_accumulation_steps=4 \ + --gradient_checkpointing \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --cache_latents \ + --rank=4 \ + --max_train_steps=500 \ + --seed="0" \ + --use_hpu_graphs_for_inference \ + --use_hpu_graphs_for_training \ + --gaudi_config_name="Habana/stable-diffusion" ``` To launch FLUX.1-dev LoRA training on a multi-card Gaudi system, use:" ```bash python ../../gaudi_spawn.py --world_size 8 --use_mpi train_dreambooth_lora_flux.py \ - --pretrained_model_name_or_path="black-forest-labs/FLUX.1-dev" \ - --dataset="dog" \ - --prompt="a photo of sks dog" \ - --output_dir="dog_lora_flux" \ - --mixed_precision="bf16" \ - --weighting_scheme="none" \ - --resolution=1024 \ - --train_batch_size=1 \ - --learning_rate=1e-4 \ - --guidance_scale=1 \ - --report_to="tensorboard" \ - --gradient_accumulation_steps=4 \ - --gradient_checkpointing \ - --lr_scheduler="constant" \ - --lr_warmup_steps=0 \ - --cache_latents \ - --rank=4 \ - --max_train_steps=500 \ - --seed="0" \ - --use_hpu_graphs_for_inference \ - --use_hpu_graphs_for_training \ - --gaudi_config_name="Habana/stable-diffusion" + --pretrained_model_name_or_path="black-forest-labs/FLUX.1-dev" \ + --dataset="dog" \ + --prompt="a photo of sks dog" \ + --output_dir="dog_lora_flux" \ + --mixed_precision="bf16" \ + --weighting_scheme="none" \ + --resolution=1024 \ + --train_batch_size=1 \ + --learning_rate=1e-4 \ + --guidance_scale=1 \ + --report_to="tensorboard" \ + --gradient_accumulation_steps=4 \ + --gradient_checkpointing \ + --lr_scheduler="constant" \ + --lr_warmup_steps=0 \ + --cache_latents \ + --rank=4 \ + --max_train_steps=500 \ + --seed="0" \ + --use_hpu_graphs_for_inference \ + --use_hpu_graphs_for_training \ + --gaudi_config_name="Habana/stable-diffusion" ``` > [!NOTE] > To use DeepSpeed instead of MPI, replace `--use_mpi` with `--use_deepspeed` in the previous example -After training completes, you can run inference on Gaudi system with a simple python script like this: +After training completes, you could directly use `text_to_image_generation.py` sample for inference as follows: +```bash +python ../text_to_image_generation.py \ + --model_name_or_path "black-forest-labs/FLUX.1-dev" \ + --lora_id dog_lora_flux \ + --prompts "A picture of a sks dog in a bucket" \ + --num_images_per_prompt 5 \ + --batch_size 1 \ + --image_save_dir /tmp/flux_images \ + --use_habana \ + --use_hpu_graphs \ + --gaudi_config 
Habana/stable-diffusion \ + --sdp_on_bf16 \ + --bf16 +``` + +Alternatively, you can run inference on Gaudi system with a simple Python script like this: ```python import torch from optimum.habana import GaudiConfig @@ -675,6 +722,7 @@ pipe = GaudiFluxPipeline.from_pretrained( use_hpu_graphs=True, use_habana=True, gaudi_config="Habana/stable-diffusion", + sdp_on_bf16=True, ) pipe.load_lora_weights("dog_lora_flux") @@ -688,19 +736,3 @@ image = pipe( ).images[0] image.save("flux-dev.png") ``` - -Alternatively, you could directly use `text_to_image_generation.py` sample for inference as follows: -```bash -python ../text_to_image_generation.py \ - --model_name_or_path "black-forest-labs/FLUX.1-dev" \ - --lora_id dog_lora_flux \ - --prompts "A picture of a sks dog in a bucket" \ - --num_images_per_prompt 5 \ - --batch_size 1 \ - --image_save_dir /tmp/flux_images \ - --use_habana \ - --use_hpu_graphs \ - --gaudi_config Habana/stable-diffusion \ - --sdp_on_bf16 \ - --bf16 -``` diff --git a/examples/stable-diffusion/training/requirements.txt b/examples/stable-diffusion/training/requirements.txt index bf92040ae8..558217e643 100644 --- a/examples/stable-diffusion/training/requirements.txt +++ b/examples/stable-diffusion/training/requirements.txt @@ -1,3 +1,5 @@ imagesize peft == 0.10.0 sentencepiece +compel +datasets diff --git a/examples/stable-diffusion/training/textual_inversion.py b/examples/stable-diffusion/training/textual_inversion.py index 961964d241..2f465699b3 100755 --- a/examples/stable-diffusion/training/textual_inversion.py +++ b/examples/stable-diffusion/training/textual_inversion.py @@ -130,6 +130,7 @@ def log_validation(text_encoder, tokenizer, unet, vae, args, accelerator, weight use_habana=True, use_hpu_graphs=True, gaudi_config=args.gaudi_config_name, + sdp_on_bf16=args.sdp_on_bf16, ) pipeline.scheduler = GaudiDDIMScheduler.from_config(pipeline.scheduler.config) pipeline.set_progress_bar_config(disable=True) @@ -415,6 +416,9 @@ def parse_args(): default=None, help="Local path to the Gaudi configuration file or its name on the Hugging Face Hub.", ) + parser.add_argument( + "--sdp_on_bf16", action="store_true", help="Allow pyTorch to use reduced precision in the SDPA math backend" + ) parser.add_argument( "--throughput_warmup_steps", type=int, diff --git a/examples/stable-diffusion/training/textual_inversion_sdxl.py b/examples/stable-diffusion/training/textual_inversion_sdxl.py old mode 100644 new mode 100755 index ac2f8f8fcf..3ab6c57602 --- a/examples/stable-diffusion/training/textual_inversion_sdxl.py +++ b/examples/stable-diffusion/training/textual_inversion_sdxl.py @@ -392,6 +392,9 @@ def parse_args(): default=None, help="Local path to the Gaudi configuration file or its name on the Hugging Face Hub.", ) + parser.add_argument( + "--sdp_on_bf16", action="store_true", help="Allow pyTorch to use reduced precision in the SDPA math backend" + ) parser.add_argument( "--throughput_warmup_steps", type=int, @@ -623,6 +626,7 @@ def main(): use_habana=True, use_hpu_graphs=True, gaudi_config=args.gaudi_config_name, + sdp_on_bf16=args.sdp_on_bf16, ) text_encoder_1 = pipeline.text_encoder.to(accelerator.device) text_encoder_2 = pipeline.text_encoder_2.to(accelerator.device) diff --git a/examples/summarization/README.md b/examples/summarization/README.md index 86ab88b790..bdaef78edf 100644 --- a/examples/summarization/README.md +++ b/examples/summarization/README.md @@ -179,65 +179,8 @@ python ../gaudi_spawn.py \ ## Using DeepSpeed -Here is an example on 8 HPUs on Gaudi2/Gaudi3 with 
DeepSpeed-ZeRO3 to fine-tune [FLAN-T5 XXL](https://huggingface.co/google/flan-t5-xxl): -```bash -PT_HPU_MAX_COMPOUND_OP_SIZE=512 python ../gaudi_spawn.py \ - --world_size 8 --use_deepspeed run_summarization.py \ - --model_name_or_path google/flan-t5-xxl \ - --do_train \ - --do_eval \ - --dataset_name cnn_dailymail \ - --dataset_config '"3.0.0"' \ - --source_prefix '"summarize: "' \ - --output_dir ./tst-summarization \ - --per_device_train_batch_size 22 \ - --per_device_eval_batch_size 22 \ - --learning_rate 1e-4 \ - --num_train_epochs 3 \ - --overwrite_output_dir \ - --predict_with_generate \ - --use_habana \ - --use_lazy_mode \ - --gaudi_config_name Habana/t5 \ - --ignore_pad_token_for_loss False \ - --pad_to_max_length \ - --generation_max_length 129 \ - --save_strategy epoch \ - --throughput_warmup_steps 3 \ - --gradient_checkpointing \ - --adam_epsilon 1e-08 --logging_steps 1 \ - --deepspeed ds_flan_t5_z3_config_bf16.json -``` - -Here is an example on 8 HPUs on Gaudi2 with DeepSpeed-ZeRO2 to fine-tune t5-large: -```bash -PT_HPU_LAZY_MODE=0 python ../gaudi_spawn.py \ - --world_size 8 \ - --use_deepspeed run_summarization.py \ - --deepspeed ../../tests/configs/deepspeed_zero_2.json \ - --do_train \ - --do_eval \ - --overwrite_output_dir \ - --predict_with_generate \ - --use_habana \ - --gaudi_config_name Habana/t5 \ - --ignore_pad_token_for_loss False \ - --pad_to_max_length \ - --save_strategy no \ - --throughput_warmup_steps 15 \ - --model_name_or_path t5-large \ - --source_prefix '"summarize:"' \ - --dataset_name cnn_dailymail \ - --dataset_config '"3.0.0"' \ - --output_dir /tmp/tst-summarization \ - --per_device_train_batch_size 20 \ - --per_device_eval_batch_size 20 \ - --max_train_samples 2000 \ - --torch_compile_backend hpu_backend \ - --torch_compile -``` - -You can look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana. +You can check the [DeepSpeed](https://github.com/huggingface/optimum-habana/tree/main/examples#deepspeed) section in Optimum Habana examples for how to run DeepSpeed. +You also can look at the [documentation](https://huggingface.co/docs/optimum/habana/usage_guides/deepspeed) for more information about how to use DeepSpeed in Optimum Habana. 
## Inference @@ -267,23 +210,3 @@ python run_summarization.py \ --bf16_full_eval ``` -You can run inference with BART on the CNN-DailyMail dataset on 1 Gaudi card with the following command: -```bash -python run_summarization.py \ - --model_name_or_path facebook/bart-large-cnn \ - --do_predict \ - --dataset_name cnn_dailymail \ - --dataset_config "3.0.0" \ - --output_dir /tmp/tst-summarization \ - --per_device_eval_batch_size 2 \ - --overwrite_output_dir \ - --predict_with_generate \ - --use_habana \ - --use_lazy_mode \ - --use_hpu_graphs_for_inference \ - --gaudi_config_name Habana/bart \ - --ignore_pad_token_for_loss False \ - --pad_to_max_length \ - --throughput_warmup_steps 3 \ - --num_beams 1 -``` diff --git a/examples/text-feature-extraction/README.md b/examples/text-feature-extraction/README.md index 2b0d5354ef..e46168840b 100644 --- a/examples/text-feature-extraction/README.md +++ b/examples/text-feature-extraction/README.md @@ -31,10 +31,3 @@ python run_feature_extraction.py \ --sdp_on_bf16 \ --bf16 ``` - -Models that have been validated: - -- [Supabase/gte-small](https://huggingface.co/Supabase/gte-small) -- [thenlper/gte-small](https://huggingface.co/thenlper/gte-small) -- [thenlper/gte-base](https://huggingface.co/thenlper/gte-base) -- [thenlper/gte-large](https://huggingface.co/thenlper/gte-large) diff --git a/examples/text-generation/README.md b/examples/text-generation/README.md index 7767443c6e..5adf348217 100755 --- a/examples/text-generation/README.md +++ b/examples/text-generation/README.md @@ -132,6 +132,7 @@ Here are a few settings you may be interested in: - `--prompt` to benchmark the model on one or several prompts of your choice - `--attn_softmax_bf16` to run attention softmax layer in bfloat16 precision provided that the model (such as Llama) supports it - `--trim_logits` to calculate logits only for the last token in the first time step provided that the model (such as Llama) supports it +- `--attn_batch_split` specifies the number of smaller batches into which attention and MLP processing are split to improve parallelization. By default, no splitting is performed (value is 1). Splitting is enabled only for prompt processing. This configuration is most effective for batch sizes (BS) > 125 and tensor parallelism (TP) >= 2, with a recommended value of '3' splits. For example, you can reproduce the results presented in [this blog post](https://huggingface.co/blog/habana-gaudi-2-bloom) with the following command: ```bash @@ -201,6 +202,20 @@ python ../gaudi_spawn.py --use_deepspeed --world_size 8 run_generation.py \ --flash_attention_causal_mask ``` +To run Deepseek-R1-BF16 inference on 16 Gaudi3 cards (2 nodes) use the following command. Ensure you replace the hostfile parameter with the appropriate file. Sample hostfile reference [here](https://github.com/huggingface/optimum-habana/blob/main/examples/multi-node-training/hostfile) +```bash +python3 ../gaudi_spawn.py --hostfile= --use_deepspeed \ +--world_size 16 ./run_generation.py \ +--model_name_or_path opensourcerelease/DeepSeek-R1-bf16 \ +--bf16 \ +--trim_logits \ +--batch_size 1 \ +--use_hpu_graphs \ +--use_kv_cache \ +--parallel_strategy "ep" \ +--prompt "DeepSpeed is a machine learning framework" +``` + > To be able to run gated models like [StarCoder](https://huggingface.co/bigcode/starcoder), you should: > - have a HF account > - agree to the terms of use of the model in its model card on the HF Hub @@ -597,7 +612,7 @@ Some models can fit on HPU DRAM but can't fit on the CPU RAM. 
When we run a model on single card and don't use deepspeed, the `--disk_offload` flag allows to offload weights to disk during model quantization in INC. When this flag is mentioned, during the quantization process, each weight first is loaded from disk to CPU RAM, when brought to HPU DRAM and quantized there. This way not all the model is on the CPU RAM but only one weight each time. To enable this weights offload mechanism, add `--disk_offload` flag to the topology command line. Here is an example of using disk_offload in quantize command. -Please follow the "Running FP8 models on single device" section first before running the cmd below. +Please follow the [Running FP8 models on single device](#running-fp8-models-on-single-device) section first before running the cmd below. ```bash QUANT_CONFIG=./quantization_config/maxabs_quant.json TQDM_DISABLE=1 \ @@ -619,6 +634,57 @@ python run_generation.py \ --flash_attention_recompute ``` +### Saving FP8 Checkpoints in Hugging Face format +After quantizing the model, we can save it to a local path. + +> [!NOTE] +> Before executing the command below, please refer to the [Running with FP8](#running-with-fp8) section to measure the model quantization statistics. + +Here is an example of how to quantize and save the LLama3.1-70B model on two cards: +```bash +QUANT_CONFIG=./quantization_config/maxabs_quant.json python ../gaudi_spawn.py \ +--use_deepspeed --world_size 2 run_generation.py \ +--model_name_or_path meta-llama/Llama-3.1-70B \ +--attn_softmax_bf16 \ +--use_hpu_graphs \ +--trim_logits \ +--use_kv_cache \ +--reuse_cache \ +--use_flash_attention \ +--flash_attention_recompute \ +--bf16 \ +--batch_size 1 \ +--max_new_tokens 128 \ +--max_input_tokens 128 \ +--limit_hpu_graphs \ +--save_quantized_model_with_inc \ +--saved_model_path +``` + +> [!NOTE] +> For multi-card usage, the number of cards loaded and used needs to be kept consistent with that when saving. + +### Loading FP8 Checkpoints from Hugging Face +You can load pre-quantized FP8 models using the `--load_quantized_model_with_inc` argument. The `model_name_or_path` should be a model name from [Neural Magic](https://huggingface.co/collections/neuralmagic/fp8-llms-for-vllm-666742ed2b78b7ac8df13127) or a path to FP8 Checkpoints saved in Hugging Face format. + +Below is an example of how to load `neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8` on two cards. +```bash +python ../gaudi_spawn.py \ +--use_deepspeed --world_size 2 run_lm_eval.py \ +-o acc_load_fp8_model.txt \ +--model_name_or_path neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8 \ +--use_hpu_graphs \ +--use_kv_cache \ +--trim_logits \ +--batch_size 1 \ +--bf16 \ +--use_flash_attention \ +--flash_attention_recompute \ +--attn_softmax_bf16 \ +--bucket_size=128 \ +--bucket_internal \ +--load_quantized_model_with_inc +``` ### Loading 4 Bit Checkpoints from Hugging Face @@ -727,6 +793,36 @@ python run_generation.py \ --load_quantized_model_with_autogptq ``` +### Running with UINT4 weight quantization using AutoAWQ + +Llama2-7b supports UINT4 weight-only quantization through [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), which offers quantization capabilities in PyTorch. +Currently, this support is limited to UINT4 inference of pre-quantized models only. + +Please run the following command to install AutoAWQ: +```bash +pip install -r requirements_awq.txt +``` + +You can run a *UINT4 weight quantized* model using AutoAWQ by including the argument `--load_quantized_model_with_autoawq`. 
+ +Here is an example of how to run a quantized model : +```bash +python run_generation.py \ +--attn_softmax_bf16 \ +--model_name_or_path \ +--use_hpu_graphs \ +--limit_hpu_graphs \ +--use_kv_cache \ +--bucket_size 128 \ +--bucket_internal \ +--trim_logits \ +--max_new_tokens 128 \ +--batch_size 1 \ +--bf16 \ +--load_quantized_model_with_autoawq +``` + + ## Language Model Evaluation Harness The evaluation of LLMs can be done using the `lm_eval.py` script. It utilizes the [LM evaluation harness](https://github.com/EleutherAI/lm-evaluation-harness) diff --git a/examples/text-generation/quantization_config/maxabs_quant_mixtral.json b/examples/text-generation/quantization_config/maxabs_quant_mixtral.json index 87dc52d08a..caaff8d09e 100644 --- a/examples/text-generation/quantization_config/maxabs_quant_mixtral.json +++ b/examples/text-generation/quantization_config/maxabs_quant_mixtral.json @@ -3,10 +3,7 @@ "mode": "QUANTIZE", "observer": "maxabs", "scale_method": "maxabs_hw", - "allowlist": {"types": [], "names": ["gate","w1","w3","w2"]}, - "blocklist": {"types": [], "names": [ - "model.layers.1.block_sparse_moe.experts.(3|4).w2", - "model.layers.[29-31].block_sparse_moe.experts.[0-7].w2" - ]}, + "allowlist": {"types": [], "names": []}, + "blocklist": {"types": [], "names": ["self_attn"]}, "dump_stats_path": "./hqt_output/measure" } \ No newline at end of file diff --git a/examples/text-generation/quantization_config/unit_scale_quant.json b/examples/text-generation/quantization_config/unit_scale_quant.json index 216cf27e68..20783ea3f1 100644 --- a/examples/text-generation/quantization_config/unit_scale_quant.json +++ b/examples/text-generation/quantization_config/unit_scale_quant.json @@ -3,5 +3,10 @@ "mode": "QUANTIZE", "observer": "maxabs", "scale_method": "unit_scale", - "dump_stats_path": "./hqt_output/measure" + "whitelist": {"types": [], "names": []}, + "blacklist": {"types": [], "names": []}, + "quantize_weight": false, + "dump_stats_path": "./results/hk", + "ignore_modules_wo_measures": "True", + "dump_stats_xlsx_path": "./run_outputs/fp8stats.xlsx" } diff --git a/examples/text-generation/run_generation.py b/examples/text-generation/run_generation.py index ae04d92970..f6d5bccfc8 100755 --- a/examples/text-generation/run_generation.py +++ b/examples/text-generation/run_generation.py @@ -29,7 +29,7 @@ import torch from transformers import BatchEncoding -from utils import adjust_batch, count_hpu_graphs, finalize_quantization, initialize_model +from utils import adjust_batch, count_hpu_graphs, finalize_quantization, initialize_model, save_model from optimum.habana.utils import get_hpu_memory_stats @@ -226,6 +226,11 @@ def setup_parser(parser): action="store_true", help="Skip HPU Graph usage for first token to save memory", ) + parser.add_argument( + "--clear_hpu_graphs_cache", + action="store_true", + help="Clear HPU graphs cache", + ) parser.add_argument( "--show_graphs_count", action="store_true", @@ -323,6 +328,17 @@ def setup_parser(parser): parser.add_argument( "--sdp_on_bf16", action="store_true", help="Allow pyTorch to use reduced precision in the SDPA math backend" ) + parser.add_argument( + "--save_quantized_model_with_inc", + action="store_true", + help="Save quantized Huggingface checkpoint using INC.", + ) + parser.add_argument( + "--saved_model_path", + type=str, + default="inc_quantized_model", + help="A path to save quantized checkpoint.", + ) quant_parser_group = parser.add_mutually_exclusive_group() quant_parser_group.add_argument( @@ -330,6 +346,11 @@ def 
setup_parser(parser): action="store_true", help="Load an AutoGPTQ quantized checkpoint using AutoGPTQ.", ) + quant_parser_group.add_argument( + "--load_quantized_model_with_autoawq", + action="store_true", + help="Load an AutoAWQ quantized checkpoint using AutoAWQ.", + ) quant_parser_group.add_argument( "--disk_offload", action="store_true", @@ -338,7 +359,7 @@ def setup_parser(parser): quant_parser_group.add_argument( "--load_quantized_model_with_inc", action="store_true", - help="Load a Huggingface quantized checkpoint using INC.", + help="Load a quantized Huggingface checkpoint using INC.", ) quant_parser_group.add_argument( "--local_quantized_inc_model_path", @@ -346,6 +367,12 @@ def setup_parser(parser): default=None, help="Path to neural-compressor quantized model, if set, the checkpoint will be loaded.", ) + parser.add_argument( + "--attn_batch_split", + default=1, + type=int, + help="Specify the batch size split for attention and mlp layers. 1 for no split. This is enabled only for prompt.", + ) args = parser.parse_args() @@ -361,6 +388,8 @@ def setup_parser(parser): args.quant_config = os.getenv("QUANT_CONFIG", "") if args.quant_config and args.load_quantized_model_with_autogptq: raise RuntimeError("Setting both quant_config and load_quantized_model_with_autogptq is unsupported. ") + if args.quant_config and args.load_quantized_model_with_autoawq: + raise RuntimeError("Setting both quant_config and load_quantized_model_with_autoawq is unsupported. ") if args.quant_config == "" and args.disk_offload: logger.warning( @@ -773,6 +802,8 @@ def generate_dataset(batch): print(separator) if args.quant_config: finalize_quantization(model) + if args.save_quantized_model_with_inc: + save_model(model, tokenizer, args.saved_model_path) if args.const_serialization_path and os.path.isdir(args.const_serialization_path): import shutil diff --git a/examples/text-generation/run_lm_eval.py b/examples/text-generation/run_lm_eval.py index cb3ecd80f0..4e536dc757 100644 --- a/examples/text-generation/run_lm_eval.py +++ b/examples/text-generation/run_lm_eval.py @@ -34,7 +34,7 @@ from run_generation import setup_parser from transformers import AutoModelForCausalLM, AutoTokenizer from transformers.generation import GenerationConfig -from utils import finalize_quantization, initialize_model +from utils import finalize_quantization, initialize_model, save_model from optimum.habana.utils import get_hpu_memory_stats @@ -254,6 +254,8 @@ def main() -> None: if args.quant_config: finalize_quantization(model) + if args.save_quantized_model_with_inc: + save_model(model, tokenizer, args.saved_model_path) if args.const_serialization_path and os.path.isdir(args.const_serialization_path): import shutil diff --git a/examples/text-generation/utils.py b/examples/text-generation/utils.py index 2510b2e019..61270ca218 100644 --- a/examples/text-generation/utils.py +++ b/examples/text-generation/utils.py @@ -158,7 +158,7 @@ def setup_device(args): if args.device == "hpu": import habana_frameworks.torch.core as htcore - if args.quant_config: + if args.quant_config or args.load_quantized_model_with_inc or args.local_quantized_inc_model_path: htcore.hpu_set_env() return torch.device(args.device) @@ -252,7 +252,20 @@ def setup_model(args, model_dtype, model_kwargs, logger): model = AutoModelForCausalLM.from_pretrained( args.model_name_or_path, torch_dtype=model_dtype, quantization_config=quantization_config, **model_kwargs ) + elif args.load_quantized_model_with_autoawq: + from transformers import AwqConfig + + quantization_config 
= AwqConfig(bits=4, version="hpu") + model = AutoModelForCausalLM.from_pretrained( + args.model_name_or_path, torch_dtype=model_dtype, quantization_config=quantization_config, **model_kwargs + ) elif args.load_quantized_model_with_inc: + # TODO: This will be removed in v1.20 Synapse release + # Override neural_compressor split_rank_state_dict for loading neural_magic models on multi-cards. + import neural_compressor.torch.algorithms.fp8_quant.save_load as nc_sl + + nc_sl.split_rank_state_dict = local_split_rank_state_dict + from neural_compressor.torch.quantization import load model = load(model_name_or_path=args.model_name_or_path, format="huggingface", device="hpu", **model_kwargs) @@ -308,6 +321,9 @@ def setup_model(args, model_dtype, model_kwargs, logger): if args.torch_compile: model = get_torch_compiled_model(model, logger) + assert "PT_HPU_LAZY_MODE" in os.environ and os.environ["PT_HPU_LAZY_MODE"] == "0", ( + "Please set PT_HPU_LAZY_MODE=0 on command line when using `--torch_compile`" + ) # if args.assistant_model is not None: # assistant_model = get_torch_compiled_model(assistant_model, logger) return model, assistant_model @@ -423,7 +439,12 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger): logger.info("DeepSpeed is enabled.") deepspeed.init_distributed(dist_backend="hccl") config = AutoConfig.from_pretrained(args.model_name_or_path, torch_dtype=model_dtype, **model_kwargs) - load_to_meta = model_on_meta(config) + + keep_module_on_host = False + if "Llama-3.1-405B" in args.model_name_or_path: + keep_module_on_host = True + + load_to_meta = False if keep_module_on_host else model_on_meta(config) if args.assistant_model is None: assistant_model = None @@ -478,6 +499,7 @@ def setup_distributed_model(args, model_dtype, model_kwargs, logger): # Initialize the model ds_inference_kwargs = {"dtype": model_dtype} + ds_inference_kwargs["keep_module_on_host"] = keep_module_on_host ds_inference_kwargs["tensor_parallel"] = {"tp_size": args.world_size} ds_inference_kwargs["enable_cuda_graph"] = args.use_hpu_graphs ds_inference_kwargs["injection_policy"] = get_ds_injection_policy(config) @@ -611,6 +633,12 @@ def setup_tokenizer(args, model, assistant_model, logger): ) model.generation_config.eos_token_id = model.generation_config.eos_token_id[-1] + if model.config.model_type == "mpt": + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + if model.generation_config.pad_token_id is None: + model.generation_config.pad_token_id = tokenizer.eos_token_id + # Some models like GPT2 do not have a PAD token so we have to set it if necessary if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token @@ -648,6 +676,7 @@ def setup_generation_config(args, model, assistant_model, tokenizer): generation_config.trim_logits = args.trim_logits generation_config.attn_softmax_bf16 = args.attn_softmax_bf16 generation_config.limit_hpu_graphs = args.limit_hpu_graphs + generation_config.clear_hpu_graphs_cache = args.clear_hpu_graphs_cache generation_config.reuse_cache = args.reuse_cache generation_config.reduce_recompile = args.reduce_recompile if generation_config.reduce_recompile: @@ -658,6 +687,7 @@ def setup_generation_config(args, model, assistant_model, tokenizer): generation_config.flash_attention_fast_softmax = args.flash_attention_fast_softmax generation_config.trust_remote_code = args.trust_remote_code generation_config.valid_sequence_lengths = None + generation_config.attn_batch_split = args.attn_batch_split return generation_config @@ -668,7 
+698,7 @@ def exclude_hpu_graph_configs(args): if "falcon-180B" in args.model_name_or_path or "falcon-180b" in args.model_name_or_path: return False if args.world_size == 2 or args.world_size == 4 or args.world_size == 8: - if args.quant_config: + if args.quant_config or args.load_quantized_model_with_inc or args.local_quantized_inc_model_path: if args.max_input_tokens >= 8192 and args.max_new_tokens >= 128: return False else: @@ -682,6 +712,9 @@ def exclude_hpu_graph_configs(args): def initialize_model(args, logger): init_start = time.perf_counter() setup_distributed(args) + if not args.world_size > 0 and args.attn_batch_split > 1: + logger.warning("Disabling attention batch splitting as it's unnecessary for single-card execution") + args.attn_batch_split = 1 if exclude_hpu_graph_configs(args): args.limit_hpu_graphs = False override_prints(args.global_rank == 0 or args.verbose_workers, logger) @@ -711,7 +744,7 @@ def initialize_model(args, logger): model, assistant_model = ( setup_model(args, model_dtype, model_kwargs, logger) - if not use_deepspeed + if not use_deepspeed or args.load_quantized_model_with_inc else setup_distributed_model(args, model_dtype, model_kwargs, logger) if args.parallel_strategy == "none" else setup_distributed_model_tp(args, model_dtype, model_kwargs, logger, cache_dir) @@ -724,10 +757,48 @@ def initialize_model(args, logger): if args.const_serialization_path: setup_const_serialization(args.const_serialization_path) - if args.quant_config: + if args.quant_config or args.load_quantized_model_with_inc or args.local_quantized_inc_model_path: model = setup_inference(args, model) init_end = time.perf_counter() logger.info(f"Args: {args}") logger.info(f"device: {args.device}, n_hpu: {args.world_size}, bf16: {model_dtype == torch.bfloat16}") logger.info(f"Model initialization took {(init_end - init_start):.3f}s") return model, assistant_model, tokenizer, generation_config + + +def save_model(model, tokenizer, save_path): + """Saves the model and tokenizer in the huggingface format with neural_compressor.""" + from neural_compressor.torch.quantization import save + + save(model, save_path, format="huggingface") + tokenizer.save_pretrained(save_path) + + +# TODO: This will be removed in v1.20 Synapse release +# Override neural_compressor split_rank_state_dict for loading neural_magic models on multi-cards. 
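For reference, the two INC code paths touched in this file form a pair: the `save_model` helper above persists an already-quantized model in Hugging Face format, while the `--load_quantized_model_with_inc` branch earlier in the file reloads such a checkpoint on HPU. A minimal sketch of that round trip, reusing the same `neural_compressor` calls as the patch; the checkpoint directory and wrapper function names are hypothetical:

```python
from neural_compressor.torch.quantization import load, save


def save_inc_checkpoint(model, tokenizer, save_path="./inc_fp8_checkpoint"):
    """Persist a model already quantized with INC, plus its tokenizer, in HF format."""
    save(model, save_path, format="huggingface")
    tokenizer.save_pretrained(save_path)


def load_inc_checkpoint(save_path="./inc_fp8_checkpoint", **model_kwargs):
    """Reload the quantized checkpoint directly onto HPU, as --load_quantized_model_with_inc does."""
    return load(model_name_or_path=save_path, format="huggingface", device="hpu", **model_kwargs)
```

The `split_rank_state_dict` override announced in the TODO comment above follows next.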
+def local_split_rank_state_dict(model, gathered_state_dict): + """split state_dict for current local_rank.""" + from neural_compressor.torch.algorithms.fp8_quant.save_load import ( + cur_accelerator, + local_rank, + split_weights, + world_size, + ) + + rank_state_dict = {} + for name, param in model.named_parameters(): + if name in gathered_state_dict: + full_weight = gathered_state_dict[name] + if len(param.shape) != 0 and full_weight.shape != param.shape: + if full_weight.shape[0] != param.shape[0]: + split_weight = split_weights(full_weight, world_size, local_rank, split_axis=0).clone() + elif full_weight.shape[1] != param.shape[1]: + split_weight = split_weights(full_weight, world_size, local_rank, split_axis=1).clone() + else: + split_weight = split_weights(full_weight, world_size, local_rank, split_axis=0).clone() + else: + split_weight = full_weight + rank_state_dict[name] = split_weight + cur_accelerator.synchronize() + + return rank_state_dict diff --git a/examples/text-to-speech/requirements.txt b/examples/text-to-speech/requirements.txt index c5fb09c806..01d3da67aa 100644 --- a/examples/text-to-speech/requirements.txt +++ b/examples/text-to-speech/requirements.txt @@ -1,2 +1,3 @@ datasets soundfile +sentencepiece diff --git a/examples/text-to-video/README.md b/examples/text-to-video/README.md deleted file mode 100644 index 49905cb5b8..0000000000 --- a/examples/text-to-video/README.md +++ /dev/null @@ -1,51 +0,0 @@ - - -# Text to Video Examples - -This directory contains a script that showcases how to use the `GaudiTextToVideoSDPipeline` to run text-to-video generation tasks on HPUs. - -## Requirements - -First, you should install the requirements: - -```bash -pip install -r requirements.txt -``` - -## Single-HPU inference - -```bash -python3 text_to_video_generation.py \ - --model_name_or_path ali-vilab/text-to-video-ms-1.7b \ - --prompts "An astronaut riding a horse" \ - --use_habana \ - --use_hpu_graphs \ - --dtype bf16 -``` - -Models that have been validated: - - [ali-vilab/text-to-video-ms-1.7b](https://huggingface.co/ali-vilab/text-to-video-ms-1.7b) - -CogvideoX test: -```bash -python3 text_to_video_generation.py \ - --model_name_or_path THUDM/CogVideoX-2b \ - --pipeline_type 'cogvideox' \ - --video_save_dir 'cogvideo_out' \ -``` - - diff --git a/examples/text-to-video/requirements.txt b/examples/text-to-video/requirements.txt deleted file mode 100644 index f3e192bbdc..0000000000 --- a/examples/text-to-video/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -opencv-python-headless -sentencepiece -imageio -imageio-ffmpeg - diff --git a/examples/text-to-video/text_to_video_generation.py b/examples/text-to-video/text_to_video_generation.py deleted file mode 100755 index 014fe5d6dc..0000000000 --- a/examples/text-to-video/text_to_video_generation.py +++ /dev/null @@ -1,242 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2021 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and - -# Adapted from ../stable-diffusion/text_to_image_generation.py - -import argparse -import logging -import sys -from pathlib import Path - -import torch -from diffusers.utils.export_utils import export_to_video - -from optimum.habana.diffusers import GaudiCogVideoXPipeline, GaudiTextToVideoSDPipeline -from optimum.habana.transformers.gaudi_configuration import GaudiConfig -from optimum.habana.utils import set_seed - - -try: - from optimum.habana.utils import check_optimum_habana_min_version -except ImportError: - - def check_optimum_habana_min_version(*a, **b): - return () - - -# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks. -check_optimum_habana_min_version("1.16.0.dev0") - - -logger = logging.getLogger(__name__) - - -def main(): - parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument( - "--model_name_or_path", - default="ali-vilab/text-to-video-ms-1.7b", - type=str, - help="Path to pre-trained model", - ) - # Pipeline arguments - parser.add_argument( - "--prompts", - type=str, - nargs="*", - default="Spiderman is surfing", - help="The prompt or prompts to guide the video generation.", - ) - parser.add_argument( - "--pipeline_type", - type=str, - nargs="*", - default="sdp", - help="pipeline type:sdp or cogvideoX", - ) - parser.add_argument( - "--num_videos_per_prompt", type=int, default=1, help="The number of videos to generate per prompt." - ) - parser.add_argument("--batch_size", type=int, default=1, help="The number of videos in a batch.") - parser.add_argument( - "--height", - type=int, - default=0, - help="The height in pixels of the generated videos (0=default from model config).", - ) - parser.add_argument( - "--width", - type=int, - default=0, - help="The width in pixels of the generated videos (0=default from model config).", - ) - parser.add_argument("--num_frames", type=int, default=20, help="The number of frames in the generated videos.") - parser.add_argument( - "--num_inference_steps", - type=int, - default=50, - help=( - "The number of denoising steps. More denoising steps usually lead to a higher quality videos at the expense" - " of slower inference." - ), - ) - parser.add_argument( - "--guidance_scale", - type=float, - default=7.5, - help=( - "Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598)." - " Higher guidance scale encourages to generate videos that are closely linked to the text `prompt`," - " usually at the expense of lower video quality." 
- ), - ) - parser.add_argument( - "--negative_prompts", - type=str, - nargs="*", - default=None, - help="The prompt or prompts not to guide the video generation.", - ) - parser.add_argument( - "--eta", - type=float, - default=0.0, - help="Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502.", - ) - parser.add_argument( - "--output_type", - type=str, - choices=["mp4", "np"], - default="mp4", - help="Whether to return mp4 or Numpy arrays.", - ) - - parser.add_argument( - "--pipeline_save_dir", - type=str, - default=None, - help="The directory where the generation pipeline will be saved.", - ) - parser.add_argument( - "--video_save_dir", - type=str, - default="./generated-videos", - help="The directory where videos will be saved.", - ) - - parser.add_argument("--seed", type=int, default=42, help="Random seed for initialization.") - - # HPU-specific arguments - parser.add_argument("--use_habana", action="store_true", help="Use HPU.") - parser.add_argument( - "--use_hpu_graphs", action="store_true", help="Use HPU graphs on HPU. This should lead to faster generations." - ) - parser.add_argument( - "--dtype", - default="bf16", - choices=["bf16", "fp32", "autocast_bf16"], - help="Which runtime dtype to perform generation in.", - ) - args = parser.parse_args() - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO) - logger.info(f"Arguments: {args}") - - # Set video resolution - kwargs_call = {} - if args.width > 0 and args.height > 0: - kwargs_call["width"] = args.width - kwargs_call["height"] = args.height - kwargs_call["num_frames"] = args.num_frames - - gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} - if args.dtype == "autocast_bf16": - gaudi_config_kwargs["use_torch_autocast"] = True - - gaudi_config = GaudiConfig(**gaudi_config_kwargs) - logger.info(f"Gaudi Config: {gaudi_config}") - - kwargs = { - "use_habana": args.use_habana, - "use_hpu_graphs": args.use_hpu_graphs, - "gaudi_config": gaudi_config, - } - if args.dtype == "bf16": - kwargs["torch_dtype"] = torch.bfloat16 - elif args.dtype == "fp32": - kwargs["torch_dtype"] = torch.float32 - - # Generate images - if args.pipeline_type[0] == "sdp": - pipeline: GaudiTextToVideoSDPipeline = GaudiTextToVideoSDPipeline.from_pretrained( - args.model_name_or_path, **kwargs - ) - set_seed(args.seed) - outputs = pipeline( - prompt=args.prompts, - num_videos_per_prompt=args.num_videos_per_prompt, - batch_size=args.batch_size, - num_inference_steps=args.num_inference_steps, - guidance_scale=args.guidance_scale, - negative_prompt=args.negative_prompts, - eta=args.eta, - output_type="pil" if args.output_type == "mp4" else args.output_type, # Naming inconsistency in base class - **kwargs_call, - ) - # Save the pipeline in the specified directory if not None - if args.pipeline_save_dir is not None: - pipeline.save_pretrained(args.pipeline_save_dir) - - # Save images in the specified directory if not None and if they are in PIL format - if args.video_save_dir is not None: - if args.output_type == "mp4": - video_save_dir = Path(args.video_save_dir) - video_save_dir.mkdir(parents=True, exist_ok=True) - logger.info(f"Saving images in {video_save_dir.resolve()}...") - - for i, video in enumerate(outputs.videos): - filename = video_save_dir / f"video_{i + 1}.mp4" - export_to_video(video, str(filename.resolve())) - else: - 
logger.warning("--output_type should be equal to 'mp4' to save images in --video_save_dir.") - - elif args.pipeline_type[0] == "cogvideox": - pipeline: GaudiCogVideoXPipeline = GaudiCogVideoXPipeline.from_pretrained(args.model_name_or_path, **kwargs) - pipeline.vae.enable_tiling() - pipeline.vae.enable_slicing() - video = pipeline( - prompt=args.prompts, - num_videos_per_prompt=1, - num_inference_steps=50, - num_frames=49, - guidance_scale=6, - generator=torch.Generator(device="cpu").manual_seed(42), - ).frames[0] - video_save_dir = Path(args.video_save_dir) - video_save_dir.mkdir(parents=True, exist_ok=True) - filename = video_save_dir / "cogvideoX_out.mp4" - export_to_video(video, str(filename.resolve()), fps=8) - else: - logger.error(f"unsupported pipe line:{args.pipeline_type}") - - -if __name__ == "__main__": - main() diff --git a/optimum/habana/accelerate/accelerator.py b/optimum/habana/accelerate/accelerator.py index f73769692d..8566c9a7e5 100644 --- a/optimum/habana/accelerate/accelerator.py +++ b/optimum/habana/accelerate/accelerator.py @@ -123,6 +123,7 @@ def __init__( dynamic: bool | None = None, distribution_strategy: str = None, force_autocast: bool = False, + use_regional_compilation: bool | None = None, ): self.trackers = [] self.mpu = parallel_state @@ -315,6 +316,7 @@ def __init__( ) self.step_scheduler_with_optimizer = step_scheduler_with_optimizer self.dynamic = dynamic + self.use_regional_compilation = use_regional_compilation # Mixed precision attributes self.scaler = None @@ -407,7 +409,7 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e model.forward = convert_outputs_to_fp32(new_forward) if self.state.is_fp8_enabled: - model = convert_model(model) + model = convert_model(model, _minimize_memory=GaudiPartialState().minimize_memory) if (getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False)) and getattr( model, "hf_device_map", False @@ -577,6 +579,19 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e model = torch.compile(model, **self.state.dynamo_plugin.to_kwargs()) return model + def compile_regions(self, model): + if isinstance(model, torch.nn.ModuleList): + for name, module in model.named_children(): + if self.dynamic is not None: + module = torch.compile(module, dynamic=self.dynamic, **self.state.dynamo_plugin.to_kwargs()) + else: + module = torch.compile(module, **self.state.dynamo_plugin.to_kwargs()) + module.__dict__.pop("_parameters", None) + setattr(model, name, module) + else: + for _, module in model.named_children(): + self.compile_regions(module) + def _prepare_deepspeed(self, *args): import deepspeed @@ -586,7 +601,7 @@ def _prepare_deepspeed(self, *args): result = [ self._prepare_one(obj, first_pass=True) if isinstance(obj, torch.utils.data.DataLoader) - else convert_model(obj) + else convert_model(obj, _minimize_memory=GaudiPartialState().minimize_memory) if isinstance(obj, torch.nn.Module) and self.state.is_fp8_enabled else obj for obj in args @@ -783,7 +798,10 @@ def _prepare_deepspeed(self, *args): if self.state.dynamo_plugin.backend == GaudiDynamoBackend.HPU_BACKEND and not is_compiled_module( kwargs["model"] ): - engine.compile(compile_kwargs={"dynamic": self.dynamic}) + if self.use_regional_compilation: + self.compile_regions(engine.module) + else: + engine.compile(compile_kwargs={"dynamic": self.dynamic}) if optimizer is not None: optimizer = DeepSpeedOptimizerWrapper(optimizer) if scheduler is not None: diff --git 
a/optimum/habana/accelerate/state.py b/optimum/habana/accelerate/state.py index b692c3e036..c5d241e384 100644 --- a/optimum/habana/accelerate/state.py +++ b/optimum/habana/accelerate/state.py @@ -52,6 +52,7 @@ def __init__(self, cpu: bool = False, **kwargs): world_size, rank, local_rank = initialize_distributed_hpu() self.backend = kwargs.pop("backend", "hccl") context_parallel_size = kwargs.pop("context_parallel_size", 1) + self.minimize_memory = kwargs.pop("minimize_memory", False) if os.environ.get("ACCELERATE_USE_DEEPSPEED", "false") == "true": if not is_deepspeed_available(): raise ImportError( diff --git a/optimum/habana/accelerate/utils/transformer_engine.py b/optimum/habana/accelerate/utils/transformer_engine.py index 03f070434c..89e1a895aa 100755 --- a/optimum/habana/accelerate/utils/transformer_engine.py +++ b/optimum/habana/accelerate/utils/transformer_engine.py @@ -16,6 +16,13 @@ import functools import torch +from transformers.utils import ( + is_peft_available, +) + + +if is_peft_available(): + from peft.tuners import lora has_transformer_engine = False @@ -38,16 +45,39 @@ def is_fp8_available(): return has_transformer_engine -def _convert_model(model, to_transformer_engine=True, _convert_linear=True): +def _convert_model(model, to_transformer_engine=True, _convert_linear=True, _minimize_memory=False): """ Recursively converts the linear layer of a model to their `transformers_engine` counterpart. """ - from optimum.habana.transformers.models.llama.modeling_llama import ModuleFusedSDPA + from ...transformers.models.llama.modeling_llama import ModuleFusedSDPA if not is_fp8_available(): raise ImportError("Using `convert_model` requires transformer_engine to be installed.") for name, module in model.named_children(): - if isinstance(module, torch.nn.Linear) and to_transformer_engine and _convert_linear: + if is_peft_available() and isinstance(module, lora.Linear) and to_transformer_engine and _convert_linear: + # For lora linear module, convert only base linear layer to fp8 and skip lora-a, + # lora-b linear layers. Since lora-a, lora-b are small in size, there is not much + # device performance gain by pushing these in fp8. This way we avoid host overhead + # associated with using TE for these layers. + for name, lora_module in module.named_children(): + if name == "base_layer": + has_bias = lora_module.bias is not None + # Initializing TE linear without weights and biases and shallow copying them from the original module. + te_module = te.Linear( + lora_module.in_features, + lora_module.out_features, + bias=has_bias, + params_dtype=lora_module.weight.dtype, + skip_weight_param_allocation=True, + minimize_memory=_minimize_memory, + ) + te_module.weight = lora_module.weight + + if has_bias: + te_module.bias = lora_module.bias + + setattr(module, name, te_module) + elif isinstance(module, torch.nn.Linear) and to_transformer_engine and _convert_linear: has_bias = module.bias is not None # Initializing TE linear without weights and biases and shallow copying them from the original module. 
te_module = te.Linear( @@ -56,6 +86,7 @@ def _convert_model(model, to_transformer_engine=True, _convert_linear=True): bias=has_bias, params_dtype=module.weight.dtype, skip_weight_param_allocation=True, + minimize_memory=_minimize_memory, ) te_module.weight = module.weight @@ -109,7 +140,12 @@ def forward( setattr(model, name, TE_ModuleFusedSDPA()) else: - _convert_model(module, to_transformer_engine=to_transformer_engine, _convert_linear=_convert_linear) + _convert_model( + module, + to_transformer_engine=to_transformer_engine, + _convert_linear=_convert_linear, + _minimize_memory=_minimize_memory, + ) def has_transformer_engine_layers(model): @@ -124,14 +160,14 @@ def has_transformer_engine_layers(model): return False -def convert_model(model): +def convert_model(model, _minimize_memory=False): """ Converts torch.nn.Linear modules to `transformers_engine` Linear modules. Adapted from: https://github.com/huggingface/accelerate/blob/v0.27.2/src/accelerate/accelerator.py#L1303 """ if not has_transformer_engine_layers(model): with torch.no_grad(): - _convert_model(model) + _convert_model(model, _minimize_memory=_minimize_memory) model._converted_to_transformer_engine = True return model diff --git a/optimum/habana/diffusers/__init__.py b/optimum/habana/diffusers/__init__.py index 086257a8f8..fcd35aed9c 100644 --- a/optimum/habana/diffusers/__init__.py +++ b/optimum/habana/diffusers/__init__.py @@ -7,6 +7,7 @@ from .pipelines.ddpm.pipeline_ddpm import GaudiDDPMPipeline from .pipelines.flux.pipeline_flux import GaudiFluxPipeline from .pipelines.flux.pipeline_flux_img2img import GaudiFluxImg2ImgPipeline +from .pipelines.i2vgen_xl.pipeline_i2vgen_xl import GaudiI2VGenXLPipeline from .pipelines.pipeline_utils import GaudiDiffusionPipeline from .pipelines.stable_diffusion.pipeline_stable_diffusion import GaudiStableDiffusionPipeline from .pipelines.stable_diffusion.pipeline_stable_diffusion_depth2img import GaudiStableDiffusionDepth2ImgPipeline diff --git a/optimum/habana/diffusers/pipelines/controlnet/pipeline_controlnet.py b/optimum/habana/diffusers/pipelines/controlnet/pipeline_controlnet.py index 2cf8c866ec..c274ab0f00 100644 --- a/optimum/habana/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/optimum/habana/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -82,6 +82,8 @@ class GaudiStableDiffusionControlNetPipeline(GaudiDiffusionPipeline, StableDiffu bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. 
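The `sdp_on_bf16` option being threaded through these pipeline constructors boils down to a single PyTorch switch, applied through the guarded call added to `pipeline_utils.py` further below. A minimal sketch of what enabling it amounts to, assuming a PyTorch build that exposes this private hook:

```python
import torch

# What sdp_on_bf16=True turns on: allow the math SDPA backend to use
# reduced-precision (bf16/fp16) reduction. The hook is private, so it is only
# invoked when present, mirroring the hasattr guard added to pipeline_utils.py.
if hasattr(torch._C, "_set_math_sdp_allow_fp16_bf16_reduction"):
    torch._C._set_math_sdp_allow_fp16_bf16_reduction(True)
```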
""" def __init__( @@ -100,7 +102,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, - sdp_on_bf16: bool = True, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, diff --git a/optimum/habana/diffusers/pipelines/controlnet/pipeline_stable_video_diffusion_controlnet.py b/optimum/habana/diffusers/pipelines/controlnet/pipeline_stable_video_diffusion_controlnet.py index b3419a8aea..50387fedb8 100644 --- a/optimum/habana/diffusers/pipelines/controlnet/pipeline_stable_video_diffusion_controlnet.py +++ b/optimum/habana/diffusers/pipelines/controlnet/pipeline_stable_video_diffusion_controlnet.py @@ -63,6 +63,18 @@ class GaudiStableVideoDiffusionControlNetPipeline(GaudiStableVideoDiffusionPipel A scheduler to be used in combination with `unet` to denoise the encoded image latents. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images. + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. + bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -77,6 +89,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -84,6 +97,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableVideoDiffusionPipeline.__init__( diff --git a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py index 65a7df7e2d..f27efd2a29 100644 --- a/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py +++ b/optimum/habana/diffusers/pipelines/ddpm/pipeline_ddpm.py @@ -25,11 +25,11 @@ from diffusers.utils import BaseOutput from diffusers.utils.torch_utils import randn_tensor -from optimum.habana.diffusers.pipelines.pipeline_utils import GaudiDiffusionPipeline -from optimum.habana.transformers.gaudi_configuration import GaudiConfig from optimum.utils import logging +from ....transformers.gaudi_configuration import GaudiConfig from ....utils import speed_metrics +from ..pipeline_utils import GaudiDiffusionPipeline logger = logging.get_logger(__name__) @@ -59,6 +59,18 @@ class GaudiDDPMPipeline(GaudiDiffusionPipeline, DDPMPipeline): scheduler ([`SchedulerMixin`]): A scheduler to be used in combination with `unet` to denoise the encoded image. Can be one of [`DDPMScheduler`], or [`DDIMScheduler`]. + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. 
+ bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -69,8 +81,16 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): - GaudiDiffusionPipeline.__init__(self, use_habana, use_hpu_graphs, gaudi_config, bf16_full_eval) + GaudiDiffusionPipeline.__init__( + self, + use_habana, + use_hpu_graphs, + gaudi_config, + bf16_full_eval, + sdp_on_bf16, + ) DDPMPipeline.__init__(self, unet, scheduler) diff --git a/optimum/habana/diffusers/pipelines/flux/pipeline_flux.py b/optimum/habana/diffusers/pipelines/flux/pipeline_flux.py index 73560b25d5..5657999cda 100644 --- a/optimum/habana/diffusers/pipelines/flux/pipeline_flux.py +++ b/optimum/habana/diffusers/pipelines/flux/pipeline_flux.py @@ -282,6 +282,18 @@ class GaudiFluxPipeline(GaudiDiffusionPipeline, FluxPipeline): tokenizer_2 (`T5TokenizerFast`): Second Tokenizer of class [T5TokenizerFast](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast). + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. + bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ model_cpu_offload_seq = "text_encoder->text_encoder_2->transformer->vae" @@ -301,6 +313,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -308,6 +321,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) FluxPipeline.__init__( self, diff --git a/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py b/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py index 53f3e85876..17894db5ae 100644 --- a/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py +++ b/optimum/habana/diffusers/pipelines/flux/pipeline_flux_img2img.py @@ -297,6 +297,8 @@ class GaudiFluxImg2ImgPipeline(GaudiDiffusionPipeline, FluxImg2ImgPipeline): bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. 
""" model_cpu_offload_seq = "text_encoder->text_encoder_2->transformer->vae" @@ -316,6 +318,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -323,6 +326,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) FluxImg2ImgPipeline.__init__( self, diff --git a/optimum/habana/diffusers/pipelines/pipeline_utils.py b/optimum/habana/diffusers/pipelines/pipeline_utils.py index c0233b4bfd..5215fd6603 100644 --- a/optimum/habana/diffusers/pipelines/pipeline_utils.py +++ b/optimum/habana/diffusers/pipelines/pipeline_utils.py @@ -128,7 +128,8 @@ def __init__( DiffusionPipeline.__init__(self) if sdp_on_bf16: - torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) + if hasattr(torch._C, "_set_math_sdp_allow_fp16_bf16_reduction"): + torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) self.use_habana = use_habana if self.use_habana: diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index ff9a139839..7efe1059bc 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -598,13 +598,18 @@ def __call__( hb_profiler.stop() speed_metrics_prefix = "generation" + if t1 == t0 or use_warmup_inference_steps: + num_samples = num_batches * batch_size + num_steps = num_inference_steps * num_batches * batch_size + else: + num_samples = (num_batches - throughput_warmup_steps) * batch_size + num_steps = (num_batches - throughput_warmup_steps) * num_inference_steps * batch_size + speed_measures = speed_metrics( split=speed_metrics_prefix, start_time=t0, - num_samples=num_batches * batch_size - if t1 == t0 or use_warmup_inference_steps - else (num_batches - throughput_warmup_steps) * batch_size, - num_steps=num_batches * batch_size * num_inference_steps, + num_samples=num_samples, + num_steps=num_steps, start_time_after_warmup=t1, ) logger.info(f"Speed metrics: {speed_measures}") diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py index 5432388229..c0a59fd5ec 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py @@ -95,6 +95,34 @@ class GaudiStableDiffusionDepth2ImgPipeline(GaudiDiffusionPipeline, StableDiffus - Add HPU Graphs - Depth map is now generated by CPU - Changed the logic of setting timestep + + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations. + text_encoder ([`~transformers.CLIPTextModel`]): + Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)). + tokenizer (`~transformers.CLIPTokenizer`): + A `CLIPTokenizer` to tokenize text. + unet ([`UNet2DConditionModel`]): + A `UNet2DConditionModel` to denoise the encoded image latents. + scheduler ([`KarrasDiffusionSchedulers`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latents. 
+ depth_estimator ([`DPTForDepthEstimation`]): + Estimates depth in generated images + feature_extractor ([`DPTFeatureExtractor`]): + A feature extractor to extract features from generated images + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. + bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -110,6 +138,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -117,6 +146,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableDiffusionDepth2ImgPipeline.__init__( diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py index b2a419389b..7cd8d23ade 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py @@ -71,6 +71,8 @@ class GaudiStableDiffusionImageVariationPipeline(GaudiDiffusionPipeline, StableD bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. 
""" def __init__( @@ -86,6 +88,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -93,6 +96,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) # Workaround for Synapse 1.11 for full bf16 diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index d7e36e983a..3086b23c0c 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -32,10 +32,10 @@ from diffusers.utils import deprecate from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection -from optimum.habana.utils import HabanaProfile, speed_metrics, warmup_inference_steps_time_adjustment from optimum.utils import logging from ....transformers.gaudi_configuration import GaudiConfig +from ....utils import HabanaProfile, speed_metrics, warmup_inference_steps_time_adjustment from ..pipeline_utils import GaudiDiffusionPipeline @@ -96,6 +96,39 @@ class GaudiStableDiffusionImg2ImgPipeline(GaudiDiffusionPipeline, StableDiffusio Adapted from: https://github.com/huggingface/diffusers/blob/v0.26.3/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py#L161 Changes: 1. Use CPU to generate random tensor + + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations. + text_encoder ([`~transformers.CLIPTextModel`]): + Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)). + tokenizer (`~transformers.CLIPTokenizer`): + A `CLIPTokenizer` to tokenize text. + unet ([`UNet2DConditionModel`]): + A `UNet2DConditionModel` to denoise the encoded image latents. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. + safety_checker ([`StableDiffusionSafetyChecker`]): + Classification module that estimates whether generated images could be considered offensive or harmful. + Please refer to the [model card](https://huggingface.co/CompVis/stable-diffusion-v1-4) for more details + about a model's potential harms. + feature_extractor ([`~transformers.CLIPImageProcessor`]): + A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`. + image_encoder ([`~transformers.CLIPVisionModelWithProjection`]): + Pre-trained CLIP vision model used to obtain image features. + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. + bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. 
+ sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -113,6 +146,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -120,6 +154,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableDiffusionImg2ImgPipeline.__init__( diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index f937423d13..ebbf428f9e 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -82,6 +82,8 @@ class GaudiStableDiffusionInpaintPipeline(GaudiDiffusionPipeline, StableDiffusio bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ _callback_tensor_inputs = ["latents", "prompt_embeds", "mask", "masked_image_latents"] @@ -101,6 +103,7 @@ def __init__( use_hpu_graphs: bool = True, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -108,6 +111,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableDiffusionInpaintPipeline.__init__( diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py index c4b0d0e742..a2a7ec1399 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py @@ -72,6 +72,8 @@ class GaudiStableDiffusionInstructPix2PixPipeline(GaudiDiffusionPipeline, Stable bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. 
""" def __init__( @@ -89,6 +91,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -96,6 +99,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) # Workaround for Synapse 1.11 for full bf16 diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py index 704c3c1cf1..72b7ab838b 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py @@ -84,6 +84,8 @@ class GaudiStableDiffusionLDM3DPipeline(GaudiDiffusionPipeline, StableDiffusionL bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -101,6 +103,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -108,6 +111,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) # Workaround for Synapse 1.11 for full bf16 diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py index 136ff0dace..e1f4804df7 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py @@ -90,6 +90,8 @@ class GaudiStableDiffusionUpscalePipeline(GaudiDiffusionPipeline, StableDiffusio bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. 
""" def __init__( @@ -108,8 +110,16 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): - GaudiDiffusionPipeline.__init__(self, use_habana, use_hpu_graphs, gaudi_config, bf16_full_eval) + GaudiDiffusionPipeline.__init__( + self, + use_habana, + use_hpu_graphs, + gaudi_config, + bf16_full_eval, + sdp_on_bf16, + ) # Workaround for Synapse 1.11 for full bf16 if bf16_full_eval: diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py b/optimum/habana/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py index 805584d64c..731e0434bf 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py @@ -20,6 +20,8 @@ import numpy as np import PIL import torch +import torch.nn.functional as F +from diffusers.models.attention_processor import Attention from diffusers.models.autoencoders import AutoencoderKL from diffusers.models.transformers import SD3Transformer2DModel from diffusers.pipelines.stable_diffusion_3 import StableDiffusion3Pipeline @@ -76,6 +78,101 @@ class GaudiStableDiffusion3PipelineOutput(BaseOutput): """ +# ToDo: Look into FusedJointAttnProcessor2_0 usage for sd3 pipeline, and check its perf using fused sdpa +class GaudiJointAttnProcessor2_0: + """Attention processor used typically in processing the SD3-like self-attention projections. + Copied from JointAttnProcessor2_0.forward: https://github.com/huggingface/diffusers/blob/89e4d6219805975bd7d253a267e1951badc9f1c0/src/diffusers/models/attention_processor.py + The only differences are: + - applied Fused SDPA from Habana's framework. + """ + + def __init__(self): + if not hasattr(F, "scaled_dot_product_attention"): + raise ImportError("AttnProcessor2_0 requires PyTorch 2.0, to use it, please upgrade PyTorch to 2.0.") + + def __call__( + self, + attn: Attention, + hidden_states: torch.FloatTensor, + encoder_hidden_states: torch.FloatTensor = None, + attention_mask: Optional[torch.FloatTensor] = None, + *args, + **kwargs, + ) -> torch.FloatTensor: + residual = hidden_states + + batch_size = hidden_states.shape[0] + + # `sample` projections. + query = attn.to_q(hidden_states) + key = attn.to_k(hidden_states) + value = attn.to_v(hidden_states) + + inner_dim = key.shape[-1] + head_dim = inner_dim // attn.heads + + query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2) + + if attn.norm_q is not None: + query = attn.norm_q(query) + if attn.norm_k is not None: + key = attn.norm_k(key) + + # `context` projections. 
+ if encoder_hidden_states is not None: + encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states) + encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states) + encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states) + + encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view( + batch_size, -1, attn.heads, head_dim + ).transpose(1, 2) + + if attn.norm_added_q is not None: + encoder_hidden_states_query_proj = attn.norm_added_q(encoder_hidden_states_query_proj) + if attn.norm_added_k is not None: + encoder_hidden_states_key_proj = attn.norm_added_k(encoder_hidden_states_key_proj) + + query = torch.cat([query, encoder_hidden_states_query_proj], dim=2) + key = torch.cat([key, encoder_hidden_states_key_proj], dim=2) + value = torch.cat([value, encoder_hidden_states_value_proj], dim=2) + + from habana_frameworks.torch.hpex.kernels import FusedSDPA + + hidden_states = FusedSDPA.apply(query, key, value, None, 0.0, False, None, "fast", None) + + # hidden_states = F.scaled_dot_product_attention(query, key, value, dropout_p=0.0, is_causal=False) + hidden_states = hidden_states.transpose(1, 2).reshape(batch_size, -1, attn.heads * head_dim) + hidden_states = hidden_states.to(query.dtype) + + if encoder_hidden_states is not None: + # Split the attention outputs. + hidden_states, encoder_hidden_states = ( + hidden_states[:, : residual.shape[1]], + hidden_states[:, residual.shape[1] :], + ) + if not attn.context_pre_only: + encoder_hidden_states = attn.to_add_out(encoder_hidden_states) + + # linear proj + hidden_states = attn.to_out[0](hidden_states) + # dropout + hidden_states = attn.to_out[1](hidden_states) + + if encoder_hidden_states is not None: + return hidden_states, encoder_hidden_states + else: + return hidden_states + + class GaudiStableDiffusion3Pipeline(GaudiDiffusionPipeline, StableDiffusion3Pipeline): r""" Adapted from: https://github.com/huggingface/diffusers/blob/v0.29.2/src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py#L128 @@ -110,6 +207,18 @@ class GaudiStableDiffusion3Pipeline(GaudiDiffusionPipeline, StableDiffusion3Pipe tokenizer_3 (`T5TokenizerFast`): Tokenizer of class [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer). + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. + bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. 
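Like the other Gaudi pipelines in this patch, the SD3 pipeline receives its Gaudi-specific options at construction time. A minimal sketch of instantiating it with the arguments documented above, assuming `from_pretrained` forwards them to `__init__` as the example scripts in this repository do; the model ID, dtype, and Gaudi config values are illustrative rather than prescribed by the patch:

```python
import torch

from optimum.habana.diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3 import (
    GaudiStableDiffusion3Pipeline,
)
from optimum.habana.transformers.gaudi_configuration import GaudiConfig

# Illustrative values only; the patch does not mandate this model or dtype.
pipe = GaudiStableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    torch_dtype=torch.bfloat16,
    use_habana=True,
    use_hpu_graphs=True,
    gaudi_config=GaudiConfig(use_torch_autocast=True),
    sdp_on_bf16=True,  # opt in to reduced-precision math SDPA (see pipeline_utils.py)
)
```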
""" def __init__( @@ -127,6 +236,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -134,6 +244,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableDiffusion3Pipeline.__init__( @@ -151,6 +262,93 @@ def __init__( self.to(self._device) + @classmethod + def _split_inputs_into_batches( + cls, + batch_size, + latents, + prompt_embeds, + negative_prompt_embeds, + pooled_prompt_embeds, + negative_pooled_prompt_embeds, + ): + # Use torch.split to generate num_batches batches of size batch_size + latents_batches = list(torch.split(latents, batch_size)) + prompt_embeds_batches = list(torch.split(prompt_embeds, batch_size)) + + if negative_prompt_embeds is not None: + negative_prompt_embeds_batches = list(torch.split(negative_prompt_embeds, batch_size)) + if pooled_prompt_embeds is not None: + pooled_prompt_embeds_batches = list(torch.split(pooled_prompt_embeds, batch_size)) + if negative_pooled_prompt_embeds is not None: + negative_pooled_prompt_embeds_batches = list(torch.split(negative_pooled_prompt_embeds, batch_size)) + + # If the last batch has less samples than batch_size, pad it with dummy samples + num_dummy_samples = 0 + if latents_batches[-1].shape[0] < batch_size: + num_dummy_samples = batch_size - latents_batches[-1].shape[0] + # Pad latents_batches + sequence_to_stack = (latents_batches[-1],) + tuple( + torch.zeros_like(latents_batches[-1][0][None, :]) for _ in range(num_dummy_samples) + ) + latents_batches[-1] = torch.vstack(sequence_to_stack) + # Pad prompt_embeds_batches + sequence_to_stack = (prompt_embeds_batches[-1],) + tuple( + torch.zeros_like(prompt_embeds_batches[-1][0][None, :]) for _ in range(num_dummy_samples) + ) + prompt_embeds_batches[-1] = torch.vstack(sequence_to_stack) + # Pad negative_prompt_embeds_batches if necessary + if negative_prompt_embeds is not None: + sequence_to_stack = (negative_prompt_embeds_batches[-1],) + tuple( + torch.zeros_like(negative_prompt_embeds_batches[-1][0][None, :]) for _ in range(num_dummy_samples) + ) + negative_prompt_embeds_batches[-1] = torch.vstack(sequence_to_stack) + # Pad add_text_embeds_batches if necessary + if pooled_prompt_embeds is not None: + sequence_to_stack = (pooled_prompt_embeds_batches[-1],) + tuple( + torch.zeros_like(pooled_prompt_embeds_batches[-1][0][None, :]) for _ in range(num_dummy_samples) + ) + pooled_prompt_embeds_batches[-1] = torch.vstack(sequence_to_stack) + # Pad negative_pooled_prompt_embeds_batches if necessary + if negative_pooled_prompt_embeds is not None: + sequence_to_stack = (negative_pooled_prompt_embeds_batches[-1],) + tuple( + torch.zeros_like(negative_pooled_prompt_embeds_batches[-1][0][None, :]) + for _ in range(num_dummy_samples) + ) + negative_pooled_prompt_embeds_batches[-1] = torch.vstack(sequence_to_stack) + + # Stack batches in the same tensor + latents_batches = torch.stack(latents_batches) + # if self.do_classifier_free_guidance: + + if negative_prompt_embeds is not None: + # For classifier free guidance, we need to do two forward passes. 
+ # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + for i, (negative_prompt_embeds_batch, prompt_embeds_batch) in enumerate( + zip(negative_prompt_embeds_batches, prompt_embeds_batches[:]) + ): + prompt_embeds_batches[i] = torch.cat([negative_prompt_embeds_batch, prompt_embeds_batch]) + + prompt_embeds_batches = torch.stack(prompt_embeds_batches) + + if pooled_prompt_embeds is not None: + if negative_pooled_prompt_embeds is not None: + # For classifier free guidance, we need to do two forward passes. + # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + for i, (negative_pooled_prompt_embeds_batch, pooled_prompt_embeds_batch) in enumerate( + zip(negative_pooled_prompt_embeds_batches, pooled_prompt_embeds_batches[:]) + ): + pooled_prompt_embeds_batches[i] = torch.cat( + [negative_pooled_prompt_embeds_batch, pooled_prompt_embeds_batch] + ) + pooled_prompt_embeds_batches = torch.stack(pooled_prompt_embeds_batches) + else: + pooled_prompt_embeds_batches = None + + return latents_batches, prompt_embeds_batches, pooled_prompt_embeds_batches, num_dummy_samples + @torch.no_grad() @replace_example_docstring(EXAMPLE_DOC_STRING) def __call__( @@ -288,9 +486,39 @@ def __call__( [`~pipelines.stable_diffusion_3.StableDiffusion3PipelineOutput`] if `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the generated images. """ + import habana_frameworks.torch as ht import habana_frameworks.torch.core as htcore - with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.gaudi_config.use_torch_autocast): + # Set dtype to BF16 only if --bf16 is used, else use device's default autocast precision + # When --bf16 is used, bf16_full_eval=True, which disables use_torch_autocast + with torch.autocast( + device_type="hpu", + enabled=self.gaudi_config.use_torch_autocast, + dtype=torch.bfloat16 if not self.gaudi_config.use_torch_autocast else None, + ): + quant_mode = kwargs.get("quant_mode", None) + if quant_mode == "measure" or quant_mode == "quantize": + import os + + quant_config_path = os.getenv("QUANT_CONFIG") + + if not quant_config_path: + raise ImportError( + "Error: QUANT_CONFIG path is not defined. Please define path to quantization configuration JSON file." + ) + elif not os.path.isfile(quant_config_path): + raise ImportError(f"Error: QUANT_CONFIG path '{quant_config_path}' is not valid") + + htcore.hpu_set_env() + from neural_compressor.torch.quantization import FP8Config, convert, prepare + + config = FP8Config.from_json_file(quant_config_path) + if config.measure: + self.transformer = prepare(self.transformer, config) + elif config.quantize: + self.transformer = convert(self.transformer, config) + htcore.hpu_initialize(self.transformer, mark_only_scales_as_const=True) + height = height or self.default_sample_size * self.vae_scale_factor width = width or self.default_sample_size * self.vae_scale_factor @@ -319,14 +547,17 @@ def __call__( # 2. 
Define call parameters if prompt is not None and isinstance(prompt, str): - batch_size = 1 + num_prompts = 1 elif prompt is not None and isinstance(prompt, list): - batch_size = len(prompt) + num_prompts = len(prompt) else: - batch_size = prompt_embeds.shape[0] + num_prompts = prompt_embeds.shape[0] + num_batches = ceil((num_images_per_prompt * num_prompts) / batch_size) device = self._execution_device + lora_scale = kwargs.get("lora_scale", None) if kwargs is not None else None + ( prompt_embeds, negative_prompt_embeds, @@ -348,12 +579,9 @@ def __call__( clip_skip=self.clip_skip, num_images_per_prompt=num_images_per_prompt, max_sequence_length=max_sequence_length, + lora_scale=lora_scale, ) - if self.do_classifier_free_guidance: - prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) - pooled_prompt_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0) - # 4. Prepare timesteps timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) @@ -362,7 +590,7 @@ def __call__( # 5. Prepare latent variables num_channels_latents = self.transformer.config.in_channels latents = self.prepare_latents( - batch_size * num_images_per_prompt, + num_prompts * num_images_per_prompt, num_channels_latents, height, width, @@ -372,14 +600,6 @@ def __call__( latents, ) - # 5-1. Define call parameters - if prompt is not None and isinstance(prompt, str): - num_prompts = 1 - elif prompt is not None and isinstance(prompt, list): - num_prompts = len(prompt) - else: - num_prompts = prompt_embeds.shape[0] - num_batches = ceil((num_images_per_prompt * num_prompts) / batch_size) logger.info( f"{num_prompts} prompt(s) received, {num_images_per_prompt} generation(s) per prompt," f" {batch_size} sample(s) per batch, {num_batches} total batch(es)." @@ -388,40 +608,86 @@ def __call__( logger.warning("The first two iterations are slower so it is recommended to feed more batches.") throughput_warmup_steps = kwargs.get("throughput_warmup_steps", 3) + use_warmup_inference_steps = ( + num_batches <= throughput_warmup_steps and num_inference_steps > throughput_warmup_steps + ) - t0 = time.time() - t1 = t0 + if hasattr(self.scheduler, "set_begin_index"): + self.scheduler.set_begin_index() hb_profiler = HabanaProfile( warmup=profiling_warmup_steps, active=profiling_steps, record_shapes=False, ) + hb_profiler.start() - # 6. Denoising loop - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # because compilation occurs in the first two iterations - if i == throughput_warmup_steps: + # 6. Split Input data to batches (HPU-specific step) + latents_batches, text_embeddings_batches, pooled_prompt_embeddings_batches, num_dummy_samples = ( + self._split_inputs_into_batches( + batch_size, + latents, + prompt_embeds, + negative_prompt_embeds, + pooled_prompt_embeds, + negative_pooled_prompt_embeds, + ) + ) + + outputs = { + "images": [], + } + + for block in self.transformer.transformer_blocks: + block.attn.processor = GaudiJointAttnProcessor2_0() + ht.hpu.synchronize() + + t0 = time.time() + t1 = t0 + + # 7. 
Denoising loop + for j in range(num_batches): + latents_batch = latents_batches[0] + latents_batches = torch.roll(latents_batches, shifts=-1, dims=0) + text_embeddings_batch = text_embeddings_batches[0] + text_embeddings_batches = torch.roll(text_embeddings_batches, shifts=-1, dims=0) + pooled_prompt_embeddings_batch = pooled_prompt_embeddings_batches[0] + pooled_prompt_embeddings_batches = torch.roll(pooled_prompt_embeddings_batches, shifts=-1, dims=0) + + if hasattr(self.scheduler, "_init_step_index"): + # Reset scheduler step index for next batch + self.scheduler.timesteps = timesteps + self.scheduler._init_step_index(timesteps[0]) + + # Throughput is calculated after warmup iterations + if j == throughput_warmup_steps: + t1 = time.time() + + for i in self.progress_bar(range(len(timesteps))): + timestep = timesteps[0] + timesteps = torch.roll(timesteps, shifts=-1, dims=0) + + if use_warmup_inference_steps and i == throughput_warmup_steps and j == num_batches - 1: t1 = time.time() if self.interrupt: continue # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents + latent_model_input = ( + torch.cat([latents_batch] * 2) if self.do_classifier_free_guidance else latents_batch + ) # broadcast to batch dimension in a way that's compatible with ONNX/Core ML - timestep = t.expand(latent_model_input.shape[0]) + timestep_batch = timestep.expand(latent_model_input.shape[0]) - noise_pred = self.transformer( - hidden_states=latent_model_input, - timestep=timestep, - encoder_hidden_states=prompt_embeds, - pooled_projections=pooled_prompt_embeds, - joint_attention_kwargs=self.joint_attention_kwargs, - return_dict=False, - )[0] + noise_pred = self.transformer_hpu( + latent_model_input, + timestep_batch, + text_embeddings_batch, + pooled_prompt_embeddings_batch, + self.joint_attention_kwargs, + ) # perform guidance if self.do_classifier_free_guidance: @@ -429,62 +695,173 @@ def __call__( noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond) # compute the previous noisy sample x_t -> x_t-1 - latents_dtype = latents.dtype - latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0] - - if latents.dtype != latents_dtype: - if torch.backends.mps.is_available(): - # some platforms (eg. 
apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272 - latents = latents.to(latents_dtype) + latents_dtype = latents_batch.dtype + latents_batch = self.scheduler.step(noise_pred, timestep, latents_batch, return_dict=False)[0] if callback_on_step_end is not None: callback_kwargs = {} for k in callback_on_step_end_tensor_inputs: callback_kwargs[k] = locals()[k] - callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) + callback_outputs = callback_on_step_end(self, i, timestep, callback_kwargs) - latents = callback_outputs.pop("latents", latents) - prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) - negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) - negative_pooled_prompt_embeds = callback_outputs.pop( - "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds - ) + latents_batch = callback_outputs.pop("latents", latents_batch) - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() + _prompt_embeds = callback_outputs.pop("prompt_embeds", None) + _negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", None) + if _prompt_embeds is not None and _negative_prompt_embeds is not None: + text_embeddings_batch = torch.cat([_negative_prompt_embeds, _prompt_embeds]) + _pooled_prompt_embeds = callback_outputs.pop("pooled_prompt_embeds", None) + _negative_pooled_prompt_embeds = callback_outputs.pop("negative_pooled_prompt_embeds", None) + if _pooled_prompt_embeds is not None and _negative_pooled_prompt_embeds is not None: + pooled_prompt_embeddings_batch = torch.cat( + [_negative_pooled_prompt_embeds, _pooled_prompt_embeds] + ) hb_profiler.step() htcore.mark_step(sync=True) - hb_profiler.stop() + if output_type == "latent": + image = latents_batch - t1 = warmup_inference_steps_time_adjustment(t1, t1, num_inference_steps, throughput_warmup_steps) - speed_metrics_prefix = "generation" - speed_measures = speed_metrics( - split=speed_metrics_prefix, - start_time=t0, - num_samples=num_batches * batch_size, - num_steps=num_batches * batch_size * num_inference_steps, - start_time_after_warmup=t1, - ) - logger.info(f"Speed metrics: {speed_measures}") - if output_type == "latent": - image = latents + else: + latents_batch = (latents_batch / self.vae.config.scaling_factor) + self.vae.config.shift_factor + image = self.vae.decode(latents_batch, return_dict=False)[0] + image = self.image_processor.postprocess(image, output_type=output_type) - else: - latents = (latents / self.vae.config.scaling_factor) + self.vae.config.shift_factor + outputs["images"].append(image) + + # End of Denoising loop + + hb_profiler.stop() - image = self.vae.decode(latents, return_dict=False)[0] - image = self.image_processor.postprocess(image, output_type=output_type) + ht.hpu.synchronize() + speed_metrics_prefix = "generation" + if use_warmup_inference_steps: + t1 = warmup_inference_steps_time_adjustment(t1, t1, num_inference_steps, throughput_warmup_steps) + speed_measures = speed_metrics( + split=speed_metrics_prefix, + start_time=t0, + num_samples=batch_size + if t1 == t0 or use_warmup_inference_steps + else (num_batches - throughput_warmup_steps) * batch_size, + num_steps=batch_size * num_inference_steps + if use_warmup_inference_steps + else (num_batches - throughput_warmup_steps) * batch_size * num_inference_steps, + start_time_after_warmup=t1, + ) + logger.info(f"Speed metrics: 
{speed_measures}") + + if quant_mode == "measure": + from neural_compressor.torch.quantization import finalize_calibration + + finalize_calibration(self.transformer) + + # 8 Output Images + # Remove dummy generations if needed + if num_dummy_samples > 0: + outputs["images"][-1] = outputs["images"][-1][:-num_dummy_samples] + + # Process generated images + for i, image in enumerate(outputs["images"][:]): + if i == 0: + outputs["images"].clear() + + # image = self.image_processor.postprocess(image, output_type=output_type) + + if output_type == "pil" and isinstance(image, list): + outputs["images"] += image + elif output_type in ["np", "numpy"] and isinstance(image, np.ndarray): + if len(outputs["images"]) == 0: + outputs["images"] = image + else: + outputs["images"] = np.concatenate((outputs["images"], image), axis=0) + else: + if len(outputs["images"]) == 0: + outputs["images"] = image + else: + outputs["images"] = torch.cat((outputs["images"], image), 0) # Offload all models self.maybe_free_model_hooks() if not return_dict: - return (image,) + return outputs["images"] return GaudiStableDiffusion3PipelineOutput( - images=image, + images=outputs["images"], throughput=speed_measures[f"{speed_metrics_prefix}_samples_per_second"], ) + + @torch.no_grad() + def transformer_hpu( + self, + latent_model_input, + timestep, + text_embeddings_batch, + pooled_prompt_embeddings_batch, + joint_attention_kwargs, + ): + if self.use_hpu_graphs: + return self.capture_replay( + latent_model_input, + timestep, + text_embeddings_batch, + pooled_prompt_embeddings_batch, + joint_attention_kwargs, + ) + else: + return self.transformer( + hidden_states=latent_model_input, + timestep=timestep, + encoder_hidden_states=text_embeddings_batch, + pooled_projections=pooled_prompt_embeddings_batch, + joint_attention_kwargs=joint_attention_kwargs, + return_dict=False, + )[0] + + @torch.no_grad() + def capture_replay( + self, + latent_model_input, + timestep, + encoder_hidden_states, + pooled_prompt_embeddings_batch, + joint_attention_kwargs, + ): + inputs = [ + latent_model_input, + timestep, + encoder_hidden_states, + pooled_prompt_embeddings_batch, + joint_attention_kwargs, + ] + h = self.ht.hpu.graphs.input_hash(inputs) + cached = self.cache.get(h) + + if cached is None: + # Capture the graph and cache it + with self.ht.hpu.stream(self.hpu_stream): + graph = self.ht.hpu.HPUGraph() + graph.capture_begin() + + outputs = self.transformer( + hidden_states=inputs[0], + timestep=inputs[1], + encoder_hidden_states=inputs[2], + pooled_projections=inputs[3], + joint_attention_kwargs=inputs[4], + return_dict=False, + )[0] + + graph.capture_end() + graph_inputs = inputs + graph_outputs = outputs + self.cache[h] = self.ht.hpu.graphs.CachedParams(graph_inputs, graph_outputs, graph) + return outputs + + # Replay the cached graph with updated inputs + self.ht.hpu.graphs.copy_to(cached.graph_inputs, inputs) + cached.graph.replay() + self.ht.core.hpu.default_stream().synchronize() + + return cached.graph_outputs diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 6a1b74d129..610f8eabba 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -97,6 +97,8 @@ class GaudiStableDiffusionXLPipeline(GaudiDiffusionPipeline, StableDiffusionXLPi bf16_full_eval (bool, 
defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -115,6 +117,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -122,6 +125,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableDiffusionXLPipeline.__init__( @@ -653,8 +657,6 @@ def __call__( t1 = t0 self._num_timesteps = len(timesteps) - if hasattr(self.scheduler, "set_begin_index"): - self.scheduler.set_begin_index() hb_profiler = HabanaProfile( warmup=profiling_warmup_steps, @@ -820,7 +822,7 @@ def __call__( speed_metrics_prefix = "generation" if t1 == t0 or use_warmup_inference_steps: num_samples = num_batches * batch_size - num_steps = (num_inference_steps - throughput_warmup_steps) * num_batches * batch_size + num_steps = num_inference_steps * num_batches * batch_size else: num_samples = (num_batches - throughput_warmup_steps) * batch_size num_steps = (num_batches - throughput_warmup_steps) * num_inference_steps * batch_size diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 7b6f25d920..6846e1a146 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -97,6 +97,8 @@ class GaudiStableDiffusionXLImg2ImgPipeline(GaudiDiffusionPipeline, StableDiffus bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -117,6 +119,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -124,6 +127,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableDiffusionXLImg2ImgPipeline.__init__( @@ -536,8 +540,6 @@ def denoising_value_valid(dnv): ).to(device=device, dtype=latents.dtype) self._num_timesteps = len(timesteps) - if hasattr(self.scheduler, "set_begin_index"): - self.scheduler.set_begin_index() # 8.3 Denoising loop throughput_warmup_steps = kwargs.get("throughput_warmup_steps", 3) diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index dab18e82e2..3270e14224 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -133,6 +133,8 @@ class GaudiStableDiffusionXLInpaintPipeline(GaudiDiffusionPipeline, StableDiffus bf16_full_eval (bool, defaults to `False`): Whether to use full bfloat16 evaluation instead of 32-bit. 
This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ _callback_tensor_inputs = [ @@ -162,6 +164,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -169,6 +172,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableDiffusionXLInpaintPipeline.__init__( @@ -744,8 +748,6 @@ def denoising_value_valid(dnv): ).to(device=device, dtype=latents.dtype) self._num_timesteps = len(timesteps) - if hasattr(self.scheduler, "set_begin_index"): - self.scheduler.set_begin_index() outputs = { "images": [], diff --git a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_mlperf.py b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_mlperf.py index 42c703b78b..3cca208954 100644 --- a/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_mlperf.py +++ b/optimum/habana/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_mlperf.py @@ -159,7 +159,10 @@ def __init__( feature_extractor: CLIPImageProcessor = None, force_zeros_for_empty_prompt: bool = True, add_watermarker: Optional[bool] = None, + sdp_on_bf16: bool = False, ): + if sdp_on_bf16: + torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) super().__init__( vae, text_encoder, @@ -757,7 +760,7 @@ def __call__( throughput_warmup_steps = kwargs.get("throughput_warmup_steps", 3) use_warmup_inference_steps = ( - num_batches < throughput_warmup_steps and num_inference_steps > throughput_warmup_steps + num_batches <= throughput_warmup_steps and num_inference_steps > throughput_warmup_steps ) self._num_timesteps = len(timesteps) @@ -769,9 +772,6 @@ def __call__( if j == throughput_warmup_steps: ht.hpu.synchronize() t1 = time.time() - if use_warmup_inference_steps: - ht.hpu.synchronize() - t0_inf = time.time() latents = latents_batches[0] latents_batches = torch.roll(latents_batches, shifts=-1, dims=0) @@ -841,10 +841,9 @@ def __call__( hb_profiler.step() else: for i in range(num_inference_steps): - if use_warmup_inference_steps and i == throughput_warmup_steps: + if use_warmup_inference_steps and i == throughput_warmup_steps and j == num_batches - 1: ht.hpu.synchronize() - t1_inf = time.time() - t1 += t1_inf - t0_inf + t1 = time.time() t = timesteps[0] timesteps = torch.roll(timesteps, shifts=-1, dims=0) @@ -875,9 +874,10 @@ def __call__( callback_on_step_end_tensor_inputs, ) hb_profiler.step() - if use_warmup_inference_steps: + if use_warmup_inference_steps and j == num_batches - 1: ht.hpu.synchronize() - t1 = warmup_inference_steps_time_adjustment(t1, t1_inf, num_inference_steps, throughput_warmup_steps) + t1 = warmup_inference_steps_time_adjustment(t1, t1, num_inference_steps, throughput_warmup_steps) + t_vae_b = time.time() if not output_type == "latent": # make sure the VAE is in float32 mode, as it overflows in float16 @@ -896,19 +896,21 @@ def __call__( image = latents output_images.append(image) + if use_warmup_inference_steps and j == num_batches - 1: + ht.hpu.synchronize() + t_vae_e = time.time() + t1 = t1 + t_vae_e - t_vae_b hb_profiler.stop() speed_metrics_prefix = "generation" ht.hpu.synchronize() - if t1 == t0 or use_warmup_inference_steps: - num_samples = num_batches * batch_size - num_steps = 
(num_inference_steps - throughput_warmup_steps) * num_batches * batch_size + num_samples = batch_size + num_steps = batch_size * num_inference_steps else: num_samples = (num_batches - throughput_warmup_steps) * batch_size num_steps = (num_batches - throughput_warmup_steps) * num_inference_steps * batch_size - speed_measures = speed_metrics( split=speed_metrics_prefix, start_time=t0, diff --git a/optimum/habana/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py b/optimum/habana/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py index 25f122c960..ec46283461 100644 --- a/optimum/habana/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +++ b/optimum/habana/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py @@ -31,7 +31,8 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection from ....transformers.gaudi_configuration import GaudiConfig -from ....utils import speed_metrics +from ....utils import HabanaProfile, speed_metrics +from ...models.unet_2d_condition import set_default_attn_processor_hpu from ..pipeline_utils import GaudiDiffusionPipeline @@ -77,6 +78,18 @@ class GaudiStableVideoDiffusionPipeline(GaudiDiffusionPipeline, StableVideoDiffu A scheduler to be used in combination with `unet` to denoise the encoded image latents. feature_extractor ([`~transformers.CLIPImageProcessor`]): A `CLIPImageProcessor` to extract features from generated images. + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. + bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -90,6 +103,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -97,6 +111,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) StableVideoDiffusionPipeline.__init__( @@ -107,7 +122,13 @@ def __init__( scheduler, feature_extractor, ) + if use_habana: + self.unet.set_default_attn_processor = set_default_attn_processor_hpu + self.unet.set_default_attn_processor(self.unet) + if use_hpu_graphs: + from habana_frameworks.torch.hpu import wrap_in_hpu_graph + self.unet = wrap_in_hpu_graph(self.unet, disable_tensor_cache=True) self.to(self._device) @classmethod @@ -240,6 +261,8 @@ def __call__( output_type: Optional[str] = "pil", callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None, callback_on_step_end_tensor_inputs: List[str] = ["latents"], + profiling_warmup_steps: Optional[int] = 0, + profiling_steps: Optional[int] = 0, return_dict: bool = True, **kwargs, ): @@ -372,7 +395,7 @@ def __call__( # 4. 
Encode input image using VAE image = self.video_processor.preprocess(image, height=height, width=width) # torch.randn is broken on HPU so running it on CPU - rand_device = "cpu" if device.type == "hpu" else device + rand_device = torch.device("cpu") if device.type == "hpu" else device noise = randn_tensor(image.shape, generator=generator, device=rand_device, dtype=image.dtype).to(device) # image = self.image_processor.preprocess(image, height=height, width=width).to(device) # noise = randn_tensor(image.shape, generator=generator, device=device, dtype=image.dtype) @@ -469,6 +492,13 @@ def __call__( t0 = time.time() t1 = t0 + hb_profiler = HabanaProfile( + warmup=profiling_warmup_steps, + active=profiling_steps, + record_shapes=False, + ) + hb_profiler.start() + # 10. Denoising loop throughput_warmup_steps = kwargs.get("throughput_warmup_steps", 3) use_warmup_inference_steps = ( @@ -525,6 +555,9 @@ def __call__( # compute the previous noisy sample x_t -> x_t-1 latents_batch = self.scheduler.step(noise_pred, timestep, latents_batch).prev_sample + if not self.use_hpu_graphs: + self.htcore.mark_step() + if callback_on_step_end is not None: callback_kwargs = {} for k in callback_on_step_end_tensor_inputs: @@ -532,6 +565,7 @@ def __call__( callback_outputs = callback_on_step_end(self, i, timestep, callback_kwargs) latents_batch = callback_outputs.pop("latents", latents_batch) + hb_profiler.step() if not output_type == "latent": # cast back to fp16/bf16 if needed @@ -544,7 +578,10 @@ def __call__( frames = latents_batch outputs["frames"].append(frames) + if not self.use_hpu_graphs: + self.htcore.mark_step() + hb_profiler.stop() speed_metrics_prefix = "generation" speed_measures = speed_metrics( split=speed_metrics_prefix, diff --git a/optimum/habana/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py b/optimum/habana/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py index ffaf25df11..133f87623d 100644 --- a/optimum/habana/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +++ b/optimum/habana/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py @@ -41,6 +41,31 @@ class GaudiTextToVideoSDPipelineOutput(BaseOutput): class GaudiTextToVideoSDPipeline(GaudiDiffusionPipeline, TextToVideoSDPipeline): r""" Adapted from: https://github.com/huggingface/diffusers/blob/v0.26.3/src/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py#L84 + + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations. + text_encoder ([`~transformers.CLIPTextModel`]): + Frozen text-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)). + tokenizer (`~transformers.CLIPTokenizer`): + A `CLIPTokenizer` to tokenize text. + unet ([`UNet2DConditionModel`]): + A `UNet2DConditionModel` to denoise the encoded image latents. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. + use_habana (bool, defaults to `False`): + Whether to use Gaudi (`True`) or CPU (`False`). + use_hpu_graphs (bool, defaults to `False`): + Whether to use HPU graphs or not. + gaudi_config (Union[str, [`GaudiConfig`]], defaults to `None`): + Gaudi configuration to use. Can be a string to download it from the Hub. + Or a previously initialized config can be passed. 
+ bf16_full_eval (bool, defaults to `False`): + Whether to use full bfloat16 evaluation instead of 32-bit. + This will be faster and save memory compared to fp32/mixed precision but can harm generated images. + sdp_on_bf16 (bool, defaults to `False`): + Whether to allow PyTorch to use reduced precision in the SDPA math backend. """ def __init__( @@ -54,6 +79,7 @@ def __init__( use_hpu_graphs: bool = False, gaudi_config: Union[str, GaudiConfig] = None, bf16_full_eval: bool = False, + sdp_on_bf16: bool = False, ): GaudiDiffusionPipeline.__init__( self, @@ -61,6 +87,7 @@ def __init__( use_hpu_graphs, gaudi_config, bf16_full_eval, + sdp_on_bf16, ) TextToVideoSDPipeline.__init__( self, diff --git a/optimum/habana/distributed/strategy.py b/optimum/habana/distributed/strategy.py index 91b3f00232..bb1b2d9bb3 100644 --- a/optimum/habana/distributed/strategy.py +++ b/optimum/habana/distributed/strategy.py @@ -115,12 +115,12 @@ def __init__(self, group=None, from_meta=False): self.group = group if group is not None else torch.distributed.GroupMember.WORLD def distribute_module(self, module: nn.Module, final_layers: bool = False) -> nn.Module: - from optimum.habana.distributed import tp_wrapping + from . import tp_wrapping return tp_wrapping.apply_tp(module, self.group) def distribute_layer(self, block: nn.Module, layer: int) -> nn.Module: - from optimum.habana.distributed import tp_wrapping + from . import tp_wrapping return tp_wrapping.apply_tp(block, layer, self.group) diff --git a/optimum/habana/sentence_transformers/modeling_utils.py b/optimum/habana/sentence_transformers/modeling_utils.py index 7690483e39..0aa16c83e0 100644 --- a/optimum/habana/sentence_transformers/modeling_utils.py +++ b/optimum/habana/sentence_transformers/modeling_utils.py @@ -19,10 +19,11 @@ def adapt_sentence_transformers_to_gaudi(): Replaces some SentenceTransformer' methods for equivalent methods optimized for Gaudi. """ - from sentence_transformers import SentenceTransformer + from sentence_transformers.data_collator import SentenceTransformerDataCollator + from sentence_transformers.models import Transformer - from optimum.habana.sentence_transformers import ( + from . 
import ( st_gaudi_data_collator_call, st_gaudi_encode, st_gaudi_transformer_save, @@ -30,12 +31,6 @@ def adapt_sentence_transformers_to_gaudi(): ) SentenceTransformer.encode = st_gaudi_encode - - from sentence_transformers.models import Transformer - Transformer.tokenize = st_gaudi_transformer_tokenize Transformer.save = st_gaudi_transformer_save - - from sentence_transformers.data_collator import SentenceTransformerDataCollator - SentenceTransformerDataCollator.__call__ = st_gaudi_data_collator_call diff --git a/optimum/habana/sentence_transformers/st_gaudi_data_collator.py b/optimum/habana/sentence_transformers/st_gaudi_data_collator.py index 25e015fe24..51e823e1ae 100644 --- a/optimum/habana/sentence_transformers/st_gaudi_data_collator.py +++ b/optimum/habana/sentence_transformers/st_gaudi_data_collator.py @@ -5,47 +5,55 @@ def st_gaudi_data_collator_call(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: - """data collator for sentence transformer""" + """Collator for a SentenceTransformers model.""" - columns = list(features[0].keys()) + column_names = list(features[0].keys()) # We should always be able to return a loss, label or not: batch = {"return_loss": True} - if "dataset_name" in columns: - columns.remove("dataset_name") + if "dataset_name" in column_names: + column_names.remove("dataset_name") batch["dataset_name"] = features[0]["dataset_name"] + if tuple(column_names) not in self._warned_columns: + self.maybe_warn_about_column_order(column_names) + # Extract the label column if it exists for label_column in self.valid_label_columns: - if label_column in columns: + if label_column in column_names: batch["label"] = torch.tensor([row[label_column] for row in features]) - columns.remove(label_column) + column_names.remove(label_column) break # Extract the feature columns cnt = 0 + cnt1 = 0 power2_len = [0, 0] - for column in columns: - tokenized = self.tokenize_fn([row[column] for row in features]) + for column_name in column_names: + # If the prompt length has been set, we should add it to the batch + if column_name.endswith("_prompt_length") and column_name[: -len("_prompt_length")] in column_names: + batch[column_name] = torch.tensor([row[column_name] for row in features], dtype=torch.int) + continue + + tokenized = self.tokenize_fn([row[column_name] for row in features]) for key, value in tokenized.items(): curr_tokenize_len = value.shape if curr_tokenize_len[1] > 4096: - power2_len[cnt % 2] = math.ceil(curr_tokenize_len[1] / 128) * 128 - additional_pad_len = math.ceil(curr_tokenize_len[1] / 128) * 128 - curr_tokenize_len[1] + power2_len[cnt1] = math.ceil(curr_tokenize_len[1] / 128) * 128 else: - power2_len[cnt % 2] = 2 ** math.ceil(math.log2(curr_tokenize_len[1])) - additional_pad_len = 2 ** math.ceil(math.log2(curr_tokenize_len[1])) - curr_tokenize_len[1] - - if (cnt % 2 == 1) and (power2_len[0] == power2_len[1]): - additional_pad_len = additional_pad_len + 1 + power2_len[cnt1] = 2 ** math.ceil(math.log2(curr_tokenize_len[1])) + additional_pad_len = power2_len[cnt1] - curr_tokenize_len[1] + if (cnt1 == 1) and (power2_len[0] == power2_len[1]): + additional_pad_len += 1 - batch[f"{column}_{key}"] = torch.cat( + batch[f"{column_name}_{key}"] = torch.cat( ( value, torch.zeros((curr_tokenize_len[0], additional_pad_len), dtype=torch.int8), ), -1, ) - cnt = cnt + 1 + cnt += 1 + cnt1 = cnt & 1 return batch diff --git a/optimum/habana/sentence_transformers/st_gaudi_encoder.py b/optimum/habana/sentence_transformers/st_gaudi_encoder.py index db253953db..df8d06956c 
100644 --- a/optimum/habana/sentence_transformers/st_gaudi_encoder.py +++ b/optimum/habana/sentence_transformers/st_gaudi_encoder.py @@ -5,13 +5,11 @@ import numpy as np import torch -from numpy import ndarray from sentence_transformers.quantization import quantize_embeddings from sentence_transformers.util import ( batch_to_device, truncate_embeddings, ) -from torch import Tensor from tqdm.autonotebook import trange @@ -24,14 +22,15 @@ def st_gaudi_encode( prompt_name: Optional[str] = None, prompt: Optional[str] = None, batch_size: int = 32, - show_progress_bar: bool = None, + show_progress_bar: Optional[bool] = None, output_value: Optional[Literal["sentence_embedding", "token_embeddings"]] = "sentence_embedding", precision: Literal["float32", "int8", "uint8", "binary", "ubinary"] = "float32", convert_to_numpy: bool = True, convert_to_tensor: bool = False, - device: str = None, + device: Optional[str] = None, normalize_embeddings: bool = False, -) -> Union[List[Tensor], ndarray, Tensor]: + **kwargs, +) -> Union[List[torch.Tensor], np.ndarray, torch.Tensor]: """ Computes sentence embeddings. @@ -63,7 +62,7 @@ def st_gaudi_encode( the faster dot-product (util.dot_score) instead of cosine similarity can be used. Defaults to False. Returns: - Union[List[Tensor], ndarray, Tensor]: By default, a 2d numpy array with shape [num_inputs, output_dimension] is returned. + Union[List[torch.Tensor], np.ndarray, torch.Tensor]: By default, a 2d numpy array with shape [num_inputs, output_dimension] is returned. If only one string input is provided, then the output is a 1d array with shape [output_dimension]. If ``convert_to_tensor``, a torch Tensor is returned instead. If ``self.truncate_dim <= output_dimension`` then output_dimension is ``self.truncate_dim``. @@ -85,9 +84,10 @@ def st_gaudi_encode( print(embeddings.shape) # (3, 768) """ + self.eval() if show_progress_bar is None: - show_progress_bar = logger.getEffectiveLevel() == logging.INFO or logger.getEffectiveLevel() == logging.DEBUG + show_progress_bar = logger.getEffectiveLevel() in (logging.INFO, logging.DEBUG) if convert_to_tensor: convert_to_numpy = False @@ -119,6 +119,7 @@ def st_gaudi_encode( "Encode with either a `prompt`, a `prompt_name`, or neither, but not both. " "Ignoring the `prompt_name` in favor of `prompt`." 
) + extra_features = {} if prompt is not None: sentences = [prompt + sentence for sentence in sentences] @@ -132,6 +133,8 @@ def st_gaudi_encode( if device is None: device = self.device + self.to(device) + all_embeddings = [] length_sorted_idx = np.argsort([-self._text_length(sen) for sen in sentences]) sentences_sorted = [sentences[idx] for idx in length_sorted_idx] @@ -139,7 +142,6 @@ def st_gaudi_encode( for start_index in trange(0, len(sentences), batch_size, desc="Batches", disable=not show_progress_bar): sentences_batch = sentences_sorted[start_index : start_index + batch_size] features = self.tokenize(sentences_batch) - if self.device.type == "hpu": if "input_ids" in features: curr_tokenize_len = features["input_ids"].shape @@ -166,11 +168,12 @@ def st_gaudi_encode( ), -1, ) + features = batch_to_device(features, device) features.update(extra_features) with torch.no_grad(): - out_features = self.forward(features) + out_features = self.forward(features, **kwargs) if self.device.type == "hpu": out_features = copy.deepcopy(out_features) @@ -218,7 +221,7 @@ def st_gaudi_encode( all_embeddings = torch.Tensor() elif convert_to_numpy: if not isinstance(all_embeddings, np.ndarray): - if all_embeddings[0].dtype == torch.bfloat16: + if all_embeddings and all_embeddings[0].dtype == torch.bfloat16: all_embeddings = np.asarray([emb.float().numpy() for emb in all_embeddings]) else: all_embeddings = np.asarray([emb.numpy() for emb in all_embeddings]) diff --git a/optimum/habana/sentence_transformers/st_gaudi_trainer.py b/optimum/habana/sentence_transformers/st_gaudi_trainer.py index ccbd2e1fb2..f7d73d231c 100644 --- a/optimum/habana/sentence_transformers/st_gaudi_trainer.py +++ b/optimum/habana/sentence_transformers/st_gaudi_trainer.py @@ -1,5 +1,5 @@ # coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. +# Copyright 2024 The HuggingFace Team. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,17 +12,23 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+from __future__ import annotations + import logging import os -import warnings +from collections import OrderedDict from contextlib import nullcontext +from functools import partial from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union import torch from accelerate.utils import DistributedDataParallelKwargs +from packaging.version import parse as parse_version from sentence_transformers.data_collator import SentenceTransformerDataCollator from sentence_transformers.evaluation import SentenceEvaluator, SequentialEvaluator from sentence_transformers.losses.CoSENTLoss import CoSENTLoss +from sentence_transformers.model_card import ModelCardCallback +from sentence_transformers.models import Pooling from sentence_transformers.models.Transformer import Transformer from sentence_transformers.sampler import ( DefaultBatchSampler, @@ -38,21 +44,19 @@ from sentence_transformers.util import disable_logging, is_datasets_available from torch.utils.data import BatchSampler, ConcatDataset, DataLoader, SubsetRandomSampler from transformers import EvalPrediction, PreTrainedTokenizerBase, TrainerCallback +from transformers import __version__ as transformers_version from transformers.data.data_collator import DataCollator from transformers.integrations import WandbCallback -from transformers.modeling_utils import unwrap_model -from transformers.trainer import TRAINING_ARGS_NAME +from transformers.trainer import TRAINING_ARGS_NAME, _is_peft_model from transformers.trainer_utils import EvalLoopOutput from transformers.training_args import ParallelMode -from optimum.habana.transformers.trainer import _is_peft_model - from ..transformers import GaudiConfig, GaudiTrainer from .st_gaudi_training_args import SentenceTransformerGaudiTrainingArguments if is_datasets_available(): - from datasets import Dataset, DatasetDict + from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict, Value logger = logging.getLogger(__name__) @@ -62,33 +66,58 @@ class SentenceTransformerGaudiTrainer(GaudiTrainer): """ - Inherits from GaudiTrainer and adapted from: https://github.com/UKPLab/sentence-transformers/blob/v3.0.1/sentence_transformers/trainer.py + SentenceTransformerGaudiTrainer is a simple but feature-complete training and eval loop for PyTorch + based on the 🤗 Transformers :class:`~transformers.Trainer`. 
+ + It inherits from GaudiTrainer and adapted from: + https://github.com/UKPLab/sentence-transformers/blob/v3.3.1/sentence_transformers/trainer.py """ def __init__( self, - model: Optional["SentenceTransformer"] = None, - gaudi_config: GaudiConfig = None, - args: SentenceTransformerGaudiTrainingArguments = None, - train_dataset: Optional[Union["Dataset", "DatasetDict", Dict[str, "Dataset"]]] = None, - eval_dataset: Optional[Union["Dataset", "DatasetDict", Dict[str, "Dataset"]]] = None, + model: Optional[SentenceTransformer] = None, + gaudi_config: Optional[GaudiConfig] = None, + args: Optional[SentenceTransformerGaudiTrainingArguments] = None, + train_dataset: Optional[Union[Dataset, DatasetDict, IterableDataset, Dict[str, Dataset]]] = None, + eval_dataset: Optional[Union[Dataset, DatasetDict, IterableDataset, Dict[str, Dataset]]] = None, loss: Optional[ Union[ torch.nn.Module, Dict[str, torch.nn.Module], - Callable[["SentenceTransformer"], torch.nn.Module], - Dict[str, Callable[["SentenceTransformer"], torch.nn.Module]], + Callable[[SentenceTransformer], torch.nn.Module], + Dict[str, Callable[[SentenceTransformer], torch.nn.Module]], ] ] = None, evaluator: Optional[Union[SentenceEvaluator, List[SentenceEvaluator]]] = None, data_collator: Optional[DataCollator] = None, tokenizer: Optional[Union[PreTrainedTokenizerBase, Callable]] = None, - model_init: Optional[Callable[[], "SentenceTransformer"]] = None, + model_init: Optional[Callable[[], SentenceTransformer]] = None, compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, callbacks: Optional[List[TrainerCallback]] = None, - optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + optimizers: Tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None), preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, ) -> None: + if not is_datasets_available(): + raise RuntimeError( + "To train a SentenceTransformerGaudiTrainer model, you need to install the `datasets` module. " + "To fix: pip install datasets" + ) + + if args is None: + output_dir = "tmp_trainer" + logger.info(f"No `TrainingArguments` passed, using `output_dir={output_dir}`.") + args = SentenceTransformerGaudiTrainingArguments( + output_dir=output_dir, + use_habana=True, + gaudi_config_name="Habana/distilbert-base-uncased", + use_lazy_mode=True, + use_hpu_graphs=True, + use_hpu_graphs_for_inference=False, + use_hpu_graphs_for_training=True, + ) + elif not isinstance(args, SentenceTransformerGaudiTrainingArguments): + raise ValueError("Please use `TrainingArguments` imported from `optimum.habana.sentence_transformers`.") + if model is None: if model_init is not None: self.model_init = model_init @@ -97,16 +126,33 @@ def __init__( raise RuntimeError("`Trainer` requires either a `model` or `model_init` argument") else: if model_init is not None: - warnings.warn( + logger.warning( "`Trainer` requires either a `model` or `model_init` argument, but not both. `model_init` will" - " overwrite your model when calling the `train` method. This will become a fatal error in the next" - " release.", - FutureWarning, + " overwrite your model when calling the `train` method." 
) self.model_init = model_init - # If the model ID is set via the SentenceTransformerTrainingArguments, but not via the SentenceTransformerModelCardData, - # then we can set it here for the model card regardless + if compute_metrics is not None: + logger.warning( + "`compute_metrics` is currently not compatible with the SentenceTransformerGaudiTrainer. Please use the " + "`evaluator` argument instead for detailed evaluation metrics, or the `eval_dataset` argument for " + "the evaluation loss." + ) + + # Get a dictionary of the default training arguments, so we can determine which arguments have been changed + # for the model card + default_args_dict = SentenceTransformerGaudiTrainingArguments( + output_dir="unused", + use_habana=True, + gaudi_config_name="Habana/distilbert-base-uncased", + use_lazy_mode=True, + use_hpu_graphs=True, + use_hpu_graphs_for_inference=False, + use_hpu_graphs_for_training=True, + ).to_dict() + + # If the model ID is set via the SentenceTransformerGaudiTrainingArguments, but not via the + # SentenceTransformerModelCardData, then we can set it here for the model card regardless if args.hub_model_id and not model.model_card_data.model_id: model.model_card_data.set_model_id(args.hub_model_id) @@ -116,30 +162,57 @@ def __init__( if data_collator is None: data_collator = SentenceTransformerDataCollator(tokenize_fn=model.tokenize) + for dataset_name, dataset in zip(["train", "eval"], [train_dataset, eval_dataset]): + if isinstance(dataset, IterableDataset) and dataset.column_names is None: + sample = next(iter(dataset)) + naive_type_mapping = {str: "string", int: "int64", float: "float32", bool: "bool"} + example_features = { + key: Value(naive_type_mapping.get(type(value), "null")) for key, value in sample.items() + } + raise ValueError( + f"The provided `{dataset_name}_dataset` must have Features. Specify them with e.g.:\n" + f"{dataset_name}_dataset = {dataset_name}_dataset.cast(Features({example_features}))\n" + "or by providing the Features to the IterableDataset initialization method. 
See the Datasets " + "documentation for more information on dataset Features: " + "https://huggingface.co/docs/datasets/en/about_dataset_features" + ) + if isinstance(train_dataset, dict) and not isinstance(train_dataset, DatasetDict): train_dataset = DatasetDict(train_dataset) - if isinstance(eval_dataset, dict) and not isinstance(eval_dataset, Dataset): + if isinstance(eval_dataset, dict) and not isinstance(eval_dataset, DatasetDict): eval_dataset = DatasetDict(eval_dataset) + super_kwargs = { + "model": None if self.model_init else model, + "gaudi_config": gaudi_config, + "args": args, + "data_collator": data_collator, + "train_dataset": train_dataset, + "eval_dataset": eval_dataset if eval_dataset is not None or evaluator is None else "dummy", + "model_init": model_init, + "compute_metrics": compute_metrics, + "callbacks": callbacks, + "optimizers": optimizers, + "preprocess_logits_for_metrics": preprocess_logits_for_metrics, + } + # Transformers v4.46.0 changed the `tokenizer` argument to a more general `processing_class` argument + if parse_version(transformers_version) >= parse_version("4.46.0"): + super_kwargs["processing_class"] = tokenizer + else: + super_kwargs["tokenizer"] = tokenizer + super().__init__(**super_kwargs) - super().__init__( - model=None if self.model_init else model, - gaudi_config=gaudi_config, - args=args, - data_collator=data_collator, - train_dataset=train_dataset, - eval_dataset=eval_dataset, - tokenizer=tokenizer, - model_init=model_init, - compute_metrics=compute_metrics, - callbacks=callbacks, - optimizers=optimizers, - preprocess_logits_for_metrics=preprocess_logits_for_metrics, - ) + # Transformers v4.46.0 introduced a ValueError if `eval_dataset` is None while eval_strategy is not "no", + # but in Sentence Transformers you can also evaluate without an eval_dataset via an evaluator, so we set + # it to "dummy" in that case to avoid the ValueError + if self.eval_dataset == "dummy": + self.eval_dataset = None # Every Sentence Transformer model can always return a loss, so we set this to True # to avoid having to specify it in the data collator or model's forward self.can_return_loss = True + self._prompt_length_mapping = {} + self.model: SentenceTransformer self.args: SentenceTransformerGaudiTrainingArguments self.data_collator: SentenceTransformerDataCollator @@ -167,18 +240,49 @@ def __init__( ) else: self.loss = self.prepare_loss(loss, model) + # If evaluator is a list, we wrap it in a SequentialEvaluator if evaluator is not None and not isinstance(evaluator, SentenceEvaluator): evaluator = SequentialEvaluator(evaluator) self.evaluator = evaluator + if self.train_dataset is not None: + self.train_dataset = self.maybe_add_prompts_or_dataset_name_column( + train_dataset, args.prompts, dataset_name="train" + ) + if self.eval_dataset is not None: + self.eval_dataset = self.maybe_add_prompts_or_dataset_name_column( + eval_dataset, args.prompts, dataset_name="eval" + ) + self.add_model_card_callback(default_args_dict) + + def add_model_card_callback(self, default_args_dict: dict[str, Any]) -> None: + """ + Add a callback responsible for automatically tracking data required for the automatic model card generation + + This method is called in the ``__init__`` method of the + :class:`~sentence_transformers.trainer.SentenceTransformerTrainer` class. + + Args: + default_args_dict (Dict[str, Any]): A dictionary of the default training arguments, so we can determine + which arguments have been changed for the model card. + + .. 
note:: + + This method can be overriden by subclassing the trainer to remove/customize this callback in custom uses cases + """ + + model_card_callback = ModelCardCallback(self, default_args_dict) + self.add_callback(model_card_callback) + model_card_callback.on_init_end(self.args, self.state, self.control, self.model) + def _wrap_model(self, model, training=True, dataloader=None): """ Differs from GaudiTrainer._wrap_model: - `allow_unused_input=True` was added to `ht.hpu.ModuleCacher()` """ # train/eval could be run multiple-times - if already wrapped, don't re-wrap it again - if unwrap_model(model) is not model: + if self.accelerator.unwrap_model(model) is not model: return model # Note: in torch.distributed mode, there's no point in wrapping the model @@ -216,7 +320,7 @@ def _wrap_model(self, model, training=True, dataloader=None): return model - def call_model_init(self, trial=None) -> "SentenceTransformer": + def call_model_init(self, trial=None) -> SentenceTransformer: model = super().call_model_init(trial=trial) # If the Trainer already has a loss, then we'll want to override the model in the loss function if not hasattr(self, "loss"): @@ -241,7 +345,7 @@ def call_model_init(self, trial=None) -> "SentenceTransformer": self.loss = self.override_model_in_loss(self.loss, model) return model - def override_model_in_loss(self, loss: torch.nn.Module, model: "SentenceTransformer") -> torch.nn.Module: + def override_model_in_loss(self, loss: torch.nn.Module, model: SentenceTransformer) -> torch.nn.Module: from sentence_transformers import SentenceTransformer for name, child in loss.named_children(): @@ -255,14 +359,14 @@ def override_model_in_loss(self, loss: torch.nn.Module, model: "SentenceTransfor def prepare_loss( self, - loss: Union[Callable[["SentenceTransformer"], torch.nn.Module], torch.nn.Module], - model: "SentenceTransformer", + loss: Union[Callable[[SentenceTransformer], torch.nn.Module], torch.nn.Module], + model: SentenceTransformer, ) -> torch.nn.Module: if isinstance(loss, torch.nn.Module): return loss.to(model.device) return loss(model).to(model.device) - def add_dataset_name_column(self, dataset_dict: "DatasetDict") -> "DatasetDict": + def add_dataset_name_column(self, dataset_dict: DatasetDict) -> DatasetDict: for key, dataset in dataset_dict.items(): if "dataset_name" not in dataset.column_names: dataset_dict[key] = dataset.add_column("dataset_name", [key] * len(dataset)) @@ -270,9 +374,10 @@ def add_dataset_name_column(self, dataset_dict: "DatasetDict") -> "DatasetDict": def compute_loss( self, - model: "SentenceTransformer", + model: SentenceTransformer, inputs: Dict[str, Union[torch.Tensor, Any]], return_outputs: bool = False, + num_items_in_batch: Optional[int] = None, ) -> Union[torch.Tensor, Tuple[torch.Tensor, Dict[str, Any]]]: """ Computes the loss for the SentenceTransformer model. @@ -287,6 +392,7 @@ def compute_loss( model (SentenceTransformer): The SentenceTransformer model. inputs (Dict[str, Union[torch.Tensor, Any]]): The input data for the model. return_outputs (bool, optional): Whether to return the outputs along with the loss. Defaults to False. + num_items_in_batch (int, optional): The number of items in the batch. Defaults to None. Unused, but required by the transformers Trainer. Returns: Union[torch.Tensor, Tuple[torch.Tensor, Dict[str, Any]]]: The computed loss. If `return_outputs` is True, returns a tuple of loss and outputs. Otherwise, returns only the loss. @@ -300,7 +406,6 @@ def compute_loss( # Insert the wrapped (e.g. 
distributed or compiled) model into the loss function, # if the loss stores the model. Only called once per process - # from https://github.com/UKPLab/sentence-transformers/blob/v3.1.0/sentence_transformers/trainer.py#L337 if ( model == self.model_wrapped and model != self.model # Only if the model is wrapped @@ -312,7 +417,7 @@ def compute_loss( if return_outputs: # During prediction/evaluation, `compute_loss` will be called with `return_outputs=True`. # However, Sentence Transformer losses do not return outputs, so we return an empty dictionary. - # This does not result in any problems, as the SentenceTransformerTrainingArguments sets + # This does not result in any problems, as the SentenceTransformerGaudiTrainingArguments sets # `prediction_loss_only=True` which means that the output is not used. return loss, {} return loss @@ -354,13 +459,16 @@ def collect_features( def evaluate( self, - eval_dataset: Optional[Union["Dataset", Dict[str, "Dataset"]]] = None, + eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None, ignore_keys: Optional[List[str]] = None, metric_key_prefix: str = "eval", ) -> Dict[str, float]: - eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset - if isinstance(eval_dataset, DatasetDict) and isinstance(self.loss, dict): - eval_dataset = self.add_dataset_name_column(eval_dataset) + if eval_dataset is not None: + eval_dataset = self.maybe_add_prompts_or_dataset_name_column( + eval_dataset, self.args.prompts, dataset_name="eval" + ) + else: + eval_dataset = self.eval_dataset return super().evaluate(eval_dataset, ignore_keys, metric_key_prefix) def evaluation_loop( @@ -420,7 +528,7 @@ def _load_best_model(self) -> None: except Exception: pass - # Override the model with the `tranformers`-based auto_model, and restore the original SentenceTransformers + # Override the model with the `transformers`-based auto_model, and restore the original SentenceTransformers # model with the loaded `transformers` model full_model = self.model self.model = self.model[0].auto_model @@ -431,7 +539,12 @@ def _load_best_model(self) -> None: self.model = full_model self.model[0].auto_model = loaded_auto_model - def validate_column_names(self, dataset: "Dataset", dataset_name: Optional[str] = None) -> bool: + def validate_column_names(self, dataset: Dataset, dataset_name: Optional[str] = None) -> None: + if isinstance(dataset, dict): + for dataset_name, dataset in dataset.items(): + self.validate_column_names(dataset, dataset_name=dataset_name) + return + if overlap := set(dataset.column_names) & {"return_loss", "dataset_name"}: raise ValueError( f"The following column names are invalid in your {dataset_name + ' ' if dataset_name else ''}dataset: {list(overlap)}." @@ -440,12 +553,36 @@ def validate_column_names(self, dataset: "Dataset", dataset_name: Optional[str] def get_batch_sampler( self, - dataset: "Dataset", + dataset: Dataset, batch_size: int, drop_last: bool, valid_label_columns: Optional[List[str]] = None, generator: Optional[torch.Generator] = None, - ) -> BatchSampler: + ) -> Optional[BatchSampler]: + """ + Returns the appropriate batch sampler based on the ``batch_sampler`` argument in ``self.args``. + This batch sampler class supports ``__len__`` and ``__iter__`` methods, and is used as the ``batch_sampler`` + to create the :class:`torch.utils.data.DataLoader`. + + .. note:: + Override this method to provide a custom batch sampler. + + Args: + dataset (Dataset): The dataset to sample from. + batch_size (int): Number of samples per batch. 
+ drop_last (bool): If True, drop the last incomplete batch if the dataset size + is not divisible by the batch size. + valid_label_columns (List[str]): List of column names to check for labels. + The first column name from ``valid_label_columns`` found in the dataset will + be used as the label column. + generator (torch.Generator, optional): Optional random number generator for shuffling + the indices. + """ + if isinstance(dataset, IterableDataset): + if self.args.batch_sampler != BatchSamplers.BATCH_SAMPLER: + logger.warning("When using an IterableDataset, you cannot specify a batch sampler.") + return None + if self.args.batch_sampler == BatchSamplers.NO_DUPLICATES: return NoDuplicatesBatchSampler( dataset=dataset, @@ -473,10 +610,24 @@ def get_batch_sampler( def get_multi_dataset_batch_sampler( self, dataset: ConcatDataset, - batch_samplers: List[BatchSampler], + batch_samplers: list[BatchSampler], generator: Optional[torch.Generator] = None, seed: Optional[int] = 0, ) -> BatchSampler: + """ + Returns the appropriate multi-dataset batch sampler based on the ``multi_dataset_batch_sampler`` argument + in ``self.args``. This batch sampler class supports ``__len__`` and ``__iter__`` methods, and is used as the + ``batch_sampler`` to create the :class:`torch.utils.data.DataLoader`. + + .. note:: + Override this method to provide a custom multi-dataset batch sampler. + + Args: + dataset (ConcatDataset): The concatenation of all datasets. + batch_samplers (List[BatchSampler]): List of batch samplers for each dataset in the concatenated dataset. + generator (torch.Generator, optional): Optional random number generator for shuffling the indices. + seed (int, optional): Optional seed for the random number generator + """ if self.args.multi_dataset_batch_sampler == MultiDatasetBatchSamplers.ROUND_ROBIN: return RoundRobinBatchSampler( dataset=dataset, @@ -503,7 +654,7 @@ def get_train_dataloader(self) -> DataLoader: Subclass and override this method if you want to inject some custom behavior. """ if self.train_dataset is None: - raise ValueError("Trainer: training requires a train_dataset.") + raise ValueError("Training requires specifying a train_dataset to the SentenceTransformerGaudiTrainer.") train_dataset = self.train_dataset data_collator = self.data_collator @@ -512,15 +663,40 @@ def get_train_dataloader(self) -> DataLoader: if self.args.seed: generator.manual_seed(self.args.seed) - if isinstance(train_dataset, DatasetDict): - for dataset_name, dataset in train_dataset.items(): - self.validate_column_names(dataset, dataset_name=dataset_name) - if isinstance(self.loss, dict): - train_dataset = self.add_dataset_name_column(train_dataset) + dataloader_params = { + "collate_fn": data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "persistent_workers": self.args.dataloader_persistent_workers, + "prefetch_factor": self.args.dataloader_prefetch_factor, + } + + if isinstance(train_dataset, IterableDataset): + dataloader_params.update( + { + "batch_size": self.args.train_batch_size, + "drop_last": self.args.dataloader_drop_last, + } + ) + if self.args.batch_sampler != BatchSamplers.BATCH_SAMPLER: + logger.warning("When using an IterableDataset, you cannot specify a batch sampler.") + + elif isinstance(train_dataset, IterableDatasetDict): + raise ValueError( + "Sentence Transformers is not compatible with IterableDatasetDict. Please use a DatasetDict instead." 
+ ) + + elif isinstance(train_dataset, DatasetDict): + for dataset in train_dataset.values(): + if isinstance(dataset, IterableDataset): + raise ValueError( + "Sentence Transformers is not compatible with a DatasetDict containing an IterableDataset." + ) + batch_samplers = [ self.get_batch_sampler( dataset, - batch_size=self.args.per_device_train_batch_size, + batch_size=self.args.train_batch_size, drop_last=self.args.dataloader_drop_last, valid_label_columns=data_collator.valid_label_columns, generator=generator, @@ -535,10 +711,9 @@ def get_train_dataloader(self) -> DataLoader: generator=generator, seed=self.args.seed, ) + dataloader_params["batch_sampler"] = batch_sampler - else: - self.validate_column_names(train_dataset) - + elif isinstance(train_dataset, Dataset): batch_sampler = self.get_batch_sampler( train_dataset, batch_size=self.args.train_batch_size, @@ -546,15 +721,11 @@ def get_train_dataloader(self) -> DataLoader: valid_label_columns=data_collator.valid_label_columns, generator=generator, ) - - dataloader_params = { - "collate_fn": data_collator, - "num_workers": self.args.dataloader_num_workers, - "pin_memory": self.args.dataloader_pin_memory, - "persistent_workers": self.args.dataloader_persistent_workers, - "prefetch_factor": self.args.dataloader_prefetch_factor, - "batch_sampler": batch_sampler, - } + dataloader_params["batch_sampler"] = batch_sampler + else: + raise ValueError( + "Unsupported `train_dataset` type. Use a Dataset, DatasetDict, or IterableDataset for training." + ) # If 'even_batches' is True, it will use the initial few samples to pad out the last sample. This can # cause issues with multi-dataset training, so we want to set this to False. @@ -563,7 +734,9 @@ def get_train_dataloader(self) -> DataLoader: self._train_dataloader = self.accelerator.prepare(DataLoader(train_dataset, **dataloader_params)) return self._train_dataloader - def get_eval_dataloader(self, eval_dataset: Union["Dataset", None] = None) -> DataLoader: + def get_eval_dataloader( + self, eval_dataset: Optional[Union[Dataset, DatasetDict, IterableDataset]] = None + ) -> DataLoader: """ Returns the evaluation [`~torch.utils.data.DataLoader`]. 
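
For reference, the sequence bucketing applied by the Gaudi data collator earlier in this patch (st_gaudi_data_collator_call) pads every tokenized feature up to a fixed-size bucket, presumably to limit HPU graph recompilations caused by dynamic shapes: lengths up to 4096 tokens are rounded to the next power of two, longer ones to the next multiple of 128, with one extra pad column added when two consecutive feature columns would otherwise land in the same bucket. A minimal sketch of that rounding rule (the helper name gaudi_pad_target is illustrative only and not part of the patch):

import math

def gaudi_pad_target(seq_len: int) -> int:
    # Bucket a tokenized sequence length the same way the Gaudi collator does:
    # next power of two up to 4096, next multiple of 128 beyond that.
    if seq_len > 4096:
        return math.ceil(seq_len / 128) * 128
    return 2 ** math.ceil(math.log2(seq_len))

assert gaudi_pad_target(70) == 128
assert gaudi_pad_target(5000) == 5120
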
@@ -578,7 +751,8 @@ def get_eval_dataloader(self, eval_dataset: Union["Dataset", None] = None) -> Da # Prevent errors if the evaluator is set but no eval_dataset is provided if self.evaluator is not None: return DataLoader([]) - raise ValueError("Trainer: evaluation requires an eval_dataset.") + raise ValueError("Evaluation requires specifying an eval_dataset to the SentenceTransformerGaudiTrainer.") + eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset data_collator = self.data_collator @@ -586,14 +760,37 @@ def get_eval_dataloader(self, eval_dataset: Union["Dataset", None] = None) -> Da if self.args.seed: generator.manual_seed(self.args.seed) - # TODO: Correctly validate the column names for the eval_dataset - if isinstance(eval_dataset, DatasetDict): - if isinstance(self.loss, dict): - eval_dataset = self.add_dataset_name_column(eval_dataset) + dataloader_params = { + "collate_fn": data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "persistent_workers": self.args.dataloader_persistent_workers, + "prefetch_factor": self.args.dataloader_prefetch_factor, + } + if isinstance(eval_dataset, IterableDataset): + dataloader_params.update( + { + "batch_size": self.args.eval_batch_size, + "drop_last": self.args.dataloader_drop_last, + } + ) + + elif isinstance(eval_dataset, IterableDatasetDict): + raise ValueError( + "Sentence Transformers is not compatible with IterableDatasetDict. Please use a DatasetDict instead." + ) + + elif isinstance(eval_dataset, DatasetDict): + for dataset in eval_dataset.values(): + if isinstance(dataset, IterableDataset): + raise ValueError( + "Sentence Transformers is not compatible with a DatasetDict containing an IterableDataset." + ) + batch_samplers = [ self.get_batch_sampler( dataset, - batch_size=self.args.per_device_eval_batch_size, + batch_size=self.args.eval_batch_size, drop_last=self.args.dataloader_drop_last, valid_label_columns=data_collator.valid_label_columns, generator=generator, @@ -608,23 +805,22 @@ def get_eval_dataloader(self, eval_dataset: Union["Dataset", None] = None) -> Da generator=generator, seed=self.args.seed, ) - else: + dataloader_params["batch_sampler"] = batch_sampler + + elif isinstance(eval_dataset, Dataset): batch_sampler = self.get_batch_sampler( eval_dataset, - batch_size=self.args.train_batch_size, + batch_size=self.args.eval_batch_size, drop_last=self.args.dataloader_drop_last, valid_label_columns=data_collator.valid_label_columns, generator=generator, ) + dataloader_params["batch_sampler"] = batch_sampler - dataloader_params = { - "collate_fn": data_collator, - "num_workers": self.args.dataloader_num_workers, - "pin_memory": self.args.dataloader_pin_memory, - "persistent_workers": self.args.dataloader_persistent_workers, - "prefetch_factor": self.args.dataloader_prefetch_factor, - "batch_sampler": batch_sampler, - } + else: + raise ValueError( + "Unsupported `eval_dataset` type. Use a Dataset, DatasetDict, or IterableDataset for evaluation." + ) # If 'even_batches' is True, it will use the initial few samples to pad out the last sample. This can # cause issues with multi-dataset training, so we want to set this to False during training. 
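The dataloader refactor above dispatches on the dataset type: map-style datasets get an explicit batch_sampler, while an IterableDataset only receives batch_size/drop_last, since torch.utils.data.DataLoader rejects samplers for iterable datasets. A self-contained sketch of that dispatch with toy datasets (illustrative only):

    import torch
    from torch.utils.data import BatchSampler, DataLoader, Dataset, IterableDataset, RandomSampler


    class MapStyle(Dataset):
        def __len__(self):
            return 8

        def __getitem__(self, i):
            return torch.tensor(i)


    class Streamed(IterableDataset):
        def __iter__(self):
            return iter(torch.arange(8))


    def build_loader(ds, batch_size=4, seed=0):
        params = {"collate_fn": torch.stack}
        if isinstance(ds, IterableDataset):
            # DataLoader forbids (batch_)samplers for iterable datasets.
            params.update({"batch_size": batch_size, "drop_last": False})
        else:
            sampler = RandomSampler(ds, generator=torch.Generator().manual_seed(seed))
            params["batch_sampler"] = BatchSampler(sampler, batch_size=batch_size, drop_last=False)
        return DataLoader(ds, **params)


    print(next(iter(build_loader(MapStyle()))))   # shuffled indices from the batch sampler
    print(next(iter(build_loader(Streamed()))))   # streamed batch in order
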
@@ -632,7 +828,7 @@ def get_eval_dataloader(self, eval_dataset: Union["Dataset", None] = None) -> Da self.accelerator.even_batches = True return self.accelerator.prepare(DataLoader(eval_dataset, **dataloader_params)) - def get_test_dataloader(self, test_dataset: "Dataset") -> DataLoader: + def get_test_dataloader(self, test_dataset: Union[Dataset, DatasetDict, IterableDataset]) -> DataLoader: """ Returns the training [`~torch.utils.data.DataLoader`]. @@ -649,15 +845,38 @@ def get_test_dataloader(self, test_dataset: "Dataset") -> DataLoader: if self.args.seed: generator.manual_seed(self.args.seed) - if isinstance(test_dataset, DatasetDict): - for dataset_name, dataset in test_dataset.items(): - self.validate_column_names(dataset, dataset_name=dataset_name) - if isinstance(self.loss, dict): - test_dataset = self.add_dataset_name_column(test_dataset) + dataloader_params = { + "collate_fn": data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "persistent_workers": self.args.dataloader_persistent_workers, + "prefetch_factor": self.args.dataloader_prefetch_factor, + } + + if isinstance(test_dataset, IterableDataset): + dataloader_params.update( + { + "batch_size": self.args.eval_batch_size, + "drop_last": self.args.dataloader_drop_last, + } + ) + + elif isinstance(test_dataset, IterableDatasetDict): + raise ValueError( + "Sentence Transformers is not compatible with IterableDatasetDict. Please use a DatasetDict instead." + ) + + elif isinstance(test_dataset, DatasetDict): + for dataset in test_dataset.values(): + if isinstance(dataset, IterableDataset): + raise ValueError( + "Sentence Transformers is not compatible with a DatasetDict containing an IterableDataset." + ) + batch_samplers = [ self.get_batch_sampler( dataset, - batch_size=self.args.per_device_train_batch_size, + batch_size=self.args.eval_batch_size, drop_last=self.args.dataloader_drop_last, valid_label_columns=data_collator.valid_label_columns, generator=generator, @@ -672,33 +891,28 @@ def get_test_dataloader(self, test_dataset: "Dataset") -> DataLoader: generator=generator, seed=self.args.seed, ) + dataloader_params["batch_sampler"] = batch_sampler - else: - self.validate_column_names(test_dataset) - + elif isinstance(test_dataset, Dataset): batch_sampler = self.get_batch_sampler( test_dataset, - batch_size=self.args.train_batch_size, + batch_size=self.args.eval_batch_size, drop_last=self.args.dataloader_drop_last, valid_label_columns=data_collator.valid_label_columns, generator=generator, ) + dataloader_params["batch_sampler"] = batch_sampler - dataloader_params = { - "collate_fn": data_collator, - "num_workers": self.args.dataloader_num_workers, - "pin_memory": self.args.dataloader_pin_memory, - "persistent_workers": self.args.dataloader_persistent_workers, - "prefetch_factor": self.args.dataloader_prefetch_factor, - "batch_sampler": batch_sampler, - } + else: + raise ValueError( + "Unsupported `test_dataset` type. Use a Dataset, DatasetDict, or IterableDataset for testing." + ) # If 'even_batches' is True, it will use the initial few samples to pad out the last sample. This can - # cause issues with multi-dataset training, so we want to set this to False. - # For evaluation, setting 'even_batches' to False results in hanging, so we keep it as True there. 
- self.accelerator.even_batches = False - self._train_dataloader = self.accelerator.prepare(DataLoader(test_dataset, **dataloader_params)) - return self._train_dataloader + # cause issues with multi-dataset training, so we want to set this to False during training. + # For evaluation, setting 'even_batches' to False results in hanging, so we keep it as True here. + self.accelerator.even_batches = True + return self.accelerator.prepare(DataLoader(test_dataset, **dataloader_params)) def _save(self, output_dir: Optional[str] = None, state_dict=None) -> None: # If we are executing this function, we are the process zero, so we don't check for that. @@ -708,8 +922,13 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None) -> None: self.model.save_pretrained(output_dir, safe_serialization=self.args.save_safetensors) - if self.tokenizer is not None: - self.tokenizer.save_pretrained(output_dir) + # Transformers v4.46.0 changed the `tokenizer` attribute to a more general `processing_class` attribute + if parse_version(transformers_version) >= parse_version("4.46.0"): + if self.processing_class is not None: + self.processing_class.save_pretrained(output_dir) + else: + if self.tokenizer is not None: + self.tokenizer.save_pretrained(output_dir) # Good practice: save your training arguments together with the trained model torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME)) @@ -717,20 +936,257 @@ def _save(self, output_dir: Optional[str] = None, state_dict=None) -> None: def _load_from_checkpoint(self, checkpoint_path: str) -> None: from sentence_transformers import SentenceTransformer - loaded_model = SentenceTransformer(checkpoint_path) + loaded_model = SentenceTransformer(checkpoint_path, trust_remote_code=self.model.trust_remote_code) self.model.load_state_dict(loaded_model.state_dict()) + def _get_prompt_length(self, prompt: str) -> int: + try: + return self._prompt_length_mapping[prompt] + except KeyError: + prompt_length = self.model.tokenize([prompt])["input_ids"].shape[-1] - 1 + self._prompt_length_mapping[prompt] = prompt_length + return prompt_length + + def _include_prompt_length(self) -> bool: + """ + Return whether the prompt length should be passed to the model's forward method. + + True if the model does not include the prompt in the pooling layer. Can be + overridden by the user if it's useful to include the prompt length. + """ + for module in self.model: + if isinstance(module, Pooling): + return not module.include_prompt + return False + + @staticmethod + def add_prompts_or_dataset_name_transform( + batch: Dict[str, List[Any]], + prompts: Optional[Union[Dict[str, str], str]] = None, + prompt_lengths: Optional[Union[Dict[str, int], int]] = None, + dataset_name: Optional[str] = None, + transform: Optional[Callable[[Dict[str, List[Any]]], Dict[str, List[Any]]]] = None, + **kwargs, + ) -> Dict[str, List[Any]]: + """A transform/map function that adds prompts or dataset names to the batch. + + Args: + batch (dict[str, list[Any]]): The batch of data, where each key is a column name and each value + is a list of values. + prompts (dict[str, str] | str | None, optional): An optional mapping of column names to string + prompts, or a string prompt for all columns. Defaults to None. + prompt_lengths (dict[str, int] | int | None, optional): An optional mapping of prompts names to + prompt token length, or a prompt token length if the prompt is a string. Defaults to None. 
+ dataset_name (str | None, optional): The name of this dataset, only if there are multiple datasets + that use a different loss. Defaults to None. + transform (Callable[[dict[str, list[Any]]], dict[str, list[Any]]], optional): An optional transform + function to apply on the batch before adding prompts, etc. Defaults to None. + + Returns: + dict[str, list[Any]]: The "just-in-time" transformed batch with prompts and/or dataset names added. + """ + # If the dataset is a Dataset(Dict), then we use set_transform and we want to also apply any + # previous transform if it exists + if transform: + batch = transform(batch) + + # Return if the batch has no columns... + if not batch: + return batch + + # ... or if it's empty + first_column = list(batch.keys())[0] + if not batch[first_column]: + return batch + + # Apply one prompt to all columns... + if isinstance(prompts, str): + for column_name, column in list(batch.items()): + if isinstance(column[0], str): + batch[column_name] = [prompts + value for value in column] + + if prompt_lengths is not None: + batch[f"{column_name}_prompt_length"] = [prompt_lengths] * len(column) + + # ... or a column-specific prompt + if isinstance(prompts, dict): + for column_name, prompt in prompts.items(): + if column_name in batch: + batch[column_name] = [prompt + value for value in batch[column_name]] + + if prompt_lengths: + batch[f"{column_name}_prompt_length"] = [prompt_lengths[prompt]] * len(batch[column_name]) + + # If we have multiple losses, then we need to add the dataset name to the batch + if dataset_name: + batch["dataset_name"] = [dataset_name] * len(batch[first_column]) + + return batch + + def maybe_add_prompts_or_dataset_name_column( + self, + dataset_dict: Union[DatasetDict, Dataset, None], + prompts: Optional[Union[Dict[str, Dict[str, str]], Dict[str, str], str]] = None, + dataset_name: Optional[str] = None, + ) -> Union[DatasetDict, Dataset, None]: + """ + Maybe add prompts or dataset names to the dataset. We add the dataset_name column to the dataset if: + + 1. The loss is a dictionary and the dataset is a DatasetDict, or + 2. The prompts contain a mapping to dataset names. + + There are 4 cases for the prompts: + + 1. `str`: One prompt for all datasets and columns. + 2. `dict[str, str]`: A column to prompt mapping. + 3. `dict[str, str]`: A dataset to prompt mapping. + 4. `dict[str, dict[str, str]]`: A dataset to column to prompt mapping. + + And 2 cases for the dataset: + + A. `Dataset`: A single dataset. + B. `DatasetDict`: A dictionary of datasets. + + 3A is not allowed, and 2A doesn't make sense. + + Args: + dataset_dict (DatasetDict | Dataset | None): The dataset to add prompts or dataset names to. + + Returns: + DatasetDict | Dataset | None: The dataset with prompts or dataset names added. 
+ """ + if dataset_dict is None: + return None + + include_dataset_name = isinstance(self.loss, dict) + + # If we've already added the transform to this (iterable) dataset, don't add it again + if hasattr(dataset_dict, "_sentence_transformers_preprocessed"): + return dataset_dict + + # Ensure that there's no "dataset_name"/"return_loss" columns in the unprocessed datasets + self.validate_column_names(dataset_dict, dataset_name=dataset_name) + + # Only add if 1) we have prompts or 2) we need the dataset name for the loss dictionary + if prompts or include_dataset_name: + include_prompt_lengths = self._include_prompt_length() + dataset_dict = self.add_prompts_or_dataset_name_column( + dataset_dict, + prompts=prompts, + include_prompt_lengths=include_prompt_lengths, + include_dataset_name=include_dataset_name, + ) + return dataset_dict + + def add_prompts_or_dataset_name_column( + self, + dataset_dict: Union[DatasetDict, IterableDatasetDict, Dataset, IterableDataset], + prompts: Optional[Union[Dict[str, str], str]] = None, + dataset_name: Optional[str] = None, + include_prompt_lengths: bool = False, + include_dataset_name: bool = False, + ) -> Union[DatasetDict, Dataset, None]: + # If we have DatasetDict, recurse + if isinstance(dataset_dict, (IterableDatasetDict, DatasetDict)): + for dataset_name, dataset in dataset_dict.items(): + # If prompts is a dictionary that matches the dataset names, then take the nested prompts + nested_prompts = prompts.get(dataset_name, prompts) if isinstance(prompts, dict) else prompts + dataset_dict[dataset_name] = self.add_prompts_or_dataset_name_column( + dataset_dict=dataset, + prompts=nested_prompts, + dataset_name=dataset_name if include_dataset_name else None, + include_prompt_lengths=include_prompt_lengths, + include_dataset_name=include_dataset_name, + ) + return dataset_dict + + # Get the prompt lengths if needed for the pooling layer + prompt_lengths = None + if prompts: + if isinstance(prompts, str): + if include_prompt_lengths: + prompt_lengths = self._get_prompt_length(prompts) + elif isinstance(prompts, dict): + first_key = list(prompts.keys())[0] + if isinstance(prompts[first_key], dict): + raise ValueError( + "The prompts provided to the trainer are a nested dictionary. In this setting, the first " + "level of the dictionary should map to dataset names and the second level to column names. " + "However, as the provided dataset is a not a DatasetDict, no dataset names can be inferred. " + f"The keys to the provided prompts dictionary are {list(prompts.keys())!r}" + ) + if include_prompt_lengths: + # If prompt columns exist, add the prompt length column + prompt_lengths = { + prompt: self._get_prompt_length(prompt) + for column_name, prompt in prompts.items() + if column_name in dataset_dict.column_names + } + + # If we have a Dataset, we can set the transform directly... + if isinstance(dataset_dict, Dataset): + dataset_dict.set_transform( + partial( + self.add_prompts_or_dataset_name_transform, + prompts=prompts, + prompt_lengths=prompt_lengths, + dataset_name=dataset_name, + **dataset_dict._format_kwargs, + ) + ) + + # ... 
otherwise, we have an IterableDataset and we need to map it, which performs the same operation as above + elif isinstance(dataset_dict, IterableDataset): + # Update the features to include the new columns + features = dataset_dict.features + if dataset_name: + features["dataset_name"] = Value("string") + if prompt_lengths: + if isinstance(prompts, str): + for column_name in dataset_dict.column_names: + feature = features[column_name] + if isinstance(feature, Value) and feature.dtype in ("string", "large_string"): + features[f"{column_name}_prompt_length"] = Value("int16") + elif isinstance(prompts, dict): + for column_name, prompt in prompts.items(): + feature = features[column_name] + if ( + prompt in prompt_lengths + and isinstance(feature, Value) + and feature.dtype in ("string", "large_string") + ): + features[f"{column_name}_prompt_length"] = Value("int16") + + dataset_dict = dataset_dict.map( + partial( + self.add_prompts_or_dataset_name_transform, + prompts=prompts, + prompt_lengths=prompt_lengths, + dataset_name=dataset_name, + ), + batched=True, + features=features, + ) + + else: + raise ValueError("Unsupported dataset type.") + + # Add a tag to the dataset to indicate that it has been preprocessed, to ensure that we don't apply the map or + # transform multiple times. + dataset_dict._sentence_transformers_preprocessed = True + return dataset_dict + def create_model_card( self, language: Optional[str] = None, license: Optional[str] = None, - tags: Union[str, List[str], None] = None, + tags: Optional[Union[str, List[str]]] = None, model_name: Optional[str] = None, finetuned_from: Optional[str] = None, - tasks: Union[str, List[str], None] = None, - dataset_tags: Union[str, List[str], None] = None, - dataset: Union[str, List[str], None] = None, - dataset_args: Union[str, List[str], None] = None, + tasks: Optional[Union[str, List[str]]] = None, + dataset_tags: Optional[Union[str, List[str]]] = None, + dataset: Optional[Union[str, List[str]]] = None, + dataset_args: Optional[Union[str, List[str]]] = None, **kwargs, ) -> None: if not self.is_world_process_zero(): @@ -744,3 +1200,41 @@ def create_model_card( self.model.model_card_data.add_tags(tags) self.model._create_model_card(self.args.output_dir, model_name=model_name) + + def get_optimizer_cls_and_kwargs( + self, args: SentenceTransformerGaudiTrainingArguments, model: Optional[SentenceTransformer] = None + ) -> Tuple[Any, Any]: + """ + We have to override the optimizer_grouped_parameters because the Trainer superclass bases it on the `model` + itself, but the SentenceTransformer losses can have weights that should be updated as well, e.g. + SoftmaxLoss (see #2872). + + This method requires `transformers` >= 4.43.0. + """ + + if isinstance(self.loss, dict): + loss_model = torch.nn.Sequential(OrderedDict(self.loss)) + else: + loss_model = self.loss + optimizer_cls, optimizer_kwargs = super().get_optimizer_cls_and_kwargs(args, loss_model) + + # If the kwargs were not overridden by the super() call, then we should override them here so that the potential + # weights in the loss(es) can also be updated. 
+ if not {"params", "model", "optimizer_dict"} & set(optimizer_kwargs.keys()): + decay_parameters = self.get_decay_parameter_names(loss_model) + optimizer_kwargs["optimizer_dict"] = [ + { + "params": [ + p for n, p in loss_model.named_parameters() if (n in decay_parameters and p.requires_grad) + ], + "weight_decay": self.args.weight_decay, + }, + { + "params": [ + p for n, p in loss_model.named_parameters() if (n not in decay_parameters and p.requires_grad) + ], + "weight_decay": 0.0, + }, + ] + + return optimizer_cls, optimizer_kwargs diff --git a/optimum/habana/sentence_transformers/st_gaudi_training_args.py b/optimum/habana/sentence_transformers/st_gaudi_training_args.py index 07f98c3fbc..b47434c10a 100644 --- a/optimum/habana/sentence_transformers/st_gaudi_training_args.py +++ b/optimum/habana/sentence_transformers/st_gaudi_training_args.py @@ -14,7 +14,7 @@ # limitations under the License. import logging from dataclasses import dataclass, field -from typing import Union +from typing import Dict, Optional, Union from sentence_transformers.training_args import BatchSamplers, MultiDatasetBatchSamplers from transformers.training_args import ParallelMode @@ -28,9 +28,38 @@ @dataclass class SentenceTransformerGaudiTrainingArguments(GaudiTrainingArguments): """ - Inherits from GaudiTrainingArguments and adapted from: https://github.com/UKPLab/sentence-transformers/blob/v3.0.1/sentence_transformers/training_args.py + SentenceTransformerGaudiTrainingArguments extends :class:`~transformers.TrainingArguments` with additional arguments + specific to Sentence Transformers. See :class:`~transformers.TrainingArguments` for the complete list of + available arguments. + + It inherits from GaudiTrainingArguments and adapted from: + https://github.com/UKPLab/sentence-transformers/blob/v3.3.1/sentence_transformers/training_args.py + + Args: + output_dir (`str`): + The output directory where the model checkpoints will be written. + prompts (`Union[Dict[str, Dict[str, str]], Dict[str, str], str]`, *optional*): + The prompts to use for each column in the training, evaluation and test datasets. Four formats are accepted: + + 1. `str`: A single prompt to use for all columns in the datasets, regardless of whether the training/evaluation/test + datasets are :class:`datasets.Dataset` or a :class:`datasets.DatasetDict`. + 2. `Dict[str, str]`: A dictionary mapping column names to prompts, regardless of whether the training/evaluation/test + datasets are :class:`datasets.Dataset` or a :class:`datasets.DatasetDict`. + 3. `Dict[str, str]`: A dictionary mapping dataset names to prompts. This should only be used if your training/evaluation/test + datasets are a :class:`datasets.DatasetDict` or a dictionary of :class:`datasets.Dataset`. + 4. `Dict[str, Dict[str, str]]`: A dictionary mapping dataset names to dictionaries mapping column names to + prompts. This should only be used if your training/evaluation/test datasets are a + :class:`datasets.DatasetDict` or a dictionary of :class:`datasets.Dataset`. + + batch_sampler (Union[:class:`~sentence_transformers.training_args.BatchSamplers`, `str`], *optional*): + The batch sampler to use. See :class:`~sentence_transformers.training_args.BatchSamplers` for valid options. + Defaults to ``BatchSamplers.BATCH_SAMPLER``. + multi_dataset_batch_sampler (Union[:class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers`, `str`], *optional*): + The multi-dataset batch sampler to use. 
See :class:`~sentence_transformers.training_args.MultiDatasetBatchSamplers` + for valid options. Defaults to ``MultiDatasetBatchSamplers.PROPORTIONAL``. """ + prompts: Optional[Union[Dict[str, Dict[str, str]], Dict[str, str], str]] = None batch_sampler: Union[BatchSamplers, str] = field( default=BatchSamplers.BATCH_SAMPLER, metadata={"help": "The batch sampler to use."} ) diff --git a/optimum/habana/sentence_transformers/st_gaudi_transformer.py b/optimum/habana/sentence_transformers/st_gaudi_transformer.py index de56caaa9e..db12501562 100644 --- a/optimum/habana/sentence_transformers/st_gaudi_transformer.py +++ b/optimum/habana/sentence_transformers/st_gaudi_transformer.py @@ -4,7 +4,7 @@ import torch -from optimum.habana.utils import to_device_dtype +from ..utils import to_device_dtype def st_gaudi_transformer_save(self, output_path: str, safe_serialization: bool = True) -> None: diff --git a/optimum/habana/transformers/generation/configuration_utils.py b/optimum/habana/transformers/generation/configuration_utils.py index ec04f139c9..4ed9cd80a2 100644 --- a/optimum/habana/transformers/generation/configuration_utils.py +++ b/optimum/habana/transformers/generation/configuration_utils.py @@ -21,6 +21,8 @@ class GaudiGenerationConfig(GenerationConfig): is also running in lower precision. limit_hpu_graphs (`bool`, *optional*): Skip HPU Graph usage for first token to save memory + clear_hpu_graphs_cache (`bool`, *optional*): + Clear HPU Graph cache reuse_cache (`bool`, *optional*): Whether to reuse key/value cache for decoding. It should save memory. bucket_size (`int`, *optional*): @@ -37,6 +39,8 @@ class GaudiGenerationConfig(GenerationConfig): Whether to enable causal_mask if use Habana flash attention. flash_attention_fast_softmax_mode (`bool`, *optional*): Whether to use fast softmax with reduced precision if use Habana flash attention. + attn_batch_split (`int`, *optional*): + Specify the batch size split for attention and mlp layers. 1 for no split. This is enabled only for prompt. """ def __init__(self, **kwargs): @@ -46,6 +50,7 @@ def __init__(self, **kwargs): self.ignore_eos = kwargs.get("ignore_eos", None) self.attn_softmax_bf16 = kwargs.get("attn_softmax_bf16", None) self.limit_hpu_graphs = kwargs.get("limit_hpu_graphs", None) + self.clear_hpu_graphs_cache = kwargs.get("clear_hpu_graphs_cache", None) self.reuse_cache = kwargs.get("reuse_cache", None) self.bucket_size = kwargs.get("bucket_size", -1) self.bucket_internal = kwargs.get("bucket_internal", None) @@ -56,3 +61,4 @@ def __init__(self, **kwargs): self.flash_attention_fast_softmax = kwargs.get("flash_attention_fast_softmax", None) self.use_fused_rope = kwargs.get("use_fused_rope", None) self.valid_sequence_lengths = kwargs.get("valid_sequence_lengths", None) + self.attn_batch_split = kwargs.get("attn_batch_split", 1) diff --git a/optimum/habana/transformers/generation/utils.py b/optimum/habana/transformers/generation/utils.py old mode 100644 new mode 100755 index fe198f24d9..73347a0008 --- a/optimum/habana/transformers/generation/utils.py +++ b/optimum/habana/transformers/generation/utils.py @@ -112,10 +112,13 @@ "paligemma", "idefics2", "mllama", + "video_llava", "minicpm3", "baichuan", "deepseek_v2", + "deepseek_v3", "chatglm", + "qwen2_vl", ] # Initial generated token index is set to 1 to accomodate SOS (start of string) token. 
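A hedged usage sketch for the two generation options documented above; the surrounding settings are illustrative and the import path follows the package layout shown in this patch:

    from optimum.habana.transformers.generation import GaudiGenerationConfig

    generation_config = GaudiGenerationConfig(
        max_new_tokens=128,
        limit_hpu_graphs=True,
        bucket_internal=True,
        bucket_size=128,
        clear_hpu_graphs_cache=True,  # drop cached HPU graphs after the decode loop instead of only clearing inputs
        attn_batch_split=2,           # split the prompt batch across attention/MLP layers; 1 (default) means no split
    )
    # then: outputs = model.generate(**inputs, generation_config=generation_config)
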
@@ -1092,8 +1095,10 @@ def generate( "gemma2", "baichuan", "chatglm", + "deepseek_v2", + "deepseek_v3", ], ( - "reuse_cache only supported by llama, mistral, falcon, mixtral, phi, qwen2, qwen2_moe, gemma, gemma2, starcoder2, baichuan and chatglm at the moment" + "reuse_cache only supported by llama, mistral, falcon, mixtral, phi, qwen2, qwen2_moe, gemma, gemma2, starcoder2, baichuan, chatglm and deepseek_v2 at the moment" ) if not generation_config.bucket_internal: assert generation_config.bucket_size <= 0, ( @@ -1261,9 +1266,15 @@ def generate( model_kwargs["use_hpu_graphs"] = hpu_graphs model_kwargs["limit_hpu_graphs"] = generation_config.limit_hpu_graphs + # determine whether to clear hpu graphs cache + model_kwargs["clear_hpu_graphs_cache"] = generation_config.clear_hpu_graphs_cache + # prepare for allocate kv cache model_kwargs["reuse_cache"] = generation_config.reuse_cache + # prepare for attention batch splitting + model_kwargs["attn_batch_split"] = generation_config.attn_batch_split + # determine whether flash attention needs to be used model_kwargs["use_flash_attention"] = generation_config.use_flash_attention model_kwargs["flash_attention_recompute"] = True if generation_config.flash_attention_recompute else False @@ -1300,6 +1311,7 @@ def generate( "gemma2", "qwen2_moe", "baichuan", + "deepseek_v2", ]: if ( hasattr(self.config, "max_position_embeddings") @@ -2467,9 +2479,9 @@ def _sample( output_idx = torch.tensor(outputs.logits.shape[-2], device=input_ids.device) else: output_idx = token_idx + outputs.logits.shape[-2] - input_ids.shape[-1] - next_token_logits = torch.index_select(outputs.logits, -2, output_idx - 1).squeeze(-2) + next_token_logits = torch.index_select(outputs.logits, -2, output_idx - 1).squeeze(-2).float() else: - next_token_logits = torch.index_select(outputs.logits, -2, token_idx - 1).squeeze(-2) + next_token_logits = torch.index_select(outputs.logits, -2, token_idx - 1).squeeze(-2).float() next_token_scores = logits_processor(input_ids, next_token_logits) else: # .float() is needed to retain precision for later logits manipulations @@ -2503,7 +2515,9 @@ def _sample( # token selection if do_sample: - probs = torch.nn.functional.softmax(next_token_scores, dim=-1) + # Workaround on HPU for output quality issues with torch.multinomial for lower precision models + # Distribution sampled by torch.multinomial may be affected by next_token_logits upcast to float + probs = torch.nn.functional.softmax(next_token_scores, dim=-1).to(outputs.logits.dtype) # TODO (joao): this OP throws "skipping cudagraphs due to ['incompatible ops']", find solution next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) else: @@ -2611,8 +2625,14 @@ def _sample( and not model_kwargs.get("reuse_cache", False) and bucket_internal ): + # Clear HPU graphs cache + if model_kwargs.get("clear_hpu_graphs_cache", False): + self.clear_cache() + # Clear HPU graphs input tensors of the decode phase after the full generation while loop - self.clear_inputs() + else: + self.clear_inputs() + # Delete past key value tensors self._remove_past_key_values(model_kwargs) diff --git a/optimum/habana/transformers/modeling_utils.py b/optimum/habana/transformers/modeling_utils.py index 27f4de8820..53ab91433b 100644 --- a/optimum/habana/transformers/modeling_utils.py +++ b/optimum/habana/transformers/modeling_utils.py @@ -18,6 +18,14 @@ import transformers.utils.fx from ..accelerate.utils import extract_model_from_parallel +from ..accelerate.utils.modeling import gaudi_check_device_same +from 
..quantizers.bitsandbytes import ( + gaudi_bitsandbytesconfig_post_init, + gaudi_create_quantized_param, + gaudi_is_bitsandbytes_available, + gaudi_validate_bnb_backend_availability, + gaudi_validate_environment, +) from .generation import ( GaudiGenerationConfig, GaudiGenerationMixin, @@ -26,6 +34,14 @@ gaudi_MaxTimeCriteria_call, gaudi_StoppingCriteriaList_call, ) +from .integrations.awq import ( + GaudiAWQLinearVersion, + gaudi_awq_config_post_init, + gaudi_awq_quantizer_process_model_after_weight_loading, + gaudi_awq_quantizer_process_model_before_weight_loading, + gaudi_awq_quantizer_validate_environment, +) +from .modeling_utils_transformers import load_state_dict from .models import ( GAUDI_WHISPER_ATTENTION_CLASSES, BaichuanConfig, @@ -40,6 +56,8 @@ DeepseekTokenizerFast, DeepseekV2Config, DeepseekV2ForCausalLM, + DeepseekV3Config, + DeepseekV3ForCausalLM, Gaudi2Idefics2ImageProcessor, GaudiBloomForCausalLM, GaudiBloomMLP, @@ -104,6 +122,7 @@ GaudiMixtralDecoderLayer, GaudiMixtralForCausalLM, GaudiMixtralModel, + GaudiMixtralSparseMoeBlock, GaudiMllamaCrossAttentionDecoderLayer, GaudiMllamaForCausalLM, GaudiMllamaForConditionalGeneration, @@ -139,6 +158,13 @@ GaudiQwen2MoeForCausalLM, GaudiQwen2MoeMLP, GaudiQwen2MoeModel, + GaudiQwen2VisionSdpaAttention, + GaudiQwen2VisionTransformerPretrainedModel, + GaudiQwen2VLDecoderLayer, + GaudiQwen2VLForConditionalGeneration, + GaudiQwen2VLModel, + GaudiQwen2VLSdpaAttention, + GaudiQwen2VLVisionBlock, GaudiStableLmAttention, GaudiStableLmDecoderLayer, GaudiStableLmForCausalLM, @@ -146,6 +172,7 @@ GaudiStarcoder2DecoderLayer, GaudiStarcoder2ForCausalLM, GaudiStarcoder2Model, + GaudiVideoLlavaForConditionalGeneration, GaudiWav2Vec2SdpaAttention, GaudiWhisperDecoder, GaudiWhisperDecoderLayer, @@ -218,9 +245,6 @@ gaudi_MambaForCausalLM_prepare_inputs_for_generation, gaudi_MambaForCausalLM_update_model_kwargs_for_generation, gaudi_mistral_rmsnorm_forward, - gaudi_mixtral_block_dynamic_moe_forward, - gaudi_mixtral_block_moe_forward, - gaudi_mixtral_block_sparse_moe_forward, gaudi_mixtral_rmsnorm_forward, gaudi_opt_attention_forward, gaudi_opt_decoder_forward, @@ -265,6 +289,7 @@ gaudi_xglm_attention_forward, gaudi_xglm_decoder_layer_forward, gaudi_xglm_model_forward, + gaudi_XLMRoberta_Sdpa_SelfAttention_forward, ) @@ -276,6 +301,15 @@ def adapt_transformers_to_gaudi(): accelerate.utils.extract_model_from_parallel = extract_model_from_parallel accelerate.utils.other.extract_model_from_parallel = extract_model_from_parallel accelerate.accelerator.extract_model_from_parallel = extract_model_from_parallel + accelerate.utils.modeling.check_device_same = gaudi_check_device_same + + transformers.utils.quantization_config.BitsAndBytesConfig.post_init = gaudi_bitsandbytesconfig_post_init + transformers.utils.import_utils.is_bitsandbytes_available = gaudi_is_bitsandbytes_available + transformers.utils.is_bitsandbytes_available = gaudi_is_bitsandbytes_available + transformers.quantizers.quantizer_bnb_4bit.is_bitsandbytes_available = gaudi_is_bitsandbytes_available + transformers.integrations.bitsandbytes.validate_bnb_backend_availability = gaudi_validate_bnb_backend_availability + transformers.quantizers.quantizer_bnb_4bit.Bnb4BitHfQuantizer.validate_environment = gaudi_validate_environment + transformers.quantizers.quantizer_bnb_4bit.Bnb4BitHfQuantizer.create_quantized_param = gaudi_create_quantized_param # models that support symbolic tracing should be added to this list models_with_tracing_support = [] @@ -283,6 +317,9 @@ def 
adapt_transformers_to_gaudi(): # optimize Conv1D transformers.pytorch_utils.Conv1D.forward = gaudi_conv1d_forward + # override of load_state_dict for deepseekv3. Delete on upgrade to transformers v4.48 + transformers.modeling_utils.load_state_dict = load_state_dict + # Optimization tweak for ViT transformers.models.vit.modeling_vit.ViTSelfAttention.forward = gaudi_vit_self_attention_forward @@ -561,13 +598,7 @@ def adapt_transformers_to_gaudi(): transformers.models.mixtral.modeling_mixtral.MixtralAttention = GaudiMixtralAttention transformers.models.mixtral.modeling_mixtral.MixtralForCausalLM = GaudiMixtralForCausalLM transformers.models.mixtral.modeling_mixtral.MixtralModel = GaudiMixtralModel - transformers.models.mixtral.modeling_mixtral.MixtralSparseMoeBlock.sparse_moe_forward = ( - gaudi_mixtral_block_sparse_moe_forward - ) - transformers.models.mixtral.modeling_mixtral.MixtralSparseMoeBlock.dynamic_moe_forward = ( - gaudi_mixtral_block_dynamic_moe_forward - ) - transformers.models.mixtral.modeling_mixtral.MixtralSparseMoeBlock.forward = gaudi_mixtral_block_moe_forward + transformers.models.mixtral.modeling_mixtral.MixtralSparseMoeBlock = GaudiMixtralSparseMoeBlock transformers.models.mixtral.modeling_mixtral.MixtralDecoderLayer = GaudiMixtralDecoderLayer transformers.models.mixtral.modeling_mixtral.MixtralRMSNorm.forward = gaudi_mixtral_rmsnorm_forward transformers.models.mixtral.configuration_mixtral.MixtralConfig = MixtralConfig @@ -648,6 +679,19 @@ def adapt_transformers_to_gaudi(): gaudi_qwen2moe_block_sparse_moe_forward ) + # Optimization for qwen2-vl Gaudi + transformers.models.qwen2_vl.modeling_qwen2_vl.VisionSdpaAttention = GaudiQwen2VisionSdpaAttention + transformers.models.qwen2_vl.modeling_qwen2_vl.Qwen2VLVisionBlock = GaudiQwen2VLVisionBlock + transformers.models.qwen2_vl.modeling_qwen2_vl.Qwen2VisionTransformerPretrainedModel = ( + GaudiQwen2VisionTransformerPretrainedModel + ) + transformers.models.qwen2_vl.modeling_qwen2_vl.Qwen2VLSdpaAttention = GaudiQwen2VLSdpaAttention + transformers.models.qwen2_vl.modeling_qwen2_vl.Qwen2VLDecoderLayer = GaudiQwen2VLDecoderLayer + transformers.models.qwen2_vl.modeling_qwen2_vl.Qwen2VLModel = GaudiQwen2VLModel + transformers.models.qwen2_vl.modeling_qwen2_vl.Qwen2VLForConditionalGeneration = ( + GaudiQwen2VLForConditionalGeneration + ) + # Optimization for stablelm on Gaudi transformers.models.stablelm.modeling_stablelm.StableLmAttention = GaudiStableLmAttention transformers.models.stablelm.modeling_stablelm.StableLmDecoderLayer = GaudiStableLmDecoderLayer @@ -677,6 +721,11 @@ def adapt_transformers_to_gaudi(): transformers.models.falcon_mamba.modeling_falcon_mamba.FalconMambaModel.forward = gaudi_FalconMambaModel_forward transformers.models.falcon_mamba.modeling_falcon_mamba.FalconMambaRMSNorm.forward = gaudi_llama_rmsnorm_forward + # Optimization for VideoLlava on Gaudi + transformers.models.video_llava.modeling_video_llava.VideoLlavaForConditionalGeneration = ( + GaudiVideoLlavaForConditionalGeneration + ) + # Optimization for Whisper on Gaudi transformers.models.whisper.modeling_whisper.WhisperSdpaAttention = GaudiWhisperSdpaAttention transformers.models.whisper.modeling_whisper.WhisperDecoderLayer = GaudiWhisperDecoderLayer @@ -701,9 +750,12 @@ def adapt_transformers_to_gaudi(): transformers.AutoConfig.register("deci", DeciLMConfig) transformers.AutoModelForCausalLM.register(DeciLMConfig, DeciLMForCausalLM) + # Optimization for deepseek on Gaudi transformers.AutoConfig.register("deepseek_v2", DeepseekV2Config) 
transformers.AutoModelForCausalLM.register(DeepseekV2Config, DeepseekV2ForCausalLM) transformers.AutoTokenizer.register(DeepseekV2Config, fast_tokenizer_class=DeepseekTokenizerFast) + transformers.AutoConfig.register("deepseek_v3", DeepseekV3Config) + transformers.AutoModelForCausalLM.register(DeepseekV3Config, DeepseekV3ForCausalLM) # Optimization for cohere on Gaudi transformers.models.cohere.modeling_cohere.CohereDecoderLayer = GaudiCohereDecoderLayer @@ -720,6 +772,11 @@ def adapt_transformers_to_gaudi(): transformers.AutoConfig.register("minicpm3", MiniCPM3Config) transformers.AutoModelForCausalLM.register(MiniCPM3Config, MiniCPM3ForCausalLM) + # Optimization for XLMRoberta model on Gaudi + transformers.models.xlm_roberta.modeling_xlm_roberta.XLMRobertaSdpaSelfAttention.forward = ( + gaudi_XLMRoberta_Sdpa_SelfAttention_forward + ) + # Optimization for Baichuan2 on Gaudi transformers.AutoConfig.register("baichuan", BaichuanConfig) transformers.AutoTokenizer.register(BaichuanConfig, slow_tokenizer_class=BaichuanTokenizer) @@ -733,6 +790,16 @@ def adapt_transformers_to_gaudi(): transformers.AutoModelForSeq2SeqLM.register(ChatGLMConfig, ChatGLMForConditionalGeneration) transformers.AutoModelForSequenceClassification.register(ChatGLMConfig, ChatGLMForSequenceClassification) + transformers.quantizers.quantizer_awq.AwqQuantizer.validate_environment = gaudi_awq_quantizer_validate_environment + transformers.quantizers.quantizer_awq.AwqQuantizer._process_model_before_weight_loading = ( + gaudi_awq_quantizer_process_model_before_weight_loading + ) + transformers.quantizers.quantizer_awq.AwqQuantizer._process_model_after_weight_loading = ( + gaudi_awq_quantizer_process_model_after_weight_loading + ) + transformers.utils.quantization_config.AWQLinearVersion = GaudiAWQLinearVersion + transformers.utils.quantization_config.AwqConfig.post_init = gaudi_awq_config_post_init + # Optimization for DETR model on Gaudi transformers.models.detr.modeling_detr.DetrConvModel.forward = gaudi_DetrConvModel_forward transformers.models.detr.modeling_detr.DetrHungarianMatcher.forward = gaudi_DetrHungarianMatcher_forward diff --git a/optimum/habana/transformers/models/__init__.py b/optimum/habana/transformers/models/__init__.py index ffcfa4ccbb..b784c1c895 100644 --- a/optimum/habana/transformers/models/__init__.py +++ b/optimum/habana/transformers/models/__init__.py @@ -71,6 +71,10 @@ DeepseekV2Config, DeepseekV2ForCausalLM, ) +from .deepseek_v3 import ( + DeepseekV3Config, + DeepseekV3ForCausalLM, +) from .detr import ( gaudi_DetrConvModel_forward, gaudi_DetrHungarianMatcher_forward, @@ -183,10 +187,8 @@ GaudiMixtralDecoderLayer, GaudiMixtralForCausalLM, GaudiMixtralModel, + GaudiMixtralSparseMoeBlock, MixtralConfig, - gaudi_mixtral_block_dynamic_moe_forward, - gaudi_mixtral_block_moe_forward, - gaudi_mixtral_block_sparse_moe_forward, gaudi_mixtral_rmsnorm_forward, ) from .mllama import ( @@ -256,6 +258,15 @@ gaudi_qwen2moe_block_sparse_moe_forward, gaudi_qwen2moe_rmsnorm_forward, ) +from .qwen2_vl import ( + GaudiQwen2VisionSdpaAttention, + GaudiQwen2VisionTransformerPretrainedModel, + GaudiQwen2VLDecoderLayer, + GaudiQwen2VLForConditionalGeneration, + GaudiQwen2VLModel, + GaudiQwen2VLSdpaAttention, + GaudiQwen2VLVisionBlock, +) from .seamless_m4t import ( gaudi_SeamlessM4TAttention_forward, gaudi_SeamlessM4TCodeHifiGan_get_output_hifigan_lengths, @@ -296,6 +307,7 @@ gaudi_T5Stack_forward, ) from .table_transformer import gaudi_table_transformer_conv_encoder_forward +from .video_llava import 
GaudiVideoLlavaForConditionalGeneration from .vision_encoder_decoder import ( gaudi_VisionEncoderDecoderModel_prepare_inputs_for_generation, ) @@ -325,3 +337,4 @@ gaudi_xglm_decoder_layer_forward, gaudi_xglm_model_forward, ) +from .xlm_roberta import gaudi_XLMRoberta_Sdpa_SelfAttention_forward diff --git a/optimum/habana/transformers/models/baichuan/modeling_baichuan.py b/optimum/habana/transformers/models/baichuan/modeling_baichuan.py index ca9498e0f1..a109251647 100644 --- a/optimum/habana/transformers/models/baichuan/modeling_baichuan.py +++ b/optimum/habana/transformers/models/baichuan/modeling_baichuan.py @@ -43,8 +43,7 @@ from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast from transformers.utils import logging -from optimum.habana.transformers.modeling_attn_mask_utils import _gaudi_prepare_4d_causal_attention_mask - +from ...modeling_attn_mask_utils import _gaudi_prepare_4d_causal_attention_mask from .configuration_baichuan import BaichuanConfig from .generation_utils import TextIterStreamer, build_chat_input diff --git a/optimum/habana/transformers/models/deepseek_v2/modeling_deepseek_v2.py b/optimum/habana/transformers/models/deepseek_v2/modeling_deepseek_v2.py index ee271b7254..674772ffdc 100644 --- a/optimum/habana/transformers/models/deepseek_v2/modeling_deepseek_v2.py +++ b/optimum/habana/transformers/models/deepseek_v2/modeling_deepseek_v2.py @@ -20,20 +20,34 @@ """PyTorch DeepSeekV2 model. Adapted from https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite/resolve/main/modeling_deepseek.py""" import math +import os import warnings from typing import List, Optional, Tuple, Union +import habana_frameworks.torch.core as htcore import torch import torch.distributed as dist + +# This makes `_prepare_4d_causal_attention_mask` a leaf function in the FX graph. +# It means that the function will not be traced through and simply appear as a node in the graph. 
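As the comment above notes, a function registered with torch.fx.wrap is kept as a single call_function node during symbolic tracing rather than being traced through. A tiny standalone illustration with a toy function (not from this patch):

    import torch
    import torch.fx


    def clamp_sign(x):
        # Data-dependent control flow like this would fail symbolic tracing
        # if the tracer stepped inside the function body.
        return x if x.sum() > 0 else -x


    torch.fx.wrap("clamp_sign")  # register as a leaf; it appears as one node in the traced graph


    class Toy(torch.nn.Module):
        def forward(self, x):
            return clamp_sign(x) * 2


    print(torch.fx.symbolic_trace(Toy()).graph)  # contains a call_function node targeting clamp_sign
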
+import torch.fx import torch.nn.functional as F import torch.utils.checkpoint from torch import nn -from torch.nn import CrossEntropyLoss +from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss +from transformers import PretrainedConfig from transformers.activations import ACT2FN -from transformers.cache_utils import Cache +from transformers.cache_utils import Cache, DynamicCache, StaticCache +from transformers.generation import GenerationMixin +from transformers.integrations.deepspeed import is_deepspeed_available +from transformers.modeling_attn_mask_utils import ( + _prepare_4d_causal_attention_mask, +) from transformers.modeling_outputs import ( BaseModelOutputWithPast, - CausalLMOutputWithPast, + MoeCausalLMOutputWithPast, + MoeModelOutputWithPast, + SequenceClassifierOutputWithPast, ) from transformers.modeling_utils import PreTrainedModel from transformers.pytorch_utils import ( @@ -43,7 +57,6 @@ add_start_docstrings, add_start_docstrings_to_model_forward, logging, - replace_return_docstrings, ) from ....distributed.tensorparallel import _all_reduce @@ -51,6 +64,37 @@ from .configuration_deepseek_v2 import DeepseekV2Config +_prepare_4d_causal_attention_mask = torch.fx.wrap(_prepare_4d_causal_attention_mask) + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "DeepseekV2Config" + +# default expert number per slice for dynamic MoE +SLICE_MAX_EXPERT = 80 + +try: + from habana_frameworks.torch.hpex.kernels import RotaryPosEmbeddingHelperV2 as FusedRoPE + + print("Using HPU fused kernel for apply_rotary_pos_emb") +except ImportError: + print("Not using HPU fused kernel for apply_rotary_pos_emb") + FusedRoPE = None + +try: + from habana_frameworks.torch.hpex.normalization import FusedRMSNorm + + print("Using HPU fused kernel for RMSNorm") +except ImportError: + print("Not using HPU fused kernel for RMSNorm") + FusedRMSNorm = None + +try: + from habana_frameworks.torch.hpex.kernels import FusedSDPA +except ImportError: + print("Not using HPU fused scaled dot-product attention kernel.") + FusedSDPA = None + logger = logging.get_logger(__name__) _CONFIG_FOR_DOC = "DeepseekV2Config" @@ -68,6 +112,89 @@ def _get_unpad_data(attention_mask): ) +# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func +def load_balancing_loss_func( + gate_logits: Union[torch.Tensor, Tuple[torch.Tensor], None], + num_experts: Optional[int] = None, + top_k=2, + attention_mask: Optional[torch.Tensor] = None, +) -> Union[torch.Tensor, int]: + r""" + Computes auxiliary load balancing loss as in Switch Transformer - implemented in Pytorch. + + See Switch Transformer (https://arxiv.org/abs/2101.03961) for more details. This function implements the loss + function presented in equations (4) - (6) of the paper. It aims at penalizing cases where the routing between + experts is too unbalanced. + + Args: + gate_logits: + Logits from the `gate`, should be a tuple of model.config.num_hidden_layers tensors of + shape [batch_size X sequence_length, num_experts]. + num_experts: + Number of experts + top_k: + The number of experts to route per-token, can be also interpreted as the `top-k` routing + parameter. + attention_mask (`torch.Tensor`, *optional*): + The attention_mask used in forward function + shape [batch_size X sequence_length] if not None. + + Returns: + The auxiliary loss. 
+ """ + if gate_logits is None or not isinstance(gate_logits, tuple): + return 0 + + if isinstance(gate_logits, tuple): + compute_device = gate_logits[0].device + concatenated_gate_logits = torch.cat([layer_gate.to(compute_device) for layer_gate in gate_logits], dim=0) + + routing_weights = torch.nn.functional.softmax(concatenated_gate_logits, dim=-1) + + _, selected_experts = torch.topk(routing_weights, top_k, dim=-1) + + expert_mask = torch.nn.functional.one_hot(selected_experts, num_experts) + + if attention_mask is None: + # Compute the percentage of tokens routed to each experts + tokens_per_expert = torch.mean(expert_mask.float(), dim=0) + + # Compute the average probability of routing to these experts + router_prob_per_expert = torch.mean(routing_weights, dim=0) + else: + batch_size, sequence_length = attention_mask.shape + num_hidden_layers = concatenated_gate_logits.shape[0] // (batch_size * sequence_length) + + # Compute the mask that masks all padding tokens as 0 with the same shape of expert_mask + expert_attention_mask = ( + attention_mask[None, :, :, None, None] + .expand((num_hidden_layers, batch_size, sequence_length, top_k, num_experts)) + .reshape(-1, top_k, num_experts) + .to(compute_device) + ) + + # Compute the percentage of tokens routed to each experts + tokens_per_expert = torch.sum(expert_mask.float() * expert_attention_mask, dim=0) / torch.sum( + expert_attention_mask, dim=0 + ) + + # Compute the mask that masks all padding tokens as 0 with the same shape of tokens_per_expert + router_per_expert_attention_mask = ( + attention_mask[None, :, :, None] + .expand((num_hidden_layers, batch_size, sequence_length, num_experts)) + .reshape(-1, num_experts) + .to(compute_device) + ) + + # Compute the average probability of routing to these experts + router_prob_per_expert = torch.sum(routing_weights * router_per_expert_attention_mask, dim=0) / torch.sum( + router_per_expert_attention_mask, dim=0 + ) + + overall_loss = torch.sum(tokens_per_expert * router_prob_per_expert.unsqueeze(0)) + return overall_loss * num_experts + + class DeepseekV2RMSNorm(nn.Module): def __init__(self, hidden_size, eps=1e-6): """ @@ -78,11 +205,23 @@ def __init__(self, hidden_size, eps=1e-6): self.variance_epsilon = eps def forward(self, hidden_states): - input_dtype = hidden_states.dtype - hidden_states = hidden_states.to(torch.float32) - variance = hidden_states.pow(2).mean(-1, keepdim=True) - hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) - return self.weight * hidden_states.to(input_dtype) + if hidden_states.device.type == "hpu" and FusedRMSNorm: + # mixed dtypes are not good for FusedRMSNorm, both inputs need to have same dtype + if hidden_states.dtype != self.weight.dtype: + orig_dtype = hidden_states.dtype + hidden_states = FusedRMSNorm.apply( + hidden_states.to(self.weight.dtype), self.weight, self.variance_epsilon + ) + return hidden_states.to(orig_dtype) + else: + hidden_states = FusedRMSNorm.apply(hidden_states, self.weight, self.variance_epsilon) + return hidden_states + else: + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) ALL_LAYERNORM_LAYERS.append(DeepseekV2RMSNorm) @@ -118,7 +257,7 @@ def _set_cos_sin_cache(self, seq_len, device, dtype): def forward(self, x, seq_len=None): # x: [bs, num_attention_heads, seq_len, head_size] - if 
seq_len > self.max_seq_len_cached: + if seq_len is not None and seq_len > self.max_seq_len_cached: self._set_cos_sin_cache(seq_len=seq_len, device=x.device, dtype=x.dtype) return ( @@ -273,6 +412,15 @@ def _set_cos_sin_cache(self, seq_len, device, dtype): self.register_buffer("sin_cached", emb_sin, persistent=False) +def apply_customized_rope(q, k, cos, sin, position_ids): + if q.device.type == "hpu" and FusedRoPE: + return FusedRoPE.apply( + q, cos.unsqueeze(0).unsqueeze(0), sin.unsqueeze(0).unsqueeze(0), position_ids + ), FusedRoPE.apply(k, cos.unsqueeze(0).unsqueeze(0), sin.unsqueeze(0).unsqueeze(0), position_ids) + else: + return apply_rotary_pos_emb(q, k, cos, sin, position_ids) + + # Copied from transformers.models.llama.modeling_llama.rotate_half def rotate_half(x): """Rotates half the hidden dims of the input.""" @@ -282,11 +430,10 @@ def rotate_half(x): # Copied from transformers.models.llama.modeling_llama.apply_rotary_pos_emb -def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): +def apply_rotary_pos_emb(q: torch.Tensor, cos, sin, position_ids, unsqueeze_dim=1): """Applies Rotary Position Embedding to the query and key tensors. Args: q (`torch.Tensor`): The query tensor. - k (`torch.Tensor`): The key tensor. cos (`torch.Tensor`): The cosine part of the rotary embedding. sin (`torch.Tensor`): The sine part of the rotary embedding. position_ids (`torch.Tensor`): @@ -302,18 +449,19 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): Returns: `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. """ - cos = cos[position_ids].unsqueeze(unsqueeze_dim) - sin = sin[position_ids].unsqueeze(unsqueeze_dim) b, h, s, d = q.shape q = q.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d) - b, h, s, d = k.shape - k = k.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d) - - q_embed = (q * cos) + (rotate_half(q) * sin) - k_embed = (k * cos) + (rotate_half(k) * sin) - return q_embed, k_embed + if q.device.type == "hpu" and FusedRoPE: + return FusedRoPE.apply( + q, cos.unsqueeze(0).unsqueeze(0).clone(), sin.unsqueeze(0).unsqueeze(0).clone(), position_ids + ) + else: + cos = cos[position_ids].unsqueeze(unsqueeze_dim) + sin = sin[position_ids].unsqueeze(unsqueeze_dim) + q_embed = (q * cos) + (rotate_half(q) * sin) + return q_embed class DeepseekV2MLP(nn.Module): @@ -362,18 +510,21 @@ def forward(self, hidden_states): bsz, seq_len, h = hidden_states.shape ### compute gating score hidden_states = hidden_states.view(-1, h) - logits = F.linear(hidden_states.type(torch.float32), self.weight.type(torch.float32), None) + + logits = F.linear(hidden_states.type(torch.bfloat16), self.weight.type(torch.bfloat16), None).to( + dtype=torch.float32 + ) if self.scoring_func == "softmax": - scores = F.softmax(logits, dim=-1, dtype=torch.float32) + scores = logits.softmax(dim=-1, dtype=torch.float32) else: raise NotImplementedError(f"insupportable scoring function for MoE gating: {self.scoring_func}") ### select top-k experts if self.topk_method == "greedy": - topk_weight, topk_idx = torch.topk(scores, self.top_k, dim=-1) + topk_weight, topk_idx = torch.topk(scores, k=self.top_k, dim=-1, sorted=True) elif self.topk_method == "group_limited_greedy": group_scores = scores.view(bsz * seq_len, self.n_group, -1).max(dim=-1).values # [n, n_group] - group_idx = torch.topk(group_scores, k=self.topk_group, dim=-1, sorted=False)[1] # [n, top_k_group] + group_idx = torch.topk(group_scores, k=self.topk_group, 
dim=-1, sorted=True)[1] # [n, top_k_group] group_mask = torch.zeros_like(group_scores) # [n, n_group] group_mask.scatter_(1, group_idx, 1) # [n, n_group] score_mask = ( @@ -382,7 +533,7 @@ def forward(self, hidden_states): .reshape(bsz * seq_len, -1) ) # [n, e] tmp_scores = scores.masked_fill(~score_mask.bool(), 0.0) # [n, e] - topk_weight, topk_idx = torch.topk(tmp_scores, k=self.top_k, dim=-1, sorted=False) + topk_weight, topk_idx = torch.topk(tmp_scores, k=self.top_k, dim=-1, sorted=True) ### norm gate to sum 1 if self.top_k > 1 and self.norm_topk_prob: @@ -446,7 +597,7 @@ def __init__(self, config): super().__init__() self.config = config self.num_experts_per_tok = config.num_experts_per_tok - + self.experts_per_rank = config.n_routed_experts if hasattr(config, "ep_size") and config.ep_size > 1: assert config.ep_size == dist.get_world_size() self.ep_size = config.ep_size @@ -477,66 +628,205 @@ def __init__(self, config): intermediate_size = config.moe_intermediate_size * config.n_shared_experts self.shared_experts = DeepseekV2MLP(config=config, intermediate_size=intermediate_size) + self.expert_slice = math.ceil(self.experts_per_rank / SLICE_MAX_EXPERT) + self.expert_chunk = math.ceil(self.experts_per_rank / self.expert_slice) + def forward(self, hidden_states): identity = hidden_states orig_shape = hidden_states.shape topk_idx, topk_weight, aux_loss = self.gate(hidden_states) hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) - flat_topk_idx = topk_idx.view(-1) + # we cast back to the input dtype + topk_weight = topk_weight.to(hidden_states.dtype) + batch = orig_shape[0] + sequence_length = orig_shape[1] + hidden_dim = orig_shape[2] if self.training: - hidden_states = hidden_states.repeat_interleave(self.num_experts_per_tok, dim=0) - y = torch.empty_like(hidden_states) + padded_weights = torch.zeros( + (batch * sequence_length, self.config.n_routed_experts), + dtype=topk_weight.dtype, + device=topk_weight.device, + ) + padded_weights.scatter_(-1, topk_idx, topk_weight) + padded_weights = padded_weights.reshape(-1, sequence_length, self.config.n_routed_experts) + padded_weights = padded_weights.permute(2, 0, 1).unsqueeze(-1) + + final_hidden_states = torch.zeros( + (batch, sequence_length, hidden_dim), dtype=hidden_states.dtype, device=hidden_states.device + ) for i, expert in enumerate(self.experts): - y[flat_topk_idx == i] = expert(hidden_states[flat_topk_idx == i]) - y = (y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1)).sum(dim=1) - y = y.to(hidden_states.dtype).view(*orig_shape) - y = AddAuxiliaryLoss.apply(y, aux_loss) + current_hidden_state = expert(hidden_states) + current_padded_weight = padded_weights[i] + final_hidden_states = ( + final_hidden_states + + current_hidden_state.reshape(-1, sequence_length, hidden_dim) * current_padded_weight + ) + final_hidden_states = final_hidden_states.type(hidden_states.dtype) + final_hidden_states = final_hidden_states.view(*orig_shape) + final_hidden_states = AddAuxiliaryLoss.apply(final_hidden_states, aux_loss) else: - y = self.moe_infer(hidden_states, topk_idx, topk_weight).view(*orig_shape) - if self.config.n_shared_experts is not None: - y = y + self.shared_experts(identity) - return y + final_hidden_states = torch.zeros( + (batch * sequence_length, hidden_dim), dtype=hidden_states.dtype, device=hidden_states.device + ) + for idx in range(self.expert_slice): + experts_min = (self.ep_rank * self.experts_per_rank) + (self.expert_chunk * idx) + experts_max = min((experts_min + self.expert_chunk), (self.ep_rank + 1) 
* self.experts_per_rank) + experts_range = range(experts_min, experts_max) + gate_proj_list = [self.experts[i].gate_proj.weight.squeeze() for i in experts_range] + down_proj_list = [self.experts[i].down_proj.weight.squeeze() for i in experts_range] + up_proj_list = [self.experts[i].up_proj.weight.squeeze() for i in experts_range] + hidden_states_slice = torch.ops.hpu.mixture_of_experts( + hidden_states=hidden_states, + expert_routing_table=topk_idx, + router_weights=topk_weight, + w1=gate_proj_list, + w2=up_proj_list, + w3=down_proj_list, + permuted_weights=True, + activation="silu", + experts_min=experts_min, + experts_max=experts_max - 1, + ) + final_hidden_states = final_hidden_states + hidden_states_slice + htcore.mark_step() - @torch.no_grad() - def moe_infer(self, x, topk_ids, topk_weight): - """ - Rewrite DeepseekV2MoE.moe_infer: https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite/blob/main/modeling_deepseek.py for static expert support - """ - out = torch.zeros_like(x) + if self.ep_size > 1: + final_hidden_states = _all_reduce(final_hidden_states) + elif is_deepspeed_available(): + from deepspeed import comm as dist + + if dist.is_initialized(): + dist.all_reduce(final_hidden_states, op=dist.ReduceOp.SUM) - seq_len, hidden_dim = x.shape - num_experts = len(self.experts) + final_hidden_states = final_hidden_states.type(hidden_states.dtype) + final_hidden_states = final_hidden_states.reshape(-1, sequence_length, hidden_dim) - padded_weights = torch.zeros((seq_len, num_experts), dtype=topk_weight.dtype, device=x.device) - padded_weights.scatter_(-1, topk_ids, topk_weight) - padded_weights = padded_weights.reshape(seq_len, num_experts) - padded_weights = padded_weights.permute(1, 0).unsqueeze(-1) + if self.config.n_shared_experts is not None: + final_hidden_states = final_hidden_states + self.shared_experts(identity) + + return final_hidden_states - # Loop over all available experts in the model and perform the computation on each expert - for i in range(self.experts_per_rank): - expert_idx = i + self.ep_rank * self.experts_per_rank - expert = self.experts[expert_idx] - padded_weight = padded_weights[expert_idx] - x_static = expert(x) * padded_weight - out += x_static - if self.ep_size > 1: - out = _all_reduce(out) +class Matmul(torch.nn.Module): + def __init__(self): + super().__init__() - return out + def forward(self, x, y): + return torch.matmul(x, y) -# Copied from transformers.models.llama.modeling_llama.repeat_kv -def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: +def gaudi_deepseekv2_repeat_kv( + query_states: torch.Tensor, + key_states: torch.Tensor, + value_states: torch.Tensor, + attention_mask: torch.Tensor, + n_rep: int, +): """ - This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, - num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) + Copied from repeat_kv: https://github.com/huggingface/transformers/blob/v4.37.0/src/transformers/models/mixtral/modeling_mixtral.py + The only differences are: + - Append num_key_value_heads == 1 check as kv states can be broadcasted during matmuls so need to expand and reshape them. + - Add new args query_states, key_states, value_states and attention_mask and update the logic for expansion. 
+ The query states go from (batch, num_heads, seqlen, head_dim) to (batch, num_key_value_heads, n_rep, seqlen, head_dim) + The key/value states go from (batch, num_key_value_heads, seqlen, head_dim) to (batch, num_key_value_heads, 1, seqlen, head_dim) """ - batch, num_key_value_heads, slen, head_dim = hidden_states.shape - if n_rep == 1: - return hidden_states - hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim) - return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) + batch, num_key_value_heads, kv_len, head_dim = key_states.shape + if n_rep == 1 or num_key_value_heads == 1: + return query_states, key_states, value_states, attention_mask + + new_kv_shape = (batch, num_key_value_heads, 1, kv_len, head_dim) + key_states = key_states.reshape(new_kv_shape) + value_states = value_states.reshape(new_kv_shape) + + batch, q_heads, q_len, head_dim = query_states.shape + new_q_shape = (batch, num_key_value_heads, n_rep, q_len, head_dim) + query_states = query_states.reshape(new_q_shape) + + if attention_mask is not None: + # Add groups dim and set to 1 + attention_mask = attention_mask.unsqueeze(1) + + return query_states, key_states, value_states, attention_mask + + +class KVCache(torch.nn.Module): + def __init__(self): + super(KVCache, self).__init__() + self.cache = None + self.inp_seq_len = -1 + + def allocate(self, inp_seq_len, dtype, device, shape): + if self.cache is None or self.cache.shape != shape: + self.inp_seq_len = inp_seq_len + self.cache = torch.zeros(shape, dtype=dtype, device=device) + else: + assert self.inp_seq_len == inp_seq_len, ( + f"inp_seq_len must be the same. self.inp_seq_len:{self.inp_seq_len} inp_seq_len:{inp_seq_len}" + ) + self.cache.fill_(0) + + def update(self, prev, cur, dim, idx, inp_seq_len): + orig_cur = cur + if prev.shape == cur.shape: + prev.copy_(cur) + return orig_cur + if cur.shape[1] > 1 and cur.shape[1] <= prev.shape[1]: + # Initialize + prev[:, :inp_seq_len, :].copy_(cur) + return orig_cur + assert cur.shape[1] == 1, f"Cannot update kv-cache. Unsupported shapes. 
prev:{prev.shape} cur:{cur.shape}" + + if idx is not None: + prev.index_copy_(dim, idx - 1, cur) + return prev + else: + return torch.cat((prev, cur), dim=dim) + + def get_shape(self): + if self.cache is None: + return None + return self.cache.shape + + def forward(self, cur, dim, idx): + return self.update(self.cache, cur, dim, idx, self.inp_seq_len) + + +class ModuleFusedSDPA(torch.nn.Module): + def __init__(self, fusedSDPA, scale, attention_dropout, enable_recompute, flash_attention_fp8): + super().__init__() + self._hpu_kernel_fsdpa = fusedSDPA + self.scale = scale + self.attention_dropout = attention_dropout + self.enable_recompute = enable_recompute + self.flash_attention_fp8 = flash_attention_fp8 + + def forward( + self, + query, + key, + value, + attn_mask, + dropout_p, + is_casual, + scale, + softmax_mode, + recompute_mode, + valid_sequence_lengths, + padding_side="left", + ): + return self._hpu_kernel_fsdpa.apply( + query, + key, + value, + attn_mask, + dropout_p, + is_casual, + scale, + softmax_mode, + recompute_mode, + valid_sequence_lengths, + padding_side, + ) # Copied from transformers.models.llama.modeling_llama.LlamaAttention with Llama->DeepseekV2 @@ -595,6 +885,13 @@ def __init__(self, config: DeepseekV2Config, layer_idx: Optional[int] = None): ) self._init_rope() + self.num_key_value_groups = self.num_heads // config.num_key_value_heads + self.matmul_qk = Matmul() + self.matmul_av = Matmul() + self.k_cache = KVCache() + self.v_cache = KVCache() + self.inp_seq_len = -1 + self.softmax_scale = self.q_head_dim ** (-0.5) if self.config.rope_scaling is not None: mscale_all_dim = self.config.rope_scaling.get("mscale_all_dim", 0) @@ -603,6 +900,19 @@ def __init__(self, config: DeepseekV2Config, layer_idx: Optional[int] = None): mscale = yarn_get_mscale(scaling_factor, mscale_all_dim) self.softmax_scale = self.softmax_scale * mscale * mscale + self.norm_factor = self.softmax_scale + self.fused_scaled_dot_product_attention = ( + ModuleFusedSDPA( + FusedSDPA, + scale=self.norm_factor, + attention_dropout=self.attention_dropout, + enable_recompute=False, + flash_attention_fp8=getattr(config, "flash_attention_fp8", False), + ) + if FusedSDPA + else None + ) + def _init_rope(self): if self.config.rope_scaling is None: self.rotary_emb = DeepseekV2RotaryEmbedding( @@ -649,107 +959,331 @@ def _init_rope(self): else: raise ValueError(f"Unknown RoPE scaling type {scaling_type}") + def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len): + compressed_kv_cache_shape = (batch_size, max_seq_len, self.kv_lora_rank) + k_pe_cache_shape = (batch_size, max_seq_len, self.qk_rope_head_dim) + device = self.kv_a_proj_with_mqa.weight.device + dtype = self.config.torch_dtype + + self.k_cache.allocate(inp_seq_len, dtype, device, compressed_kv_cache_shape) + self.v_cache.allocate(inp_seq_len, dtype, device, k_pe_cache_shape) + + def update_sincos_cache(self, seq_len): + # Call rotary emb forward() to update cos/sin cache when infering more than self.max_position_embeddings + # This helps in avoiding creation of these caches during actual model forward pass and + # reduce memory consumption and improve performance. 
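+        # The rotary embedding forward below is invoked only for its side effect of extending the
+        # cached cos/sin tables to the new seq_len; its return values are discarded. Bumping
+        # self.max_position_embeddings first ensures the extension happens at most once per new length.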
+ if seq_len > self.max_position_embeddings: + self.max_position_embeddings = seq_len + _, _ = self.rotary_emb(self.k_b_proj.weight, seq_len=seq_len) + + def reorder(self, tensor, beam_idx, dim_a, dim_b): + updated = tensor.index_select(0, beam_idx) + tensor.copy_(updated) + + def reorder_kv_cache(self, beam_idx: torch.LongTensor): + if self.k_cache.cache is None: + return (None, None) + + head_dim = self.k_cache.cache.size(-1) + seq_length = self.k_cache.cache.size(-2) + self.reorder(self.k_cache.cache, beam_idx, seq_length, head_dim) + self.reorder(self.v_cache.cache, beam_idx, seq_length, head_dim) + return (self.k_cache.cache.shape, self.v_cache.cache.shape) + def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int): return tensor.view(bsz, seq_len, self.num_heads, self.v_head_dim).transpose(1, 2).contiguous() + def split_kv_b_proj(self): + kv_b_proj_weight = self.kv_b_proj.weight.view(self.num_heads, -1, self.kv_lora_rank) + self.q_absorb = kv_b_proj_weight[:, : self.qk_nope_head_dim, :].unsqueeze(0).transpose(0, 1) + self.out_absorb = kv_b_proj_weight[:, self.qk_nope_head_dim :, :].unsqueeze(0) + + def compress_kv( + self, + hidden_states_kv: torch.Tensor, + kv_position_ids: torch.LongTensor, + past_key_value: Optional[Cache] = None, + ) -> torch.Tensor: + # return the RoPE'ed & compressed kv + bsz, kv_seq_len, _ = hidden_states_kv.size() + compressed_kv = self.kv_a_proj_with_mqa(hidden_states_kv) + compressed_kv, k_pe = torch.split(compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) + compressed_kv = self.kv_a_layernorm(compressed_kv) + k_pe = k_pe.view(bsz, kv_seq_len, 1, self.qk_rope_head_dim).transpose(1, 2) + cos, sin = self.rotary_emb.cos_cached, self.rotary_emb.sin_cached + k_pe = apply_rotary_pos_emb(k_pe, cos, sin, kv_position_ids).view(bsz, kv_seq_len, self.qk_rope_head_dim) + return compressed_kv, k_pe + def forward( self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, + past_key_value: Optional[Cache] = None, output_attentions: bool = False, use_cache: bool = False, token_idx: Optional[torch.Tensor] = None, + reuse_cache: Optional[bool] = False, + cache_idx: int = None, + cache_position: Optional[torch.LongTensor] = None, + attn_softmax_bf16: Optional[bool] = False, + use_flash_attention: Optional[bool] = False, + flash_attention_recompute: Optional[bool] = False, + flash_attention_causal_mask: Optional[bool] = False, + flash_attention_fast_softmax: Optional[bool] = False, + valid_sequence_lengths: Optional[torch.Tensor] = None, + num_virtual_tokens: int = None, **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: """ - Copied from DeepseekV2Attention.forward: https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite/blob/main/modeling_deepseek.py - deltas are: - - add token_idx - - optimize KV cache + Attention masks and past cache are removed. + Input: + - hidden_states: [bsz, q_len, hidden_size] + - position_ids: [bsz, q_len] """ if "padding_mask" in kwargs: warnings.warn( "Passing `padding_mask` is deprecated and will be removed in v4.37. 
Please make sure use `attention_mask` instead.`" ) - bsz, q_len, _ = hidden_states.size() - - if self.q_lora_rank is None: - q = self.q_proj(hidden_states) - else: - q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) - q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) - q_nope, q_pe = torch.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) + if self.training: + if "padding_mask" in kwargs: + warnings.warn( + "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`" + ) + bsz, q_len, _ = hidden_states.size() + if self.q_lora_rank is None: + q = self.q_proj(hidden_states) + else: + q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) + q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) + q_nope, q_pe = torch.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) + + compressed_kv = self.kv_a_proj_with_mqa(hidden_states) + compressed_kv, k_pe = torch.split(compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) + k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) + kv = ( + self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) + .view(bsz, q_len, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) + .transpose(1, 2) + ) - compressed_kv = self.kv_a_proj_with_mqa(hidden_states) - compressed_kv, k_pe = torch.split(compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) - k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) - kv = ( - self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) - .view(bsz, q_len, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) - .transpose(1, 2) - ) + k_nope, value_states = torch.split(kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1) + kv_seq_len = value_states.shape[-2] + if past_key_value is not None: + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." + ) - k_nope, value_states = torch.split(kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1) - kv_seq_len = value_states.shape[-2] - if past_key_value is not None: - if self.layer_idx is None: - raise ValueError( - f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " - "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " - "with a layer index." 
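+                # Without token_idx the usable length is accumulated from the past cache plus the current
+                # tokens; with token_idx (static-shape decoding on HPU) it is read from the cached tensor
+                # shape, with the current length added only in the prefix-tuning (num_virtual_tokens) case.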
+ if token_idx is None: + if hasattr(past_key_value, "get_usable_length"): + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + else: + kv_seq_len += past_key_value[0].shape[-2] + else: + if num_virtual_tokens is not None and num_virtual_tokens == past_key_value[0].shape[-2]: + kv_seq_len = past_key_value[0].shape[-2] + kv_seq_len + else: + kv_seq_len = past_key_value[0].shape[-2] + + cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + q_pe, k_pe = apply_customized_rope(q_pe, k_pe, cos, sin, position_ids) + + query_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim) + query_states[:, :, :, : self.qk_nope_head_dim] = q_nope + query_states[:, :, :, self.qk_nope_head_dim :] = q_pe + + key_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim) + key_states[:, :, :, : self.qk_nope_head_dim] = k_nope + key_states[:, :, :, self.qk_nope_head_dim :] = k_pe + + if past_key_value is not None: + cache_kwargs = {"sin": sin, "cos": cos} # Specific to RoPE models + key_states, value_states = past_key_value.update( + key_states, value_states, self.layer_idx, cache_kwargs ) - if token_idx is None: - kv_seq_len += past_key_value[0].shape[-2] - else: - kv_seq_len = past_key_value[0].shape[-2] - cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) - - q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids) - - query_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim) - query_states[:, :, :, : self.qk_nope_head_dim] = q_nope - query_states[:, :, :, self.qk_nope_head_dim :] = q_pe - - key_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim) - key_states[:, :, :, : self.qk_nope_head_dim] = k_nope - key_states[:, :, :, self.qk_nope_head_dim :] = k_pe - if past_key_value is not None: - if token_idx is None: - key_states = torch.cat([past_key_value[0], key_states], dim=2) - value_states = torch.cat([past_key_value[1], value_states], dim=2) + # optimization + if use_flash_attention and FusedSDPA is not None: + if q_len == 1: + # next token + attn_output = self.fused_scaled_dot_product_attention( + query_states, + key_states, + value_states, + attention_mask, + 0.0, + False, + None, + "None", + False, + None, + "None", + ) + else: + # first token + softmax_mode = "fast" if flash_attention_fast_softmax else "None" + if flash_attention_causal_mask: + attn_output = self.fused_scaled_dot_product_attention( + query_states, + key_states, + value_states, + None, + 0.0, + True, + None, + softmax_mode, + flash_attention_recompute, + valid_sequence_lengths, + "left", + ) + else: + attn_output = self.fused_scaled_dot_product_attention( + query_states, + key_states, + value_states, + attention_mask, + 0.0, + False, + None, + softmax_mode, + flash_attention_recompute, + None, + "None", + ) + else: - past_key_value[0].index_add_( - 2, token_idx - 1, key_states - torch.index_select(past_key_value[0], 2, token_idx - 1) + query_states, key_states, value_states, attention_mask = gaudi_deepseekv2_repeat_kv( + query_states, key_states, value_states, attention_mask, self.num_key_value_groups ) - past_key_value[1].index_add_( - 2, token_idx - 1, value_states - torch.index_select(past_key_value[1], 2, token_idx - 1) + + attn_weights = self.matmul_qk(query_states, key_states.transpose(-2, -1)) * self.softmax_scale + htcore.mark_step() + + if attention_mask is not None: # no matter the length, we just slice it + causal_mask = attention_mask + if cache_position is not None: + causal_mask = attention_mask[:, :, 
cache_position, : key_states.shape[-2]] + attn_weights = attn_weights + causal_mask.float() + + if attn_softmax_bf16: + attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1, dtype=query_states.dtype) + else: + # upcast attention to fp32 + attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to( + query_states.dtype + ) + attn_weights = torch.nn.functional.dropout( + attn_weights, p=self.attention_dropout, training=self.training ) - key_states = past_key_value[0] - value_states = past_key_value[1] - past_key_value = (key_states, value_states) if use_cache else None + attn_output = self.matmul_av(attn_weights, value_states) + else: + hidden_states_q = hidden_states + hidden_states_kv = hidden_states + self.split_kv_b_proj() + q_position_ids = position_ids + kv_position_ids = position_ids + bsz, q_len, _ = hidden_states_q.size() + + if self.q_lora_rank is None: + q = self.q_proj(hidden_states_q) + else: + q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states_q))) - attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) * self.softmax_scale + q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) - if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): - raise ValueError( - f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is" - f" {attn_weights.size()}" - ) - assert attention_mask is not None - if attention_mask is not None: - if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + q_nope, q_pe = torch.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) + + kv_seq_len = q_pe.shape[-2] + + if past_key_value is not None: + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." + ) + if token_idx is None: + if hasattr(past_key_value, "get_usable_length"): + kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx) + else: + kv_seq_len += past_key_value[0].shape[-2] + else: + if reuse_cache: + kv_seq_len = past_key_value[0][-2] + else: + kv_seq_len = past_key_value[0].shape[-2] + + cos, sin = self.rotary_emb(q_pe, seq_len=kv_seq_len) + q_pe = apply_rotary_pos_emb(q_pe, cos, sin, q_position_ids) + q_nope = torch.matmul(q_nope.transpose(0, 1), self.q_absorb).transpose(0, 1) + compressed_kv, k_pe = self.compress_kv(hidden_states_kv, kv_position_ids) + + # update & get all compressed_kv, k_pe + if use_cache: + if reuse_cache: + if past_key_value is not None and isinstance(past_key_value[0], torch.Tensor): + # prefix tuning case. attach past_key_value to generate first token. 
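+                        # The virtual-token states are prepended along the sequence dim before being written
+                        # into the static caches; note that only the compressed latent and the RoPE key slice
+                        # are cached (matching allocate_kv_cache), not full per-head key/value states.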
+ compressed_kv = torch.cat((past_key_value[0], compressed_kv), -2) + k_pe = torch.cat((past_key_value[1], k_pe), -2) + + compressed_kv = self.k_cache(compressed_kv, 1, token_idx) + + k_pe = self.v_cache(k_pe, 1, token_idx) + past_key_value = (self.k_cache.get_shape(), self.v_cache.get_shape()) + + else: + if past_key_value is None: + dtype_1 = hidden_states.dtype + device_1 = hidden_states.device + past_key = torch.zeros(compressed_kv.shape, dtype=dtype_1, device=device_1) + past_value = torch.zeros(k_pe.shape, dtype=dtype_1, device=device_1) + past_key_value = (past_key, past_value) + compressed_kv = self.k_cache.update( + past_key_value[0], compressed_kv, 1, token_idx, self.inp_seq_len + ) + k_pe = self.v_cache.update(past_key_value[1], k_pe, 1, token_idx, self.inp_seq_len) + + if token_idx is None: + past_key_value = (compressed_kv, k_pe) + + if cache_idx is not None and q_len == 1: + compressed_kv = compressed_kv[:, :cache_idx, :] + + k_pe = k_pe[:, :cache_idx, :] + if attention_mask is not None: + attention_mask = attention_mask[:, :, :, :cache_idx] + + kv_seq_len = compressed_kv.shape[-2] + else: + past_key_value = None + + kv_seq_len = compressed_kv.size(1) + + k_pe = k_pe.view(bsz, 1, kv_seq_len, self.qk_rope_head_dim) + + attn_weights = ( + torch.matmul(q_pe, k_pe.mT) + torch.matmul(q_nope, compressed_kv.unsqueeze(-3).mT) + ) * self.softmax_scale + + if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): raise ValueError( - f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" + f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is" + f" {attn_weights.size()}" ) - attn_weights = attn_weights + attention_mask + assert attention_mask is not None + if attention_mask is not None: + attn_weights = attn_weights + attention_mask + + # upcast attention to fp32 + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(q_nope.dtype) - # upcast attention to fp32 - attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query_states.dtype) - attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training) - attn_output = torch.matmul(attn_weights, value_states) + attn_weights = nn.functional.dropout(attn_weights, p=self.attention_dropout, training=self.training) + attn_output = torch.einsum("bhql,blc->bhqc", attn_weights, compressed_kv) + + attn_output = torch.matmul(attn_output.permute(2, 1, 0, 3), self.out_absorb.mT).permute(2, 1, 0, 3) if attn_output.size() != (bsz, self.num_heads, q_len, self.v_head_dim): raise ValueError( @@ -788,6 +1322,15 @@ def __init__(self, config: DeepseekV2Config, layer_idx: int): self.input_layernorm = DeepseekV2RMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.post_attention_layernorm = DeepseekV2RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len): + self.self_attn.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len) + + def reorder_kv_cache(self, beam_idx: torch.LongTensor): + return self.self_attn.reorder_kv_cache(beam_idx) + + def update_sincos_cache(self, seq_len): + self.self_attn.update_sincos_cache(seq_len) + def forward( self, hidden_states: torch.Tensor, @@ -797,13 +1340,18 @@ def forward( output_attentions: Optional[bool] = False, use_cache: Optional[bool] = False, token_idx: Optional[torch.Tensor] = None, + reuse_cache: Optional[bool] = False, + cache_idx: int = None, + cache_position: 
Optional[torch.LongTensor] = None, + attn_softmax_bf16: Optional[bool] = False, + use_flash_attention: Optional[bool] = False, + flash_attention_recompute: Optional[bool] = False, + flash_attention_causal_mask: Optional[bool] = False, + flash_attention_fast_softmax: Optional[bool] = False, + valid_sequence_lengths: Optional[torch.Tensor] = None, + num_virtual_tokens: int = None, **kwargs, ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: - """ - Copied from DeepseekV2DecoderLayer.forward: https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite/blob/main/modeling_deepseek.py - The deltas are: - - add token_idx - """ """ Args: hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` @@ -835,6 +1383,16 @@ def forward( output_attentions=output_attentions, use_cache=use_cache, token_idx=token_idx, + reuse_cache=reuse_cache, + cache_idx=cache_idx, + cache_position=cache_position, + attn_softmax_bf16=attn_softmax_bf16, + use_flash_attention=use_flash_attention, + flash_attention_recompute=flash_attention_recompute, + flash_attention_causal_mask=flash_attention_causal_mask, + flash_attention_fast_softmax=flash_attention_fast_softmax, + valid_sequence_lengths=valid_sequence_lengths, + num_virtual_tokens=num_virtual_tokens, **kwargs, ) hidden_states = residual + hidden_states @@ -842,7 +1400,10 @@ def forward( # Fully Connected residual = hidden_states hidden_states = self.post_attention_layernorm(hidden_states) - hidden_states = self.mlp(hidden_states) + if isinstance(self.mlp, DeepseekV2MoE): + hidden_states = self.mlp(hidden_states) + else: + hidden_states = self.mlp(hidden_states) hidden_states = residual + hidden_states outputs = (hidden_states,) @@ -881,7 +1442,7 @@ class DeepseekV2PreTrainedModel(PreTrainedModel): supports_gradient_checkpointing = True _no_split_modules = ["DeepseekV2DecoderLayer"] _skip_keys_device_placement = "past_key_values" - _supports_flash_attn_2 = True + _supports_flash_attn_2 = False _supports_cache_class = True def _init_weights(self, module): @@ -974,6 +1535,7 @@ def __init__(self, config: DeepseekV2Config): self.layers = nn.ModuleList( [DeepseekV2DecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)] ) + self._attn_implementation = "eager" self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2" self.norm = DeepseekV2RMSNorm(config.hidden_size, eps=config.rms_norm_eps) @@ -981,6 +1543,17 @@ def __init__(self, config: DeepseekV2Config): # Initialize weights and apply final processing self.post_init() + def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len): + for layer in self.layers: + layer.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len) + + def reorder_kv_cache(self, beam_idx: torch.LongTensor): + return tuple(layer.reorder_kv_cache(beam_idx) for layer in self.layers) + + def update_sincos_cache(self, seq_len): + for layer in self.layers: + layer.update_sincos_cache(seq_len) + def get_input_embeddings(self): return self.embed_tokens @@ -998,8 +1571,20 @@ def forward( use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, + output_router_logits: Optional[bool] = None, return_dict: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, token_idx: Optional[torch.Tensor] = None, + attn_softmax_bf16: Optional[bool] = False, + reuse_cache: Optional[bool] = False, + use_flash_attention: Optional[bool] = False, + flash_attention_recompute: 
Optional[bool] = False, + flash_attention_causal_mask: Optional[bool] = False, + flash_attention_fast_softmax: Optional[bool] = False, + cache_idx: int = None, + lazy_mode: Optional[bool] = True, + valid_sequence_lengths: Optional[torch.Tensor] = None, + num_virtual_tokens: int = None, ) -> Union[Tuple, BaseModelOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1026,32 +1611,53 @@ def forward( ) use_cache = False - past_key_values_length = 0 - if past_key_values is not None: - past_key_values_length = past_key_values[0][0].shape[2] - - if position_ids is None: - device = input_ids.device if input_ids is not None else inputs_embeds.device - position_ids = torch.arange( - past_key_values_length, - seq_length + past_key_values_length, - dtype=torch.long, - device=device, - ) - position_ids = position_ids.unsqueeze(0) - if inputs_embeds is None: inputs_embeds = self.embed_tokens(input_ids) - # 4d mask is passed through the layers - attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None - if attention_mask is not None: - attention_mask = _gaudi_prepare_4d_causal_attention_mask( - attention_mask, - (batch_size, seq_length), - inputs_embeds, - past_key_values_length, - ) + ignore_cache_position = True # Ignoring cache position for HPU + use_new_cache = False # Ignoring new Cache path for HPU + + past_seen_tokens = 0 + + if past_key_values is not None and use_cache: # kept for BC (cache positions) + if reuse_cache: + if isinstance(past_key_values[0][0], torch.Tensor): + past_seen_tokens = past_key_values[0][0].shape[2] + else: + past_seen_tokens = past_key_values[0][0][2] + else: + if use_new_cache: + if not isinstance(past_key_values, StaticCache): + past_key_values = DynamicCache.from_legacy_cache(past_key_values) + past_seen_tokens = past_key_values.get_seq_length() + else: + if past_key_values[0] is not None: ##added for (None, None) + past_seen_tokens = past_key_values[0][0].shape[2] + + if ignore_cache_position is False: + if cache_position is None: + if isinstance(past_key_values, StaticCache): + raise ValueError("cache_position is a required argument when using StaticCache.") + cache_position = torch.arange( + past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device + ) + if position_ids is None and cache_position: + position_ids = cache_position.unsqueeze(0) + + else: + if position_ids is None: + position_ids = torch.arange( + past_seen_tokens, seq_length + past_seen_tokens, dtype=torch.long, device=inputs_embeds.device + ) + position_ids = position_ids.unsqueeze(0) + cache_position = None + + causal_mask = _gaudi_prepare_4d_causal_attention_mask( + attention_mask, + input_ids.shape if input_ids is not None else (batch_size, seq_length), + inputs_embeds, + past_seen_tokens, + ) # embed positions hidden_states = inputs_embeds @@ -1059,34 +1665,70 @@ def forward( # decoder layers all_hidden_states = () if output_hidden_states else None all_self_attns = () if output_attentions else None - next_decoder_cache = () if use_cache else None + all_router_logits = () if output_router_logits else None + next_decoder_cache = () if not use_new_cache else None + + if lazy_mode: + htcore.mark_step() - for idx, decoder_layer in enumerate(self.layers): + for layer_idx, decoder_layer in enumerate(self.layers): if output_hidden_states: all_hidden_states += (hidden_states,) - past_key_value = past_key_values[idx] if past_key_values is 
not None else None - if self.gradient_checkpointing and self.training: layer_outputs = self._gradient_checkpointing_func( decoder_layer.__call__, hidden_states, - attention_mask, + causal_mask, position_ids, past_key_values, output_attentions, use_cache, + token_idx, + reuse_cache, + cache_idx, + cache_position, + attn_softmax_bf16, + use_flash_attention, + flash_attention_recompute, + flash_attention_causal_mask, + flash_attention_fast_softmax, + valid_sequence_lengths, + num_virtual_tokens, ) else: + if ( + lazy_mode + and not self.training + and (torch.distributed.is_initialized() is False or torch.distributed.get_world_size() == 1) + ): + htcore.mark_step() + layer_outputs = decoder_layer( hidden_states, - attention_mask=attention_mask, + attention_mask=causal_mask, position_ids=position_ids, - past_key_value=past_key_value, + past_key_value=None if past_key_values is None else past_key_values[layer_idx], output_attentions=output_attentions, + output_router_logits=output_router_logits, use_cache=use_cache, + cache_position=cache_position, token_idx=token_idx, + attn_softmax_bf16=attn_softmax_bf16, + reuse_cache=reuse_cache, + use_flash_attention=use_flash_attention, + flash_attention_recompute=flash_attention_recompute, + flash_attention_causal_mask=flash_attention_causal_mask, + flash_attention_fast_softmax=flash_attention_fast_softmax, + cache_idx=cache_idx, + num_virtual_tokens=num_virtual_tokens, ) + if ( + lazy_mode + and not self.training + and (torch.distributed.is_initialized() is False or torch.distributed.get_world_size() == 1) + ): + htcore.mark_step() hidden_states = layer_outputs[0] @@ -1096,24 +1738,36 @@ def forward( if output_attentions: all_self_attns += (layer_outputs[1],) + if output_router_logits: + all_router_logits += (layer_outputs[-1],) + hidden_states = self.norm(hidden_states) # add hidden states from the last decoder layer if output_hidden_states: all_hidden_states += (hidden_states,) - next_cache = next_decoder_cache if use_cache else None + next_cache = None + if use_cache: + next_cache = ( + next_decoder_cache.to_legacy_cache() if isinstance(next_decoder_cache, Cache) else next_decoder_cache + ) if not return_dict: - return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None) - return BaseModelOutputWithPast( + return tuple( + v + for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, all_router_logits] + if v is not None + ) + return MoeModelOutputWithPast( last_hidden_state=hidden_states, past_key_values=next_cache, hidden_states=all_hidden_states, attentions=all_self_attns, + router_logits=all_router_logits, ) -class DeepseekV2ForCausalLM(DeepseekV2PreTrainedModel): +class DeepseekV2ForCausalLM(DeepseekV2PreTrainedModel, GenerationMixin): _tied_weights_keys = ["lm_head.weight"] def __init__(self, config): @@ -1143,8 +1797,64 @@ def set_decoder(self, decoder): def get_decoder(self): return self.model - @add_start_docstrings_to_model_forward(DeepseekV2_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC) + def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len): + self.model.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len) + self.kv_cache_len = max_seq_len + + def reorder_kv_cache(self, beam_idx: torch.LongTensor): + return self.model.reorder_kv_cache(beam_idx) + + def update_sincos_cache(self, seq_len): + self.model.update_sincos_cache(seq_len) + + @classmethod + def from_pretrained( + cls, + 
pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], + *model_args, + config: Optional[Union[PretrainedConfig, str, os.PathLike]] = None, + cache_dir: Optional[Union[str, os.PathLike]] = None, + ignore_mismatched_sizes: bool = False, + force_download: bool = False, + local_files_only: bool = False, + token: Optional[Union[str, bool]] = None, + revision: str = "main", + use_safetensors: bool = None, + **kwargs, + ): + # Load config if we don't provide a configuration + if not isinstance(config, PretrainedConfig): + config_path = config if config is not None else pretrained_model_name_or_path + config, model_kwargs = cls.config_class.from_pretrained( + config_path, + cache_dir=cache_dir, + return_unused_kwargs=True, + force_download=force_download, + resume_download=False, + proxies=None, + local_files_only=local_files_only, + token=token, + revision=revision, + subfolder="", + _from_auto=False, + _from_pipeline=None, + **kwargs, + ) + + return super(DeepseekV2ForCausalLM, cls).from_pretrained( + pretrained_model_name_or_path, + *model_args, + config=config, + cache_dir=cache_dir, + ignore_mismatched_sizes=ignore_mismatched_sizes, + force_download=force_download, + local_files_only=local_files_only, + token=token, + revision=revision, + use_safetensors=use_safetensors, + **kwargs, + ) + def forward( self, input_ids: torch.LongTensor = None, @@ -1155,10 +1865,23 @@ def forward( labels: Optional[torch.LongTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, + output_hidden_states: Optional[bool] = False, + output_router_logits: Optional[bool] = None, return_dict: Optional[bool] = None, token_idx: Optional[torch.Tensor] = None, - ) -> Union[Tuple, CausalLMOutputWithPast]: + reuse_cache: Optional[bool] = None, + flash_attention_recompute: Optional[bool] = False, + cache_idx: int = None, + cache_position: Optional[torch.LongTensor] = None, + trim_logits: Optional[bool] = False, + attn_softmax_bf16: Optional[bool] = False, + use_flash_attention: Optional[bool] = False, + flash_attention_causal_mask: Optional[bool] = False, + flash_attention_fast_softmax: Optional[bool] = False, + valid_sequence_lengths: torch.Tensor = None, + lazy_mode: Optional[bool] = True, + num_virtual_tokens: int = None, + ) -> Union[Tuple, MoeCausalLMOutputWithPast]: r""" Args: labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): @@ -1194,11 +1917,31 @@ def forward( use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, + output_router_logits=output_router_logits, return_dict=return_dict, + cache_position=cache_position, token_idx=token_idx, + attn_softmax_bf16=attn_softmax_bf16, + reuse_cache=reuse_cache, + use_flash_attention=use_flash_attention, + flash_attention_recompute=flash_attention_recompute, + flash_attention_causal_mask=flash_attention_causal_mask, + flash_attention_fast_softmax=flash_attention_fast_softmax, + cache_idx=cache_idx, + lazy_mode=lazy_mode, + valid_sequence_lengths=valid_sequence_lengths, + num_virtual_tokens=num_virtual_tokens, ) hidden_states = outputs[0] + + _, seq_len, _ = hidden_states.shape + if seq_len > 1 and trim_logits and not self.training: + if token_idx is not None: + hidden_states = hidden_states.index_select(1, token_idx - 1) + else: + hidden_states = hidden_states[:, -1, :] + logits = self.lm_head(hidden_states) logits = logits.float() @@ -1215,16 +1958,31 @@ def forward( shift_labels = 
shift_labels.to(shift_logits.device) loss = loss_fct(shift_logits, shift_labels) + aux_loss = None + if output_router_logits: + aux_loss = load_balancing_loss_func( + outputs.router_logits if return_dict else outputs[-1], + self.num_experts, + self.num_experts_per_tok, + attention_mask, + ) + if labels is not None: + loss += self.router_aux_loss_coef * aux_loss.to(loss.device) # make sure to reside in the same device + if not return_dict: output = (logits,) + outputs[1:] + if output_router_logits: + output = (aux_loss,) + output return (loss,) + output if loss is not None else output - return CausalLMOutputWithPast( + return MoeCausalLMOutputWithPast( loss=loss, + aux_loss=aux_loss, logits=logits, past_key_values=outputs.past_key_values, hidden_states=outputs.hidden_states, attentions=outputs.attentions, + router_logits=outputs.router_logits, ) def prepare_inputs_for_generation( @@ -1233,44 +1991,33 @@ def prepare_inputs_for_generation( past_key_values=None, attention_mask=None, inputs_embeds=None, + cache_position=None, + position_ids=None, + use_cache=True, + num_logits_to_keep=None, + token_idx=None, **kwargs, ): - token_idx = kwargs.get("token_idx") - past_length = 0 - max_cache_length = None + reuse_cache = kwargs.get("reuse_cache") + bucket_internal = kwargs.get("bucket_internal") + if past_key_values is not None: if token_idx is not None: - input_ids = torch.index_select(input_ids, 1, token_idx - 1) + idx = token_idx + kwargs.get("inputs_embeds_offset", 0) - 1 + input_ids = torch.index_select(input_ids, 1, idx) else: - if isinstance(past_key_values, Cache): - cache_length = past_key_values.get_seq_length() - past_length = past_key_values.seen_tokens - max_cache_length = past_key_values.get_max_length() - else: - cache_length = past_length = past_key_values[0][0].shape[2] - max_cache_length = None - - # Keep only the unprocessed tokens: - # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where - # some of the inputs are exclusivelly passed as part of the cache (e.g. when passing input_embeds as - # input) - if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]: - input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :] - # 2 - If the past_length is smaller than input_ids', then input_ids holds all input tokens. We can discard - # input_ids based on the past_length. - elif past_length < input_ids.shape[1]: - input_ids = input_ids[:, past_length:] - # 3 - Otherwise (past_length >= input_ids.shape[1]), let's assume input_ids only has unprocessed tokens. - - # If we are about to go beyond the maximum cache length, we need to crop the input attention mask. - if ( - max_cache_length is not None - and attention_mask is not None - and cache_length + input_ids.shape[1] > max_cache_length - ): - attention_mask = attention_mask[:, -max_cache_length:] + if inputs_embeds is not None: # Exception 1 + input_ids = input_ids[:, -cache_position.shape[0] :] + elif ( + input_ids.shape[1] != cache_position.shape[0] + ): # Default case (the "else", a no op, is Exception 2) + input_ids = input_ids[:, cache_position] + elif (reuse_cache or bucket_internal) and token_idx is not None: + # KV cache is pre allocated with reuse cache or will be padded with bucket internal + # hence for the 1st token we can slice the inputs till token idx for the fwd pass. 
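+            # This branch only runs for the prefill step (past_key_values is None); once the cache is
+            # populated, later steps take the token_idx index_select path above and feed a single token.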
+ input_ids = input_ids[:, :token_idx] + attention_mask = attention_mask[:, :token_idx] - position_ids = kwargs.get("position_ids", None) if attention_mask is not None and position_ids is None: # create position_ids on the fly for batch generation position_ids = attention_mask.long().cumsum(-1) - 1 @@ -1281,19 +2028,159 @@ def prepare_inputs_for_generation( else: position_ids = position_ids[:, -input_ids.shape[1] :] + # keep cache_position implementation as None for HPU + cache_position = None + # if `inputs_embeds` are passed, we only want to use them in the 1st generation step if inputs_embeds is not None and past_key_values is None: model_inputs = {"inputs_embeds": inputs_embeds} else: model_inputs = {"input_ids": input_ids.contiguous()} + if num_logits_to_keep is not None: + model_inputs["num_logits_to_keep"] = num_logits_to_keep + model_inputs.update( { "position_ids": position_ids, + "cache_position": cache_position, "past_key_values": past_key_values, - "use_cache": kwargs.get("use_cache"), + "use_cache": use_cache, "attention_mask": attention_mask, "token_idx": token_idx, + "trim_logits": kwargs.get("trim_logits"), + "attn_softmax_bf16": kwargs.get("attn_softmax_bf16"), + "reuse_cache": reuse_cache, + "use_flash_attention": kwargs.get("use_flash_attention"), + "flash_attention_recompute": kwargs.get("flash_attention_recompute"), + "flash_attention_causal_mask": kwargs.get("flash_attention_causal_mask"), + "flash_attention_fast_softmax": kwargs.get("flash_attention_fast_softmax"), + "valid_sequence_lengths": kwargs.get("valid_sequence_lengths"), + "cache_idx": kwargs.get("cache_idx"), + "lazy_mode": kwargs.get("lazy_mode"), + "num_virtual_tokens": kwargs.get("num_virtual_tokens"), } ) return model_inputs + + +@add_start_docstrings( + """ + The DeepseekV2 Model transformer with a sequence classification head on top (linear layer). + + [`DeepseekV2ForSequenceClassification`] uses the last token in order to do the classification, as other causal models + (e.g. GPT-2) do. + + Since it does classification on the last token, it requires to know the position of the last token. If a + `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row. If + no `pad_token_id` is defined, it simply takes the last value in each row of the batch. Since it cannot guess the + padding tokens when `inputs_embeds` are passed instead of `input_ids`, it does the same (take the last value in + each row of the batch). 
+ """, + DeepseekV2_START_DOCSTRING, +) +class DeepseekV2ForSequenceClassification(DeepseekV2PreTrainedModel): + def __init__(self, config): + super().__init__(config) + self.num_labels = config.num_labels + self.model = DeepseekV2Model(config) + self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def get_input_embeddings(self): + return self.model.embed_tokens + + def set_input_embeddings(self, value): + self.model.embed_tokens = value + + @add_start_docstrings_to_model_forward(DeepseekV2_INPUTS_DOCSTRING) + def forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + ) -> Union[Tuple, SequenceClassifierOutputWithPast]: + r""" + labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): + Labels for computing the sequence classification/regression loss. Indices should be in `[0, transformers., + config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If + `config.num_labels > 1` a classification loss is computed (Cross-Entropy). + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + transformer_outputs = self.model( + input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=return_dict, + ) + hidden_states = transformer_outputs[0] + logits = self.score(hidden_states) + + if input_ids is not None: + batch_size = input_ids.shape[0] + else: + batch_size = inputs_embeds.shape[0] + + if self.config.pad_token_id is None and batch_size != 1: + raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.") + if self.config.pad_token_id is None: + sequence_lengths = -1 + else: + if input_ids is not None: + sequence_lengths = (torch.eq(input_ids, self.config.pad_token_id).int().argmax(-1) - 1).to( + logits.device + ) + else: + sequence_lengths = -1 + + pooled_logits = logits[torch.arange(batch_size, device=logits.device), sequence_lengths] + + loss = None + if labels is not None: + labels = labels.to(logits.device) + if self.config.problem_type is None: + if self.num_labels == 1: + self.config.problem_type = "regression" + elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): + self.config.problem_type = "single_label_classification" + else: + self.config.problem_type = "multi_label_classification" + + if self.config.problem_type == "regression": + loss_fct = MSELoss() + if self.num_labels == 1: + loss = loss_fct(pooled_logits.squeeze(), labels.squeeze()) + else: + loss = loss_fct(pooled_logits, labels) + elif self.config.problem_type == "single_label_classification": + loss_fct = CrossEntropyLoss() + loss = loss_fct(pooled_logits.view(-1, self.num_labels), labels.view(-1)) + elif self.config.problem_type == "multi_label_classification": + loss_fct = BCEWithLogitsLoss() + loss = loss_fct(pooled_logits, labels) + if not return_dict: + output = 
(pooled_logits,) + transformer_outputs[1:] + return ((loss,) + output) if loss is not None else output + + return SequenceClassifierOutputWithPast( + loss=loss, + logits=pooled_logits, + past_key_values=transformer_outputs.past_key_values, + hidden_states=transformer_outputs.hidden_states, + attentions=transformer_outputs.attentions, + ) diff --git a/optimum/habana/transformers/models/gemma2/modeling_gemma2.py b/optimum/habana/transformers/models/gemma2/modeling_gemma2.py index 5905e8bf3a..7583715daf 100755 --- a/optimum/habana/transformers/models/gemma2/modeling_gemma2.py +++ b/optimum/habana/transformers/models/gemma2/modeling_gemma2.py @@ -35,7 +35,6 @@ ) from transformers.utils import logging -from ....distributed.strategy import DistributedStrategy, NoOpStrategy from ...modeling_attn_mask_utils import _gaudi_prepare_4d_causal_attention_mask @@ -844,10 +843,6 @@ def forward( class GaudiGemma2ForCausalLM(Gemma2ForCausalLM): - def __init__(self, config, parallel_strategy: DistributedStrategy = NoOpStrategy): - config.parallel_strategy = parallel_strategy - super().__init__(config) - def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len): self.model.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len) diff --git a/optimum/habana/transformers/models/llama/modeling_llama.py b/optimum/habana/transformers/models/llama/modeling_llama.py index 0afcfbe05a..0e90f2a24d 100755 --- a/optimum/habana/transformers/models/llama/modeling_llama.py +++ b/optimum/habana/transformers/models/llama/modeling_llama.py @@ -136,7 +136,8 @@ def __init__( def _set_cos_sin_cache(self, seq_len, device, dtype): self.max_seq_len_cached = seq_len - t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype) + # Use torch.int32 to avoid loss due to low precision with BF16 (refer to SW-215204) + t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.int32) freqs = torch.outer(t, self.inv_freq) # Different from paper, but it uses a different permutation in order to obtain the same calculation @@ -404,15 +405,18 @@ def allocate(self, inp_seq_len, dtype, device, shape): @staticmethod def update(prev, cur, dim, idx, inp_seq_len): - orig_cur = cur - if prev.shape == cur.shape: - prev.copy_(cur) - return orig_cur - if idx is not None and cur.shape[2] > 1 and cur.shape[2] <= prev.shape[2]: - # Initialize - prev[:, :, :inp_seq_len, :].copy_(cur) - return orig_cur + if inp_seq_len != -1: + # reuse cache logic + orig_cur = cur + if prev.shape == cur.shape: + prev.copy_(cur) + return orig_cur + if cur.shape[2] > 1 and cur.shape[2] <= prev.shape[2]: + # Initialize + prev[:, :, :inp_seq_len, :].copy_(cur) + return orig_cur if idx is not None: + # 2+ tokenizer logic if model is static shape optimized prev.index_copy_(dim, idx - 1, cur) return prev else: @@ -652,21 +656,34 @@ def pre_attn_forward( else: if past_key_value is None: past_key = torch.zeros( - key_states.shape, dtype=self.get_k_proj_weight_dtype(), device=key_states.device + key_states.shape, + dtype=self.get_k_proj_weight_dtype() + if self.get_k_proj_weight_dtype() != torch.uint8 + else key_states.dtype, + device=key_states.device, ) past_value = torch.zeros( - key_states.shape, dtype=self.get_k_proj_weight_dtype(), device=key_states.device + key_states.shape, + dtype=self.get_k_proj_weight_dtype() + if self.get_k_proj_weight_dtype() != torch.uint8 + else key_states.dtype, + device=key_states.device, ) # Return list instead of tuple past_key_value = [past_key, past_value] - if ( + key_states = 
self.k_cache.update(past_key_value[0], key_states, 2, token_idx, key_states.shape[-2]) + value_states = self.v_cache.update( + past_key_value[1], value_states, 2, token_idx, value_states.shape[-2] + ) + + elif ( token_idx is not None and num_virtual_tokens is not None and num_virtual_tokens == past_key_value[0].shape[-2] ): # prefix tuning case. attach past_key_value to generate first token. - key_states = torch.cat((past_key_value[0], key_states), -2) - value_states = torch.cat((past_key_value[1], value_states), -2) + key_states = self.k_cache.update(past_key_value[0], key_states, 2, None, -1) + value_states = self.v_cache.update(past_key_value[1], value_states, 2, None, -1) past_key_value = (key_states, value_states) else: key_states = self.k_cache.update(past_key_value[0], key_states, 2, token_idx, self.inp_seq_len) @@ -746,6 +763,8 @@ def pre_attn_forward( causal_mask = attention_mask if cache_position is not None: causal_mask = attention_mask[:, :, cache_position, : key_states.shape[-2]] + else: + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] attn_weights = attn_weights + causal_mask if attn_softmax_bf16: @@ -896,7 +915,6 @@ class GaudiLlamaDecoderLayer(LlamaDecoderLayer): def __init__(self, config: LlamaConfig, layer_idx: int): super(LlamaDecoderLayer, self).__init__() self.hidden_size = config.hidden_size - self.self_attn = GaudiLlamaAttention(config=config, layer_idx=layer_idx) self.mlp = GaudiLlamaMLP(config) @@ -932,6 +950,8 @@ def forward( valid_sequence_lengths: Optional[torch.Tensor] = None, cache_idx: int = None, num_virtual_tokens: int = None, + attn_batch_split: int = 1, + prev_layer_residual: Optional[torch.Tensor] = None, **kwargs, ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: """ @@ -945,33 +965,99 @@ def forward( - add new arg flash_attention_causal_mask - add new arg flash_attention_fast_softmax """ - residual = hidden_states + if attn_batch_split > 1 and past_key_value is None: + # Calculate split sizes to handle cases where batch size is not divisible by attn_batch_split + batch_size = attention_mask.size(0) + base_split_size = batch_size // attn_batch_split + remainder = batch_size % attn_batch_split + + split_sizes = [base_split_size + 1 if i < remainder else base_split_size for i in range(attn_batch_split)] + + # Split tensors using the calculated sizes + sub_attention_mask = torch.split(attention_mask, split_sizes, dim=0) + sub_position_ids = torch.split(position_ids, split_sizes, dim=0) + sub_valid_sequence_lengths = torch.split(valid_sequence_lengths, split_sizes, dim=0) + split_attn_weights = [] + split_present_key_values = [] + split_hidden_states = [None] * attn_batch_split + residual = [None] * attn_batch_split + + for i in range(attn_batch_split): + split_hidden_states[i] = hidden_states[i] + if self.self_attn.layer_idx != 0: + # Add the residual from the previous layer + split_hidden_states[i] = self.post_mlp(hidden_states[i], prev_layer_residual[i]) + + residual[i] = split_hidden_states[i] + split_hidden_states[i], self_attn_weights, present_key_value = self.pre_attn( + hidden_states=split_hidden_states[i], + attention_mask=sub_attention_mask[i], + position_ids=sub_position_ids[i], + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cache_position=cache_position, + position_embeddings=position_embeddings, + token_idx=token_idx, + attn_softmax_bf16=attn_softmax_bf16, + reuse_cache=reuse_cache, + use_flash_attention=use_flash_attention, + 
flash_attention_recompute=flash_attention_recompute, + flash_attention_causal_mask=flash_attention_causal_mask, + flash_attention_fast_softmax=flash_attention_fast_softmax, + valid_sequence_lengths=sub_valid_sequence_lengths[i], + cache_idx=cache_idx, + num_virtual_tokens=num_virtual_tokens, + **kwargs, + ) + self.self_attn.attention_all_reduce(split_hidden_states[i]) + if output_attentions: + split_attn_weights.append(self_attn_weights) + if use_cache: + split_present_key_values.append(present_key_value) - hidden_states, self_attn_weights, present_key_value = self.pre_attn( - hidden_states=hidden_states, - attention_mask=attention_mask, - position_ids=position_ids, - past_key_value=past_key_value, - output_attentions=output_attentions, - use_cache=use_cache, - cache_position=cache_position, - position_embeddings=position_embeddings, - token_idx=token_idx, - attn_softmax_bf16=attn_softmax_bf16, - reuse_cache=reuse_cache, - use_flash_attention=use_flash_attention, - flash_attention_recompute=flash_attention_recompute, - flash_attention_causal_mask=flash_attention_causal_mask, - flash_attention_fast_softmax=flash_attention_fast_softmax, - valid_sequence_lengths=valid_sequence_lengths, - cache_idx=cache_idx, - num_virtual_tokens=num_virtual_tokens, - **kwargs, - ) - self.self_attn.attention_all_reduce(hidden_states) - hidden_states, residual = self.post_attn_pre_mlp(hidden_states, residual) - self.mlp.mlp_all_reduce(hidden_states) - hidden_states = self.post_mlp(hidden_states, residual) + self_attn_weights = torch.cat(split_attn_weights, dim=0) if split_attn_weights else None + present_key_value = [torch.cat(tensors, dim=0) for tensors in zip(*split_present_key_values)] + + int_residual_splits = [] + for i in range(attn_batch_split): + split_hidden_states[i], int_residual = self.post_attn_pre_mlp(split_hidden_states[i], residual[i]) + self.mlp.mlp_all_reduce(split_hidden_states[i]) + int_residual_splits.append(int_residual) + + if self.self_attn.layer_idx == (self.self_attn.config.num_hidden_layers - 1): + for i in range(attn_batch_split): + split_hidden_states[i] = self.post_mlp(split_hidden_states[i], int_residual_splits[i]) + + hidden_states = split_hidden_states + + else: + residual = hidden_states + hidden_states, self_attn_weights, present_key_value = self.pre_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cache_position=cache_position, + position_embeddings=position_embeddings, + token_idx=token_idx, + attn_softmax_bf16=attn_softmax_bf16, + reuse_cache=reuse_cache, + use_flash_attention=use_flash_attention, + flash_attention_recompute=flash_attention_recompute, + flash_attention_causal_mask=flash_attention_causal_mask, + flash_attention_fast_softmax=flash_attention_fast_softmax, + valid_sequence_lengths=valid_sequence_lengths, + cache_idx=cache_idx, + num_virtual_tokens=num_virtual_tokens, + **kwargs, + ) + self.self_attn.attention_all_reduce(hidden_states) + hidden_states, residual = self.post_attn_pre_mlp(hidden_states, residual) + self.mlp.mlp_all_reduce(hidden_states) + hidden_states = self.post_mlp(hidden_states, residual) outputs = (hidden_states,) @@ -979,6 +1065,9 @@ def forward( outputs += (self_attn_weights,) if use_cache: outputs += (present_key_value,) + # Store the residual splits to add them in the beginning of the next layer + if attn_batch_split > 1 and past_key_value is None: + outputs += (int_residual_splits,) return 
outputs @@ -1024,6 +1113,7 @@ def pre_attn( cache_idx=cache_idx, num_virtual_tokens=num_virtual_tokens, ) + return hidden_states, attn_weights, present_key_value def post_attn_pre_mlp(self, hidden_states, residual): @@ -1118,6 +1208,7 @@ def forward( cache_idx: int = None, lazy_mode: Optional[bool] = True, num_virtual_tokens: int = None, + attn_batch_split: int = 1, ) -> Union[Tuple, BaseModelOutputWithPast]: """ Copied from LlamaModel.forward: https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py @@ -1225,6 +1316,18 @@ def forward( if lazy_mode: htcore.mark_step() + split_prompt = False + prev_layer_residual = None + if attn_batch_split > 1 and past_key_values is None: + # Calculate split sizes to handle cases where batch size is not divisible by attn_batch_split + batch_size = hidden_states.size(0) + base_split_size = batch_size // attn_batch_split + remainder = batch_size % attn_batch_split + split_sizes = [base_split_size + 1 if i < remainder else base_split_size for i in range(attn_batch_split)] + # Split tensors using the calculated sizes + hidden_states_split = torch.split(hidden_states, split_sizes, dim=0) + split_prompt = True + for layer_idx, decoder_layer in enumerate(self.layers): if ( lazy_mode @@ -1257,9 +1360,11 @@ def forward( valid_sequence_lengths, None, ) + hidden_states = layer_outputs[0] else: + use_prev_layer_residual = attn_batch_split > 1 and past_key_values is None layer_outputs = decoder_layer( - hidden_states, + hidden_states=hidden_states_split if split_prompt else hidden_states, attention_mask=causal_mask, position_ids=position_ids, past_key_value=None if past_key_values is None else past_key_values[layer_idx], @@ -1277,8 +1382,16 @@ def forward( valid_sequence_lengths=valid_sequence_lengths, cache_idx=cache_idx, num_virtual_tokens=num_virtual_tokens, + attn_batch_split=attn_batch_split, + prev_layer_residual=prev_layer_residual if use_prev_layer_residual else None, ) - hidden_states = layer_outputs[0] + if use_prev_layer_residual: + index = 1 + int(use_cache) + int(output_attentions) + prev_layer_residual = layer_outputs[index] + if split_prompt: + hidden_states_split = layer_outputs[0] + else: + hidden_states = layer_outputs[0] if use_cache: next_decoder_cache += (layer_outputs[2 if output_attentions else 1],) @@ -1357,6 +1470,7 @@ def forward( cache_idx: int = None, lazy_mode: Optional[bool] = True, num_virtual_tokens: int = None, + attn_batch_split: int = 1, ) -> Union[Tuple, CausalLMOutputWithPast]: output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions output_hidden_states = ( @@ -1390,6 +1504,7 @@ def forward( cache_idx=cache_idx, lazy_mode=lazy_mode, num_virtual_tokens=num_virtual_tokens, + attn_batch_split=attn_batch_split, ) hidden_states = outputs[0] _, seq_len, _ = hidden_states.shape @@ -1431,7 +1546,7 @@ def forward( parallel_state.sequence_parallel_is_initialized() and parallel_state.get_sequence_parallel_world_size() > 1 ): - from optimum.habana.distributed.contextparallel import _get_loss_from_context_parallel + from ....distributed.contextparallel import _get_loss_from_context_parallel loss_fct = torch.nn.CrossEntropyLoss(reduction="none") loss_all = _get_loss_from_context_parallel(loss_fct(shift_logits, shift_labels)) @@ -1545,6 +1660,7 @@ def prepare_inputs_for_generation( "cache_idx": kwargs.get("cache_idx"), "lazy_mode": kwargs.get("lazy_mode"), "num_virtual_tokens": kwargs.get("num_virtual_tokens"), + "attn_batch_split": 
kwargs.get("attn_batch_split"), } ) return model_inputs diff --git a/optimum/habana/transformers/models/llava/modeling_llava.py b/optimum/habana/transformers/models/llava/modeling_llava.py index 997c16d700..f8fba446e6 100644 --- a/optimum/habana/transformers/models/llava/modeling_llava.py +++ b/optimum/habana/transformers/models/llava/modeling_llava.py @@ -22,6 +22,7 @@ from typing import List, Optional, Tuple, Union import torch +import torch.nn as nn from transformers.cache_utils import Cache from transformers.models.llava.modeling_llava import LlavaCausalLMOutputWithPast, LlavaForConditionalGeneration from transformers.utils import logging @@ -129,57 +130,65 @@ def forward( flash_attention_recompute: Optional[bool] = False, ) -> Union[Tuple, LlavaCausalLMOutputWithPast]: """ - Inherits from LlavaForConditionalGeneration: https://github.com/huggingface/transformers/blob/v4.37.2/src/transformers/models/llava/modeling_llava.py + Inherits from LlavaForConditionalGeneration: https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/llava/modeling_llava.py#L362 The only differences are: - add new args token_idx - add new args image_offset - add new args tokens_pos """ - if token_idx is not None: - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - vision_feature_layer = ( - vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer + output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions + output_hidden_states = ( + output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states + ) + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + vision_feature_layer = ( + vision_feature_layer if vision_feature_layer is not None else self.config.vision_feature_layer + ) + vision_feature_select_strategy = ( + vision_feature_select_strategy + if vision_feature_select_strategy is not None + else self.config.vision_feature_select_strategy + ) + + if (input_ids is None) ^ (inputs_embeds is not None): + raise ValueError( + "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one" ) - vision_feature_select_strategy = ( - vision_feature_select_strategy - if vision_feature_select_strategy is not None - else self.config.vision_feature_select_strategy + + if pixel_values is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both pixel_values and inputs_embeds at the same time, and must specify either one" ) - # 1. Extra the input embeddings + if inputs_embeds is None: inputs_embeds = self.get_input_embeddings()(input_ids) - image_features = None - # 2. Merge text and images - if pixel_values is not None and input_ids.shape[1] != 1: - image_outputs = self.vision_tower( - pixel_values, - output_hidden_states=True, - use_flash_attention=use_flash_attention, - flash_attention_recompute=flash_attention_recompute, - ) - # this is not memory efficient at all (output_hidden_states=True) will save all the hidden stated. - selected_image_feature = image_outputs.hidden_states[vision_feature_layer] + image_features = None + # 2. 
Merge text and images + if pixel_values is not None and input_ids.shape[1] != 1: + image_outputs = self.vision_tower( + pixel_values, + output_hidden_states=True, + use_flash_attention=use_flash_attention, + flash_attention_recompute=flash_attention_recompute, + ) + # this is not memory efficient at all (output_hidden_states=True) will save all the hidden stated. + selected_image_feature = image_outputs.hidden_states[vision_feature_layer] - if vision_feature_select_strategy == "default": - selected_image_feature = selected_image_feature[:, 1:] - elif vision_feature_select_strategy == "full": - selected_image_feature = selected_image_feature - else: - raise ValueError( - f"Unexpected select feature strategy: {self.config.vision_feature_select_strategy}" - ) + if vision_feature_select_strategy == "default": + selected_image_feature = selected_image_feature[:, 1:] + elif vision_feature_select_strategy == "full": + selected_image_feature = selected_image_feature + else: + raise ValueError(f"Unexpected select feature strategy: {self.config.vision_feature_select_strategy}") - image_features = self.multi_modal_projector(selected_image_feature) - inputs_embeds = _merge_input_ids_with_image_features( - image_features, inputs_embeds, input_ids, self.config.image_token_index - ) + image_features = self.multi_modal_projector(selected_image_feature) + inputs_embeds = _merge_input_ids_with_image_features( + image_features, inputs_embeds, input_ids, self.config.image_token_index + ) + if token_idx is not None: outputs = self.language_model( attention_mask=attention_mask, position_ids=position_ids, @@ -190,14 +199,13 @@ def forward( output_hidden_states=output_hidden_states, return_dict=return_dict, cache_position=cache_position, - # TODO: from Transformers v4.45, `generate` sets `num_logits_to_keep` to 1 if not given, which we don't want here - # num_logits_to_keep=num_logits_to_keep, + num_logits_to_keep=num_logits_to_keep, token_idx=token_idx + image_offset, use_flash_attention=use_flash_attention, flash_attention_recompute=flash_attention_recompute, ) - if input_ids.shape[1] != 1 and pixel_values is not None: + if input_ids.shape[1] != 1 and pixel_values is not None and tokens_pos is not None: batch_size, seq_len = tokens_pos.shape batch_indices = torch.arange(batch_size).repeat_interleave(seq_len) logits = outputs[0][batch_indices, tokens_pos.reshape(-1), :].reshape(batch_size, seq_len, -1) @@ -220,20 +228,50 @@ def forward( ) else: - return super().forward( - input_ids=input_ids, - pixel_values=pixel_values, + outputs = self.language_model( attention_mask=attention_mask, position_ids=position_ids, past_key_values=past_key_values, inputs_embeds=inputs_embeds, - vision_feature_layer=vision_feature_layer, - vision_feature_select_strategy=vision_feature_select_strategy, - labels=labels, use_cache=use_cache, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict, + cache_position=cache_position, + num_logits_to_keep=num_logits_to_keep, + use_flash_attention=use_flash_attention, + flash_attention_recompute=flash_attention_recompute, + ) + + logits = outputs[0] + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + if attention_mask is not None: + shift_attention_mask = attention_mask[..., 1:] + shift_logits = logits[..., :-1, :][shift_attention_mask.to(logits.device) != 0].contiguous() + shift_labels = labels[..., 1:][shift_attention_mask.to(labels.device) != 0].contiguous() + else: + shift_logits = logits[..., :-1, 
:].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = nn.CrossEntropyLoss() + loss = loss_fct( + shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1).to(shift_logits.device) + ) + + if not return_dict: + output = (logits,) + outputs[1:] + return (loss,) + output if loss is not None else output + + return LlavaCausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + image_hidden_states=image_features if pixel_values is not None else None, ) def prepare_inputs_for_generation( @@ -259,7 +297,10 @@ def prepare_inputs_for_generation( token_idx = kwargs.get("token_idx", None) image_offset = 0 tokens_pos = None - if token_idx is not None and pixel_values is not None: + legacy_processing = ( + (input_ids == self.config.image_token_index).sum(1).max() < self.config.image_seq_length + ) or ((input_ids.shape[-1] == 1 if token_idx is None else token_idx == 1) and pixel_values is not None) + if token_idx is not None and pixel_values is not None and legacy_processing: input_ids, attention_mask, image_offset, tokens_pos = _pad_inputs( input_ids, attention_mask, diff --git a/optimum/habana/transformers/models/llava_next/modeling_llava_next.py b/optimum/habana/transformers/models/llava_next/modeling_llava_next.py index 6cf728d014..784828973e 100644 --- a/optimum/habana/transformers/models/llava_next/modeling_llava_next.py +++ b/optimum/habana/transformers/models/llava_next/modeling_llava_next.py @@ -94,7 +94,7 @@ def forward( flash_attention_recompute=flash_attention_recompute, ) - if inputs_embeds.shape[1] != 1 and pixel_values is not None: + if inputs_embeds.shape[1] != 1 and pixel_values is not None and self.text_tokens_pos is not None: batch_size, seq_len = self.text_tokens_pos.shape batch_indices = torch.arange(batch_size).repeat_interleave(seq_len) logits = outputs[0][batch_indices, self.text_tokens_pos.reshape(-1), :].reshape( @@ -261,6 +261,9 @@ def prepare_inputs_for_generation( **kwargs, ) else: + legacy_processing = ( + (input_ids == self.config.image_token_index).sum(1).max() < self.config.image_seq_length + ) or ((input_ids.shape[-1] == 1 if token_idx is None else token_idx == 1) and pixel_values is not None) use_flash_attention = kwargs.get("use_flash_attention", False) flash_attention_recompute = kwargs.get("flash_attention_recompute", False) position_ids = kwargs.get("position_ids", None) @@ -337,13 +340,28 @@ def prepare_inputs_for_generation( image_feature = image_feature[0] image_feature = torch.cat((image_feature, self.image_newline[None]), dim=0) new_image_features.append(image_feature) - image_features = torch.stack(new_image_features, dim=0) - inputs_embeds, attention_mask, labels, position_ids, self.text_tokens_pos = ( - self._merge_input_ids_with_image_features( - image_features, inputs_embeds, input_ids, attention_mask, labels + if legacy_processing: + image_features = torch.stack(new_image_features, dim=0) + inputs_embeds, attention_mask, labels, position_ids, self.text_tokens_pos = ( + self._merge_input_ids_with_image_features( + image_features, inputs_embeds, input_ids, attention_mask, labels + ) ) - ) - self.image_offset = image_features.shape[1] - 1 # image_token has occupied 1 token position. + self.image_offset = image_features.shape[1] - 1 # image_token has occupied 1 token position. 
+ else: + image_features = torch.cat(new_image_features, dim=0) + n_image_tokens = (input_ids == self.config.image_token_index).sum().item() + n_image_features = image_features.shape[0] + if n_image_tokens != n_image_features: + raise ValueError( + f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}" + ) + image_features = image_features.to(inputs_embeds.device, inputs_embeds.dtype) + batch_indices, image_indices = torch.where(input_ids == self.config.image_token_index) + inputs_embeds[batch_indices, image_indices] = image_features.contiguous() + self.image_offset = 0 + self.text_tokens_pos = None + if labels is None: labels = torch.full_like(attention_mask, self.config.ignore_index).to(torch.long) @@ -353,33 +371,34 @@ def prepare_inputs_for_generation( seq_len = input_ids.shape[1] pad_len = seq_len - token_idx input_ids = torch.index_select(input_ids, 1, token_idx - 1) - # Retrieve the first layer to inspect the logits and mask out the hidden states - # that are set to 0 - first_layer_past_key_value = past_key_values[0][0][:, :, :, 0] + if legacy_processing: + # Retrieve the first layer to inspect the logits and mask out the hidden states + # that are set to 0 + first_layer_past_key_value = past_key_values[0][0][:, :, :, 0] - # Sum all dimensions of head_dim (-2) to avoid random errors such as: https://github.com/huggingface/transformers/pull/28032#issuecomment-1863691941 - batch_index, non_attended_tokens = torch.where(first_layer_past_key_value.float().sum(-2) == 0) + # Sum all dimensions of head_dim (-2) to avoid random errors such as: https://github.com/huggingface/transformers/pull/28032#issuecomment-1863691941 + batch_index, non_attended_tokens = torch.where(first_layer_past_key_value.float().sum(-2) == 0) - # Get the target length - past_length = first_layer_past_key_value.shape[-1] + # Get the target length + past_length = first_layer_past_key_value.shape[-1] - extended_attention_mask = torch.ones( - (attention_mask.shape[0], past_length), - dtype=attention_mask.dtype, - device=attention_mask.device, - ) - # Filter out only the tokens that can be un-attended, this can happen - # if one uses Llava + Fused modules where the cache on the - # first iteration is already big enough, or if one passes custom cache - valid_indices = non_attended_tokens < extended_attention_mask.size(-1) - new_batch_index = batch_index[valid_indices] - new_non_attended_tokens = non_attended_tokens[valid_indices] - - # Zero-out the places where we don't need to attend - extended_attention_mask[new_batch_index, new_non_attended_tokens] = 0 - - attention_mask = extended_attention_mask - attention_mask[:, -pad_len:] = 0 + extended_attention_mask = torch.ones( + (attention_mask.shape[0], past_length), + dtype=attention_mask.dtype, + device=attention_mask.device, + ) + # Filter out only the tokens that can be un-attended, this can happen + # if one uses Llava + Fused modules where the cache on the + # first iteration is already big enough, or if one passes custom cache + valid_indices = non_attended_tokens < extended_attention_mask.size(-1) + new_batch_index = batch_index[valid_indices] + new_non_attended_tokens = non_attended_tokens[valid_indices] + + # Zero-out the places where we don't need to attend + extended_attention_mask[new_batch_index, new_non_attended_tokens] = 0 + + attention_mask = extended_attention_mask + attention_mask[:, -pad_len:] = 0 if attention_mask is not None and position_ids is None: # create position_ids on the fly for batch generation 
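In the non-legacy branch added above, the projected vision features are written directly into the text embedding sequence, one feature row per image placeholder token, instead of re-merging and re-padding the whole prompt. The following is a minimal, self-contained sketch of that scatter step with toy tensors; the placeholder id (32000), the shapes, and the random tensors are illustrative assumptions, not the pipeline's real values:

import torch

# Toy setup: 2 prompts of length 6, hidden size 8; a made-up placeholder id.
image_token_index = 32000
input_ids = torch.randint(0, 1000, (2, 6))
input_ids[0, 1] = image_token_index
input_ids[1, 3] = image_token_index
inputs_embeds = torch.randn(2, 6, 8)

# One projected feature vector per placeholder token, in scan order.
n_image_tokens = int((input_ids == image_token_index).sum())
image_features = torch.randn(n_image_tokens, 8)

# Same consistency check and scatter as the non-legacy branch above.
if n_image_tokens != image_features.shape[0]:
    raise ValueError(
        f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {image_features.shape[0]}"
    )
batch_indices, image_indices = torch.where(input_ids == image_token_index)
inputs_embeds[batch_indices, image_indices] = image_features.to(inputs_embeds.dtype)

Because the placeholders were already expanded by the processor in this path, no attention-mask surgery is needed afterwards, which is why the patch sets image_offset to 0 and text_tokens_pos to None in that branch.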
diff --git a/optimum/habana/transformers/models/mixtral/__init__.py b/optimum/habana/transformers/models/mixtral/__init__.py index 65bdca2fbd..7a612b80b3 100644 --- a/optimum/habana/transformers/models/mixtral/__init__.py +++ b/optimum/habana/transformers/models/mixtral/__init__.py @@ -4,8 +4,6 @@ GaudiMixtralDecoderLayer, GaudiMixtralForCausalLM, GaudiMixtralModel, - gaudi_mixtral_block_dynamic_moe_forward, - gaudi_mixtral_block_moe_forward, - gaudi_mixtral_block_sparse_moe_forward, + GaudiMixtralSparseMoeBlock, gaudi_mixtral_rmsnorm_forward, ) diff --git a/optimum/habana/transformers/models/mixtral/modeling_mixtral.py b/optimum/habana/transformers/models/mixtral/modeling_mixtral.py index 97e9a8026f..3b24b29fbb 100644 --- a/optimum/habana/transformers/models/mixtral/modeling_mixtral.py +++ b/optimum/habana/transformers/models/mixtral/modeling_mixtral.py @@ -22,7 +22,6 @@ import contextlib import math -import os from typing import List, Optional, Tuple, Union import habana_frameworks.torch.core as htcore @@ -42,13 +41,12 @@ MixtralDecoderLayer, MixtralForCausalLM, MixtralModel, + MixtralSparseMoeBlock, apply_rotary_pos_emb, load_balancing_loss_func, ) from transformers.utils import is_torchdynamo_compiling, logging -from optimum.habana.utils import get_device_name - from ..llama.modeling_llama import ( GaudiLlamaDynamicNTKScalingRotaryEmbedding, GaudiLlamaLinearScalingRotaryEmbedding, @@ -150,6 +148,61 @@ def gaudi_mixtral_repeat_kv( return query_states, key_states, value_states, attention_mask +class GaudiMixtralSparseMoeBlock(MixtralSparseMoeBlock): + def forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + original_shape = hidden_states.shape + hidden_dim = original_shape[2] + hidden_states = hidden_states.view(-1, hidden_dim) + # router_logits: (batch * sequence_length, n_experts) + router_logits = self.gate(hidden_states) + + if is_deepspeed_available() and (not self.training): + from deepspeed import comm as dist + + if dist.is_initialized(): + output_tensors = [router_logits.clone() for _ in range(dist.get_world_size())] + dist.all_gather(output_tensors, router_logits) + router_logits = torch.cat(output_tensors, dim=1) + + routing_weights, selected_experts = calculate_routing_tensors(router_logits, self.top_k, hidden_states.dtype) + + final_hidden_states = self.call_dynamic_moe_op( + hidden_states=hidden_states, + expert_routing_table=selected_experts, + router_weights=routing_weights, + ) + if is_deepspeed_available() and (not self.training): + from deepspeed import comm as dist + + if dist.is_initialized(): + dist.all_reduce(final_hidden_states) + return final_hidden_states.view(original_shape), router_logits + + def call_dynamic_moe_op( + self, + hidden_states, + expert_routing_table, + router_weights, + ): + # pre-processing for custom op inputs + w1_list = [expert.w1.weight for expert in self.experts] + w2_list = [expert.w2.weight for expert in self.experts] + w3_list = [expert.w3.weight for expert in self.experts] + + return torch.ops.hpu.mixture_of_experts( + hidden_states=hidden_states, + expert_routing_table=expert_routing_table, + router_weights=router_weights, + w1=w1_list, + w3=w2_list, + w2=w3_list, + permuted_weights=True, + activation="silu", + experts_min=0, + experts_max=len(self.experts) - 1, + ) + + class GaudiMixtralAttentionLongSequence: @staticmethod def forward(q, k, v, mask, causal, q_block_size): @@ -359,108 +412,6 @@ def forward( return attn_output, attn_weights, past_key_value -def gaudi_mixtral_block_moe_forward(self, 
hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - # We need this workaround until moe op in hpu is supporting fp8 - if not self.training and not os.environ.get("QUANT_CONFIG") and not get_device_name() == "gaudi": - # Gaudi1 is not supporting dynamic moe - return self.dynamic_moe_forward(hidden_states) - - return self.sparse_moe_forward(hidden_states) - - -def gaudi_mixtral_block_sparse_moe_forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Copied from MixtralSparseMoeBlock.forward: https://github.com/huggingface/transformers/blob/v4.37.0/src/transformers/models/mixtral/modeling_mixtral.py - The only differences are: - - optimize expert forward, remove dynamic control and dynamic shape - """ - batch_size, sequence_length, hidden_dim = hidden_states.shape - hidden_states = hidden_states.view(-1, hidden_dim) - # router_logits: (batch * sequence_length, n_experts) - router_logits = self.gate(hidden_states) - - if is_deepspeed_available() and (not self.training): - from deepspeed import comm as dist - - if dist.is_initialized(): - output_tensors = [router_logits.clone() for _ in range(dist.get_world_size())] - dist.all_gather(output_tensors, router_logits) - router_logits = torch.cat(output_tensors, dim=1) - - routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float) - routing_weights, selected_experts = torch.topk(routing_weights, self.top_k, dim=-1) - routing_weights /= routing_weights.sum(dim=-1, keepdim=True) - # we cast back to the input dtype - routing_weights = routing_weights.to(hidden_states.dtype) - - final_hidden_states = torch.zeros( - (batch_size, sequence_length, hidden_dim), dtype=hidden_states.dtype, device=hidden_states.device - ) - - padded_weights = torch.zeros( - (batch_size * sequence_length, self.num_experts), dtype=hidden_states.dtype, device=hidden_states.device - ) - padded_weights.scatter_(-1, selected_experts, routing_weights) - padded_weights = padded_weights.reshape(-1, sequence_length, self.num_experts) - padded_weights = padded_weights.permute(2, 0, 1).unsqueeze(-1) - - # Loop over all available experts in the model and perform the computation on each expert - for expert_idx in range(self.num_experts): - expert_layer = self.experts[expert_idx] - padded_weight = padded_weights[expert_idx] - current_state_static = hidden_states.reshape(-1, hidden_dim) - current_hidden_states_static = ( - expert_layer(current_state_static).reshape(-1, sequence_length, hidden_dim) * padded_weight - ) - final_hidden_states += current_hidden_states_static - # support long sequences exceeding 8192 - if not self.training and sequence_length > 8192: - htcore.mark_step() - - return final_hidden_states, router_logits - - -def gaudi_mixtral_block_dynamic_moe_forward(self, hidden_states: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: - batch_size, sequence_length, hidden_dim = hidden_states.shape - original_shape = hidden_states.shape - hidden_states = hidden_states.view(-1, hidden_dim) - # router_logits: (batch * sequence_length, n_experts) - router_logits = self.gate(hidden_states) - - if is_deepspeed_available() and (not self.training): - from deepspeed import comm as dist - - if dist.is_initialized(): - output_tensors = [router_logits.clone() for _ in range(dist.get_world_size())] - dist.all_gather(output_tensors, router_logits) - router_logits = torch.cat(output_tensors, dim=1) - - routing_weights, selected_experts = calculate_routing_tensors(router_logits, self.top_k, hidden_states.dtype) - # pre-processing for 
custom op inputs - w1_list = [expert.w1.weight for expert in self.experts] - w2_list = [expert.w2.weight for expert in self.experts] - w3_list = [expert.w3.weight for expert in self.experts] - - final_hidden_states = torch.ops.hpu.mixture_of_experts( - hidden_states=hidden_states, - expert_routing_table=selected_experts, - router_weights=routing_weights, - w1=w1_list, - w3=w2_list, - w2=w3_list, - permuted_weights=True, - activation="silu", - experts_min=0, - experts_max=7, - ) - if is_deepspeed_available() and (not self.training): - from deepspeed import comm as dist - - if dist.is_initialized(): - dist.all_reduce(final_hidden_states) - return final_hidden_states.view(original_shape), router_logits - - def calculate_routing_tensors( score: torch.Tensor, topk: int, hidden_states_dtype: torch.dtype ) -> Tuple[torch.Tensor, torch.Tensor]: @@ -545,9 +496,6 @@ def forward( class GaudiMixtralModel(MixtralModel): - def __init__(self, config: MixtralConfig): - super().__init__(config) - def allocate_kv_cache(self, batch_size, max_seq_len, inp_seq_len): for layer in self.layers: layer.allocate_kv_cache(batch_size, max_seq_len, inp_seq_len) diff --git a/optimum/habana/transformers/models/mllama/modeling_mllama.py b/optimum/habana/transformers/models/mllama/modeling_mllama.py index 7e73868249..03b8ea16b3 100644 --- a/optimum/habana/transformers/models/mllama/modeling_mllama.py +++ b/optimum/habana/transformers/models/mllama/modeling_mllama.py @@ -694,6 +694,10 @@ def forward( next_decoder_cache = None if isinstance(past_key_values, Cache) else () for idx, decoder_layer in enumerate(self.layers): + if not self.training and ( + not torch.distributed.is_initialized() or torch.distributed.get_world_size() == 1 + ): + htcore.mark_step() if output_hidden_states: all_hidden_states += (hidden_states,) diff --git a/optimum/habana/transformers/models/t5/modeling_t5.py b/optimum/habana/transformers/models/t5/modeling_t5.py index c498916b18..8927cf3787 100644 --- a/optimum/habana/transformers/models/t5/modeling_t5.py +++ b/optimum/habana/transformers/models/t5/modeling_t5.py @@ -406,7 +406,7 @@ def gaudi_T5Stack_forward( if self.gradient_checkpointing and self.training: layer_outputs = self._gradient_checkpointing_func( - layer_module.forward, + layer_module.__call__, hidden_states, extended_attention_mask, position_bias, diff --git a/optimum/habana/transformers/models/wav2vec2/modeling_wav2vec2.py b/optimum/habana/transformers/models/wav2vec2/modeling_wav2vec2.py index e2fdc3f2e6..4608a56d3f 100644 --- a/optimum/habana/transformers/models/wav2vec2/modeling_wav2vec2.py +++ b/optimum/habana/transformers/models/wav2vec2/modeling_wav2vec2.py @@ -497,6 +497,7 @@ def __init__( is_causal, config, ) + self.use_flash_attention = True if os.getenv("USE_FLASH_ATTENTION") == "1" else False self.flash_attention_fast_softmax = True if os.getenv("FLASH_ATTENTION_FAST_SOFTMAX") == "1" else False self.flash_attention_recompute = True if os.getenv("FLASH_ATTENTION_RECOMPUTE") == "1" else False @@ -581,7 +582,7 @@ def forward( # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. 
is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False - if FusedSDPA: + if self.use_flash_attention and FusedSDPA: if tgt_len == 1: # next token softmax_mode = True if os.getenv("QUANT_CONFIG", "") else False diff --git a/optimum/habana/transformers/trainer.py b/optimum/habana/transformers/trainer.py index 9c44426559..0880e316fa 100644 --- a/optimum/habana/transformers/trainer.py +++ b/optimum/habana/transformers/trainer.py @@ -158,7 +158,7 @@ def _get_input_update_settings(model, lazy_mode: Optional[bool] = None) -> Tuple inputs_update: Dict = {} should_update_inputs = (getattr(model, "generation_config", None) is not None) and ( - model.config.model_type in ("llama", "qwen2", "starcoder2", "gemma", "baichuan", "chatglm") + model.config.model_type in ("llama", "qwen2", "starcoder2", "gemma", "baichuan", "chatglm", "deepseek_v2") ) if should_update_inputs: if model.generation_config.attn_softmax_bf16: @@ -2463,6 +2463,7 @@ def create_accelerator_and_postprocess(self): "distribution_strategy": self.args.distribution_strategy, "dynamic": self.args.compile_dynamic, "dataloader_config": dataloader_config, + "use_regional_compilation": self.args.use_regional_compilation, } # create accelerator object diff --git a/optimum/habana/transformers/training_args.py b/optimum/habana/transformers/training_args.py index 4a2b12593f..e914141f71 100644 --- a/optimum/habana/transformers/training_args.py +++ b/optimum/habana/transformers/training_args.py @@ -101,6 +101,12 @@ class GaudiTrainingArguments(TrainingArguments): Whether to use compiled autograd for training. Currently only for summarization models. compile_dynamic (`bool|None`, *optional*, defaults to `None`): Set value of 'dynamic' parameter for torch.compile. + use_regional_compilation (`bool`, *optional*, defaults to `False`): + Whether to use regional compile with deepspeed + inline_inbuilt_nn_modules (`bool`, *optional*, defaults to `None`): + Set value of 'inline_inbuilt_nn_modules' parameter for torch._dynamo.config. Currently, disabling this parameter improves the performance of the ALBERT model. + cache_size_limit(`int`, *optional*, defaults to 'None'): + Set value of 'cache_size_limit' parameter for torch._dynamo.config disable_tensor_cache_hpu_graphs (`bool`, *optional*, defaults to `False`): Whether to disable tensor cache when using hpu graphs. If True, tensors won't be cached in hpu graph and memory can be saved. 
max_hpu_graphs (`int`, *optional*): @@ -170,6 +176,21 @@ class GaudiTrainingArguments(TrainingArguments): metadata={"help": ("Set value of 'dynamic' parameter for torch.compile.")}, ) + cache_size_limit: Optional[int] = field( + default=None, + metadata={"help": "Set value of 'cache_size_limit' parameter for torch._dynamo.config."}, + ) + + use_regional_compilation: Optional[bool] = field( + default=False, + metadata={"help": ("Whether to use regional compile for traing.")}, + ) + + inline_inbuilt_nn_modules: Optional[bool] = field( + default=None, + metadata={"help": ("Set value of 'inline_inbuilt_nn_modules' parameter for torch._dynamo.config.")}, + ) + disable_tensor_cache_hpu_graphs: Optional[bool] = field( default=False, metadata={"help": "Whether to use a tensor cache for hpu graphs."}, @@ -195,6 +216,11 @@ class GaudiTrainingArguments(TrainingArguments): metadata={"help": ("Determines how many ranks are divided into context parallel group.")}, ) + minimize_memory: Optional[bool] = field( + default=False, + metadata={"help": ("Whether to enable minimze memory for fp8")}, + ) + throughput_warmup_steps: Optional[int] = field( default=0, metadata={ @@ -310,6 +336,15 @@ class GaudiTrainingArguments(TrainingArguments): }, ) + # Use this to override default attn_implementation in transformers + attn_implementation: Optional[str] = field( + default="eager", + metadata={ + "help": "choose whether to use scale dot product attention (SDPA) or not.", + "choices": ["eager", "sdpa"], + }, + ) + sdp_on_bf16: bool = field( default=False, metadata={"help": "Allow pyTorch to use reduced precision in the SDPA math backend"}, @@ -860,6 +895,12 @@ def _setup_devices(self) -> "torch.device": if self.sdp_on_bf16: torch._C._set_math_sdp_allow_fp16_bf16_reduction(True) + if self.inline_inbuilt_nn_modules is not None: + torch._dynamo.config.inline_inbuilt_nn_modules = self.inline_inbuilt_nn_modules + + if self.torch_compile and self.cache_size_limit is not None: + torch._dynamo.config.cache_size_limit = self.cache_size_limit + logger.info("PyTorch: setting up devices") if not is_accelerate_available(): raise ImportError( @@ -927,6 +968,7 @@ def _setup_devices(self) -> "torch.device": accelerator_state_kwargs["backend"] = self.ddp_backend accelerator_state_kwargs["timeout"] = timedelta(seconds=self.ddp_timeout) accelerator_state_kwargs["context_parallel_size"] = self.context_parallel_size + accelerator_state_kwargs["minimize_memory"] = self.minimize_memory else: raise ValueError( "No device has been set. Use either --use_habana to run on HPU or --no_cuda to run on CPU." diff --git a/optimum/habana/trl/models/modeling_base.py b/optimum/habana/trl/models/modeling_base.py index fcdc7ddc3f..e0d55c7e7c 100644 --- a/optimum/habana/trl/models/modeling_base.py +++ b/optimum/habana/trl/models/modeling_base.py @@ -16,7 +16,7 @@ import torch from trl import PreTrainedModelWrapper -from optimum.habana.utils import to_device_dtype +from ...utils import to_device_dtype def adapt_PreTrainedModelWrapper_to_gaudi(): diff --git a/optimum/habana/trl/models/modeling_sd_base.py b/optimum/habana/trl/models/modeling_sd_base.py index 2bcd1ff415..ff0427fd14 100644 --- a/optimum/habana/trl/models/modeling_sd_base.py +++ b/optimum/habana/trl/models/modeling_sd_base.py @@ -24,8 +24,8 @@ _left_broadcast, ) -from optimum.habana import GaudiConfig -from optimum.habana.diffusers import ( +from ... 
import GaudiConfig +from ...diffusers import ( GaudiDDIMScheduler, GaudiStableDiffusionPipeline, ) diff --git a/optimum/habana/trl/trainer/ddpo_trainer.py b/optimum/habana/trl/trainer/ddpo_trainer.py index 73c6b725e1..a40dfcab4c 100644 --- a/optimum/habana/trl/trainer/ddpo_trainer.py +++ b/optimum/habana/trl/trainer/ddpo_trainer.py @@ -27,9 +27,9 @@ from trl.trainer import DDPOConfig from trl.trainer.utils import PerPromptStatTracker -from optimum.habana import GaudiConfig -from optimum.habana.accelerate import GaudiAccelerator -from optimum.habana.utils import set_seed +from ... import GaudiConfig +from ...accelerate import GaudiAccelerator +from ...utils import set_seed logger = get_logger(__name__) diff --git a/optimum/habana/trl/trainer/ppo_config.py b/optimum/habana/trl/trainer/ppo_config.py index bc5bcda60a..098c555bdf 100644 --- a/optimum/habana/trl/trainer/ppo_config.py +++ b/optimum/habana/trl/trainer/ppo_config.py @@ -70,7 +70,7 @@ def __post_init__(self): ) if self.use_habana: - from optimum.habana.transformers.modeling_utils import ( + from ...transformers.modeling_utils import ( adapt_transformers_to_gaudi, ) diff --git a/optimum/habana/trl/trainer/ppo_trainer.py b/optimum/habana/trl/trainer/ppo_trainer.py index c2303a5f8a..790c50f00f 100644 --- a/optimum/habana/trl/trainer/ppo_trainer.py +++ b/optimum/habana/trl/trainer/ppo_trainer.py @@ -53,8 +53,7 @@ RunningMoments, ) -from optimum.habana.utils import set_seed - +from ...utils import set_seed from . import GaudiPPOConfig @@ -99,7 +98,7 @@ def __init__( ) # Step 1: Initialize Accelerator if config.use_habana: - from optimum.habana.accelerate import GaudiAccelerator as Accelerator + from ...accelerate import GaudiAccelerator as Accelerator else: from accelerate import Accelerator self.accelerator = Accelerator( diff --git a/optimum/habana/trl/trainer/reward_trainer.py b/optimum/habana/trl/trainer/reward_trainer.py index bbb0c761fe..73ba7a21ac 100644 --- a/optimum/habana/trl/trainer/reward_trainer.py +++ b/optimum/habana/trl/trainer/reward_trainer.py @@ -20,7 +20,7 @@ ) from transformers.utils import PaddingStrategy -from optimum.habana import GaudiTrainer +from ... 
import GaudiTrainer class GaudiRewardTrainer(GaudiTrainer): diff --git a/optimum/habana/utils.py b/optimum/habana/utils.py index 2225cb8c89..244b52e203 100755 --- a/optimum/habana/utils.py +++ b/optimum/habana/utils.py @@ -403,3 +403,15 @@ def get_device_name(): return "gaudi3" else: raise ValueError(f"Unsupported device: the device type is {device_type}.") + + +def get_device_count(): + """ + Returns the number of the current gaudi devices + """ + import habana_frameworks.torch.utils.experimental as htexp + + if htexp.hpu.is_available(): + return htexp.hpu.device_count() + else: + raise ValueError("No hpu is found avail on this system") diff --git a/tests/baselines/bridgetower_large_itm_mlm_itc.json b/tests/baselines/bridgetower_large_itm_mlm_itc.json index 9b2a27509e..6a1a8540b0 100644 --- a/tests/baselines/bridgetower_large_itm_mlm_itc.json +++ b/tests/baselines/bridgetower_large_itm_mlm_itc.json @@ -7,8 +7,8 @@ "multi_card": { "learning_rate": 1e-5, "train_batch_size": 48, - "train_runtime": 314.5877, - "train_samples_per_second": 918.387, + "train_runtime": 224.42, + "train_samples_per_second": 904.93, "extra_arguments": [ "--dataset_config_name matching", "--dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6", @@ -19,7 +19,6 @@ "--dataloader_num_workers 2", "--logging_steps 10", "--use_hpu_graphs_for_inference", - "--distribution_strategy fast_ddp", "--trust_remote_code True" ] } diff --git a/tests/baselines/clip_roberta.json b/tests/baselines/clip_roberta.json index a712dfb792..0c2dfec435 100755 --- a/tests/baselines/clip_roberta.json +++ b/tests/baselines/clip_roberta.json @@ -29,14 +29,14 @@ }, "gaudi2": { "ydshieh/coco_dataset_script": { - "num_train_epochs": 1, "eval_batch_size": 64, + "num_train_epochs": 1, "distribution": { "multi_card": { - "learning_rate": 5e-5, + "learning_rate": 5e-05, "train_batch_size": 512, - "train_runtime": 62.3694, - "train_samples_per_second": 16572.31, + "train_runtime": 59.50, + "train_samples_per_second": 14124, "extra_arguments": [ "--data_dir $PWD/", "--dataset_config_name 2017", @@ -45,13 +45,13 @@ "--remove_unused_columns False", "--warmup_steps 0", "--weight_decay 0.1", - "--save_strategy epoch", - "--use_hpu_graphs_for_training", - "--use_hpu_graphs_for_inference", - "--dataloader_num_workers 16", - "--distribution_strategy fast_ddp", + "--save_strategy no", + "--use_hpu_graphs", + "--dataloader_num_workers 2", "--mediapipe_dataloader", - "--trust_remote_code True" + "--logging_nan_inf_filter", + "--trust_remote_code True", + "--max_steps 100" ] } } diff --git a/tests/baselines/falcon_40b.json b/tests/baselines/falcon_40b.json index ab040192c6..f4e26f0a03 100644 --- a/tests/baselines/falcon_40b.json +++ b/tests/baselines/falcon_40b.json @@ -7,9 +7,9 @@ "multi_card": { "learning_rate": 4e-4, "train_batch_size": 1, - "perplexity": 4.0893, - "train_runtime": 360, - "train_samples_per_second": 28.162, + "perplexity": 4.0, + "train_runtime": 550, + "train_samples_per_second": 15.0, "extra_arguments": [ "--bf16", "--gradient_accumulation_steps 16", @@ -42,9 +42,9 @@ "multi_card": { "learning_rate": 4e-4, "train_batch_size": 1, - "perplexity": 4.0893, - "train_runtime": 470, - "train_samples_per_second": 28.162, + "perplexity": 1.6, + "train_runtime": 710, + "train_samples_per_second": 15.0, "extra_arguments": [ "--bf16", "--gradient_accumulation_steps 16", diff --git a/tests/baselines/llama_7b.json b/tests/baselines/llama_7b.json index cc8737429e..dcfd6d3807 100644 --- a/tests/baselines/llama_7b.json +++ b/tests/baselines/llama_7b.json @@ 
-376,7 +376,6 @@ "--report_to none", "--max_steps 100", "--peft_type prompt_tuning", - "--max_seq_length 64", "--lr_scheduler_type cosine", "--warmup_steps 0", "--weight_decay 0.05", @@ -402,7 +401,6 @@ "--report_to none", "--max_steps 100", "--peft_type prefix_tuning", - "--max_seq_length 64", "--lr_scheduler_type cosine", "--warmup_steps 0", "--weight_decay 0.05", @@ -428,7 +426,6 @@ "--report_to none", "--max_steps 100", "--peft_type p_tuning", - "--max_seq_length 64", "--lr_scheduler_type cosine", "--warmup_steps 0", "--weight_decay 0.05", diff --git a/tests/ci/slow_tests_diffusers.sh b/tests/ci/slow_tests_diffusers.sh index ab776092a5..3c95a67d0d 100644 --- a/tests/ci/slow_tests_diffusers.sh +++ b/tests/ci/slow_tests_diffusers.sh @@ -2,6 +2,7 @@ python -m pip install --upgrade pip export RUN_SLOW=true +huggingface-cli login --token $1 make test_installs CUSTOM_BF16_OPS=1 python -m pytest tests/test_diffusers.py -v -s -k "test_no_throughput_regression_autocast" make slow_tests_diffusers diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py old mode 100755 new mode 100644 index 267c8e12ec..3f3309c368 --- a/tests/test_diffusers.py +++ b/tests/test_diffusers.py @@ -25,15 +25,14 @@ import shutil import subprocess import tempfile -import time from io import BytesIO, StringIO from pathlib import Path from typing import Callable, Union -from unittest import TestCase, skipIf, skipUnless +from unittest import TestCase, skipUnless import diffusers -import habana_frameworks.torch.hpu as hthpu import numpy as np +import PIL import pytest import requests import safetensors @@ -47,11 +46,11 @@ CogVideoXTransformer3DModel, ControlNetModel, DiffusionPipeline, - DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel, + I2VGenXLUNet, LCMScheduler, PNDMScheduler, SD3Transformer2DModel, @@ -66,17 +65,17 @@ from diffusers.image_processor import VaeImageProcessor from diffusers.pipelines.controlnet.pipeline_controlnet import MultiControlNetModel from diffusers.schedulers import KarrasDiffusionSchedulers -from diffusers.utils import logging, numpy_to_pil -from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available +from diffusers.utils import logging from diffusers.utils.testing_utils import ( enable_full_determinism, floats_tensor, load_image, - load_numpy, + numpy_cosine_similarity_distance, require_torch, ) from diffusers.utils.torch_utils import randn_tensor -from huggingface_hub import snapshot_download +from huggingface_hub import HfApi, hf_hub_download, snapshot_download +from huggingface_hub.utils import HfHubHTTPError from parameterized import parameterized from PIL import Image from transformers import ( @@ -91,7 +90,6 @@ DPTConfig, DPTFeatureExtractor, DPTForDepthEstimation, - T5Config, T5EncoderModel, T5Tokenizer, ) @@ -107,6 +105,7 @@ GaudiEulerDiscreteScheduler, GaudiFluxImg2ImgPipeline, GaudiFluxPipeline, + GaudiI2VGenXLPipeline, GaudiStableDiffusion3Pipeline, GaudiStableDiffusionControlNetPipeline, GaudiStableDiffusionDepth2ImgPipeline, @@ -141,32 +140,34 @@ THROUGHPUT_BASELINE_AUTOCAST = 0.394 TEXTUAL_INVERSION_THROUGHPUT = 131.7606336456344 TEXTUAL_INVERSION_RUNTIME = 1.542460777796805 - CONTROLNET_THROUGHPUT = 120.123522340414 - CONTROLNET_RUNTIME = 1.8647471838630736 - INPAINT_THROUGHPUT_BASELINE_BF16 = 4.584 - INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 1.151 - TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 70 - DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 
0.946 - THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.15186785472532677 - DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 36.06376791000366 TEXTUAL_INVERSION_SDXL_THROUGHPUT = 2.6694 TEXTUAL_INVERSION_SDXL_RUNTIME = 74.92 + CONTROLNET_THROUGHPUT = 120.123522340414 + CONTROLNET_RUNTIME = 1.8647471838630736 + INPAINT_THROUGHPUT_BASELINE_BF16 = 1.025 + INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 0.175 + THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.145 + SDXL_THROUGHPUT = 0.301 + SVD_THROUGHPUT = 0.012 + SD3_THROUGHPUT = 0.006 FLUX_THROUGHPUT = 0.03 + FLUX_DEV_I2I_THROUGHPUT = 0.12 + I2V_THROUGHPUT = 0.017 else: - THROUGHPUT_BASELINE_BF16 = 0.309 + THROUGHPUT_BASELINE_BF16 = 0.275 THROUGHPUT_BASELINE_AUTOCAST = 0.114 TEXTUAL_INVERSION_THROUGHPUT = 122.7445217395719 TEXTUAL_INVERSION_RUNTIME = 1.8249286960053723 - CONTROLNET_THROUGHPUT = 78.51566937458146 - CONTROLNET_RUNTIME = 2.852933710993966 - INPAINT_THROUGHPUT_BASELINE_BF16 = 1.42 - INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 0.271 - DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT = 0.302 - THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.050208662346013566 - TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE = 1000 # TODO: Get Gaudi 1 benchmark numbers - DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16 = 200 # TODO: Get Gaudi 1 Throughput TEXTUAL_INVERSION_SDXL_THROUGHPUT = 2.695 TEXTUAL_INVERSION_SDXL_RUNTIME = 74.19 + CONTROLNET_THROUGHPUT = 78.51566937458146 + CONTROLNET_RUNTIME = 2.852933710993966 + INPAINT_THROUGHPUT_BASELINE_BF16 = 0.272 + INPAINT_XL_THROUGHPUT_BASELINE_BF16 = 0.042 + THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16 = 0.045 + SDXL_THROUGHPUT = 0.074 + SVD_THROUGHPUT = 0.012 + I2V_THROUGHPUT = 0.008 _run_custom_bf16_ops_test_ = parse_flag_from_env("CUSTOM_BF16_OPS", default=False) @@ -183,6 +184,35 @@ def custom_bf16_ops(test_case): return skipUnless(_run_custom_bf16_ops_test_, "test requires custom bf16 ops")(test_case) +def check_gated_model_access(model): + """ + Skip test for a gated model if access is not granted; this occurs when an account + with the required permissions is not logged into the HF Hub. + """ + try: + hf_hub_download(repo_id=model, filename=HfApi().model_info(model).siblings[0].rfilename) + gated = False + + except HfHubHTTPError: + gated = True + + return pytest.mark.skipif(gated, reason=f"{model} is gated, please log in with approved HF access token") + + +def check_8xhpu(test_case): + """ + Decorator marking a test as it requires 8xHPU on system + """ + from optimum.habana.utils import get_device_count + + if get_device_count() != 8: + skip = True + else: + skip = False + + return pytest.mark.skipif(skip, reason="test requires 8xHPU multi-card system")(test_case) + + class GaudiPipelineUtilsTester(TestCase): """ Tests the features added on top of diffusers/pipeline_utils.py. 
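check_gated_model_access and check_8xhpu above both reduce to pytest.mark.skipif wrappers, so they stack on unittest-style test methods just like the existing slow and custom_bf16_ops markers. A minimal sketch of the same pattern, using a stand-in device probe so it runs anywhere (the _device_count helper, its hard-coded value, and requires_8_cards are hypothetical, for illustration only):

import pytest

def _device_count() -> int:
    # Stand-in for optimum.habana.utils.get_device_count(); hard-coded for illustration.
    return 1

def requires_8_cards(test_case):
    # Same shape as check_8xhpu: turn a hardware probe into a conditional skip.
    skip = _device_count() != 8
    return pytest.mark.skipif(skip, reason="test requires 8xHPU multi-card system")(test_case)

@requires_8_cards
def test_runs_only_on_eight_cards():
    assert True

check_gated_model_access follows the same shape, except its condition comes from probing the Hub with hf_hub_download and treating an HfHubHTTPError as "access not granted".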
@@ -420,52 +450,6 @@ def test_stable_diffusion_output_types(self, output_type): ) self.assertEqual(len(outputs.images), 2 * 3) - # TODO: enable safety checker - # if output_type == "latent": - # self.assertIsNone(outputs.nsfw_content_detected) - # else: - # self.assertEqual(len(outputs.nsfw_content_detected), 2 * 3) - - # TODO: enable this test when PNDMScheduler is adapted to Gaudi - # def test_stable_diffusion_negative_prompt(self): - # device = "cpu" # ensure determinism for the device-dependent torch.Generator - # unet = self.dummy_cond_unet - # scheduler = PNDMScheduler(skip_prk_steps=True) - # vae = self.dummy_vae - # bert = self.dummy_text_encoder - # tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") - - # # make sure here that pndm scheduler skips prk - # sd_pipe = StableDiffusionPipeline( - # unet=unet, - # scheduler=scheduler, - # vae=vae, - # text_encoder=bert, - # tokenizer=tokenizer, - # safety_checker=None, - # feature_extractor=self.dummy_extractor, - # ) - # sd_pipe = sd_pipe.to(device) - # sd_pipe.set_progress_bar_config(disable=None) - - # prompt = "A painting of a squirrel eating a burger" - # negative_prompt = "french fries" - # generator = torch.Generator(device=device).manual_seed(0) - # output = sd_pipe( - # prompt, - # negative_prompt=negative_prompt, - # generator=generator, - # guidance_scale=6.0, - # num_inference_steps=2, - # output_type="np", - # ) - - # image = output.images - # image_slice = image[0, -3:, -3:, -1] - - # assert image.shape == (1, 128, 128, 3) - # expected_slice = np.array([0.4851, 0.4617, 0.4765, 0.5127, 0.4845, 0.5153, 0.5141, 0.4886, 0.4719]) - # assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 def test_stable_diffusion_num_images_per_prompt(self): components = self.get_dummy_components() @@ -651,13 +635,11 @@ def test_stable_diffusion_hpu_graphs(self): def test_no_throughput_regression_bf16(self): prompts = [ "An image of a squirrel in Picasso style", - "High quality photo of an astronaut riding a horse in space", ] - num_images_per_prompt = 11 - batch_size = 4 + num_images_per_prompt = 28 + batch_size = 7 model_name = "CompVis/stable-diffusion-v1-4" scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") - pipeline = GaudiStableDiffusionPipeline.from_pretrained( model_name, scheduler=scheduler, @@ -665,6 +647,7 @@ def test_no_throughput_regression_bf16(self): use_hpu_graphs=True, gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion"), torch_dtype=torch.bfloat16, + sdp_on_bf16=True, ) pipeline.unet.set_default_attn_processor(pipeline.unet) set_seed(27) @@ -672,28 +655,48 @@ def test_no_throughput_regression_bf16(self): prompt=prompts, num_images_per_prompt=num_images_per_prompt, batch_size=batch_size, + output_type="np", ) + + # Check expected number of output images self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) + + # Throughput regression test self.assertGreaterEqual(outputs.throughput, 0.95 * THROUGHPUT_BASELINE_BF16) + n = 0 + clip_score_avg = 0.0 + for i in range(len(outputs.images)): + # Check expected shape for each output image + self.assertEqual(outputs.images[i].shape, (512, 512, 3)) + + if np.any(outputs.images[i] != 0): + clip_score = calculate_clip_score(np.expand_dims(outputs.images[i], axis=0), prompts) + clip_score_avg += clip_score + n += 1 + + # Quality test (check that the average CLIP score of valid output images is well in the 30s range) + clip_score_avg /= n + CLIP_SCORE_THRESHOLD = 30.0 + 
self.assertGreaterEqual(clip_score_avg, CLIP_SCORE_THRESHOLD) + @custom_bf16_ops @slow def test_no_throughput_regression_autocast(self): prompts = [ "An image of a squirrel in Picasso style", - "High quality photo of an astronaut riding a horse in space", ] - num_images_per_prompt = 11 - batch_size = 4 + num_images_per_prompt = 28 + batch_size = 7 model_name = "stabilityai/stable-diffusion-2-1" scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") - pipeline = GaudiStableDiffusionPipeline.from_pretrained( model_name, scheduler=scheduler, use_habana=True, use_hpu_graphs=True, gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion-2"), + sdp_on_bf16=True, ) set_seed(27) outputs = pipeline( @@ -703,54 +706,22 @@ def test_no_throughput_regression_autocast(self): height=768, width=768, ) - self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) - self.assertGreaterEqual(outputs.throughput, 0.95 * THROUGHPUT_BASELINE_AUTOCAST) - - @slow - def test_no_generation_regression(self): - seed = 27 - set_seed(seed) - model_name = "CompVis/stable-diffusion-v1-4" - # fp32 - scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") - pipeline = GaudiStableDiffusionPipeline.from_pretrained( - model_name, - scheduler=scheduler, - safety_checker=None, - use_habana=True, - use_hpu_graphs=True, - gaudi_config=GaudiConfig(use_torch_autocast=False), - ) - pipeline.unet.set_default_attn_processor(pipeline.unet) - - prompt = "An image of a squirrel in Picasso style" - generator = torch.manual_seed(seed) - outputs = pipeline( - prompt=prompt, - generator=generator, - output_type="np", - ) - - if IS_GAUDI2: - target_score = 29.8925 - else: - target_score = 36.774 - - image = outputs.images[0] - pil_image = numpy_to_pil(image)[0] - pil_image.save("test_no_generation_regression_output.png") - clip_score = calculate_clip_score(np.expand_dims(image, axis=0), [prompt]) + # Check expected number of output images + self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) - self.assertEqual(image.shape, (512, 512, 3)) - self.assertGreaterEqual(clip_score, 0.95 * target_score) + # Throughput regression test + self.assertGreaterEqual(outputs.throughput, 0.95 * THROUGHPUT_BASELINE_AUTOCAST) + @custom_bf16_ops @slow def test_no_generation_regression_ldm3d(self): - seed = 27 - set_seed(seed) + prompts = [ + "An image of a squirrel in Picasso style", + ] + num_images_per_prompt = 28 + batch_size = 7 model_name = "Intel/ldm3d-4c" - # fp32 scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") pipeline = GaudiStableDiffusionLDM3DPipeline.from_pretrained( model_name, @@ -758,35 +729,44 @@ def test_no_generation_regression_ldm3d(self): safety_checker=None, use_habana=True, use_hpu_graphs=True, - gaudi_config=GaudiConfig(), + gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion-2"), + sdp_on_bf16=True, ) - - prompt = "An image of a squirrel in Picasso style" - generator = torch.manual_seed(seed) + set_seed(27) outputs = pipeline( - prompt=prompt, - generator=generator, + prompt=prompts, + num_images_per_prompt=num_images_per_prompt, + batch_size=batch_size, output_type="np", ) - if IS_GAUDI2: - target_score = 28.0894 - else: - target_score = 35.81 + # Check expected number of output images + self.assertEqual(len(outputs.rgb), num_images_per_prompt * len(prompts)) + self.assertEqual(len(outputs.depth), num_images_per_prompt * len(prompts)) + + # Throughput regression test + 
self.assertGreaterEqual(outputs.throughput, 0.95 * THROUGHPUT_BASELINE_AUTOCAST) - rgb = outputs.rgb[0] - depth = outputs.depth[0] + n = 0 + clip_score_avg = 0.0 + for i in range(len(outputs.rgb)): + # Check expected shape for each output image + self.assertEqual(outputs.rgb[i].shape, (512, 512, 3)) + self.assertEqual(outputs.depth[i].shape, (512, 512, 1)) - rgb_clip_score = calculate_clip_score(np.expand_dims(rgb, axis=0), [prompt]) + if np.any(outputs.rgb[i] != 0): + clip_score = calculate_clip_score(np.expand_dims(outputs.rgb[i], axis=0), prompts) + clip_score_avg += clip_score + n += 1 - self.assertEqual(rgb.shape, (512, 512, 3)) - self.assertEqual(depth.shape, (512, 512, 1)) - self.assertGreaterEqual(rgb_clip_score, 0.95 * target_score) + # Quality test (check that the average CLIP score of valid output images is well in the 30s range) + clip_score_avg /= n + CLIP_SCORE_THRESHOLD = 30.0 + self.assertGreaterEqual(clip_score_avg, CLIP_SCORE_THRESHOLD) @slow def test_no_generation_regression_upscale(self): model_name = "stabilityai/stable-diffusion-x4-upscaler" - # fp32 scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") pipeline = GaudiStableDiffusionUpscalePipeline.from_pretrained( model_name, @@ -794,49 +774,38 @@ def test_no_generation_regression_upscale(self): use_habana=True, use_hpu_graphs=True, gaudi_config=GaudiConfig(use_torch_autocast=False), + sdp_on_bf16=True, ) set_seed(27) - url = "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale/low_res_cat.png" response = requests.get(url) low_res_img = Image.open(BytesIO(response.content)).convert("RGB") low_res_img = low_res_img.resize((128, 128)) prompt = "a white cat" upscaled_image = pipeline(prompt=prompt, image=low_res_img, output_type="np").images[0] - if IS_GAUDI2: - expected_slice = np.array( - [ - 0.16527882, - 0.161616, - 0.15665859, - 0.1660901, - 0.1594379, - 0.14936888, - 0.1578255, - 0.15342498, - 0.14590919, - ] - ) - else: - expected_slice = np.array( - [ - 0.1652787, - 0.16161594, - 0.15665877, - 0.16608998, - 0.1594378, - 0.14936894, - 0.15782538, - 0.15342498, - 0.14590913, - ] - ) + + # Check expected shape of the upscaled image self.assertEqual(upscaled_image.shape, (512, 512, 3)) + + # Check expected upscaled values of a sample slice + expected_slice = np.array( + [ + 0.16528079, + 0.16161581, + 0.15665841, + 0.16609294, + 0.15943781, + 0.14936810, + 0.15782778, + 0.15342544, + 0.14590860, + ] + ) self.assertLess(np.abs(expected_slice - upscaled_image[-3:, -3:, -1].flatten()).max(), 5e-3) @slow - @pytest.mark.skipif(hthpu.is_available() and hthpu.device_count() != 8, reason="system does not have 8 cards") - def test_textual_inversion(self): + @check_8xhpu + def test_sd_textual_inversion(self): path_to_script = ( Path(os.path.dirname(__file__)).parent / "examples" @@ -900,17 +869,13 @@ def test_textual_inversion(self): use_habana=True, use_hpu_graphs=True, gaudi_config=GaudiConfig(use_habana_mixed_precision=False), + sdp_on_bf16=True, ) prompt = "A backpack" set_seed(27) image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5, output_type="np").images[0] - # TODO: see how to generate images in a reproducible way - # expected_slice = np.array( - # [0.57421875, 0.5703125, 0.58203125, 0.58203125, 0.578125, 0.5859375, 0.578125, 0.57421875, 0.56640625] - # ) self.assertEqual(image.shape, (512, 512, 3)) - # self.assertLess(np.abs(expected_slice - image[-3:, -3:, -1].flatten()).max(), 5e-3) class 
GaudiStableDiffusionXLPipelineTester(TestCase): @@ -1218,7 +1183,7 @@ def test_stable_diffusion_xl_bf16(self): self.assertEqual(image.shape, (64, 64, 3)) @slow - def test_textual_inversion_sdxl(self): + def test_sdxl_textual_inversion(self): path_to_script = ( Path(os.path.dirname(__file__)).parent / "examples" @@ -1253,6 +1218,7 @@ def test_textual_inversion_sdxl(self): f"--output_dir {run_dir}", "--save_as_full_pipeline", "--gaudi_config_name Habana/stable-diffusion", + "--sdp_on_bf16", "--throughput_warmup_steps 3", "--seed 27", ] @@ -1278,6 +1244,7 @@ def test_textual_inversion_sdxl(self): use_habana=True, use_hpu_graphs=True, gaudi_config=GaudiConfig(use_habana_mixed_precision=False), + sdp_on_bf16=True, ) set_seed(27) @@ -1337,79 +1304,178 @@ def test_stable_diffusion_xl_hpu_graphs(self): self.assertEqual(len(images), 10) self.assertEqual(images[-1].shape, (64, 64, 3)) - @slow - def test_stable_diffusion_xl_inference_script(self): - path_to_script = ( - Path(os.path.dirname(__file__)).parent / "examples" / "stable-diffusion" / "text_to_image_generation.py" + def test_stable_diffusion_xl_num_images_per_prompt_optimized(self): + import habana_frameworks.torch.hpu as torch_hpu + + kwargs = {"timestep_spacing": "linspace"} + scheduler = GaudiEulerDiscreteScheduler.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler", **kwargs + ) + scheduler.hpu_opt = True + kwargs = { + "scheduler": scheduler, + "use_habana": True, + "use_hpu_graphs": True, + "gaudi_config": "Habana/stable-diffusion", + "torch_dtype": torch.bfloat16, + } + + os.environ["PATCH_SDPA"] = "1" + + from optimum.habana.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_mlperf import ( + StableDiffusionXLPipeline_HPU, ) - with tempfile.TemporaryDirectory() as run_dir: - cmd_line = f""" - python3 - {path_to_script} - --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 - --num_images_per_prompt 1 - --num_inference_steps 30 - --batch_size 1 - --image_save_dir {run_dir} - --use_habana - --gaudi_config Habana/stable-diffusion - --sdp_on_bf16 - --bf16 - """.split() - cmd_line.append("--prompts") - cmd_line.append("Sailing ship painting by Van Gogh") + model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0" - # Run textual inversion - p = subprocess.Popen(cmd_line) - return_code = p.wait() + sd_pipe = StableDiffusionXLPipeline_HPU.from_pretrained( + model_name_or_path, + **kwargs, + ) - # Ensure the run finished without any issue - self.assertEqual(return_code, 0) + sd_pipe.unet.set_default_attn_processor(sd_pipe.unet) + sd_pipe.to(torch.device("hpu")) + sd_pipe.unet = torch_hpu.wrap_in_hpu_graph(sd_pipe.unet) + sd_pipe.set_progress_bar_config(disable=None) - if IS_GAUDI2: - _sdxl_inferece_throughput_data = (("ddim", 1, 10, 0.301), ("euler_discrete", 1, 10, 0.301)) - else: - _sdxl_inferece_throughput_data = (("ddim", 1, 10, 0.074),) + prompt = "A painting of a squirrel eating a burger" - @parameterized.expand(_sdxl_inferece_throughput_data, skip_on_empty=True) - def test_stable_diffusion_xl_generation_throughput( - self, scheduler: str, batch_size: int, num_images_per_prompt: int, baseline: float - ): - def _sdxl_generation(self, scheduler: str, batch_size: int, num_images_per_prompt: int, baseline: float): - kwargs = {"timestep_spacing": "linspace"} - if scheduler == "euler_discrete": - scheduler = GaudiEulerDiscreteScheduler.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler", **kwargs - ) - elif scheduler == "ddim": - scheduler = 
GaudiDDIMScheduler.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler", **kwargs - ) + # Test num_images_per_prompt=1 (default) + images = sd_pipe(prompt, num_inference_steps=2, output_type="np").images - kwargs = { - "scheduler": scheduler, - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": "Habana/stable-diffusion", - } - pipeline = GaudiStableDiffusionXLPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", - **kwargs, - ) - pipeline.unet.set_default_attn_processor(pipeline.unet) - num_images_per_prompt = num_images_per_prompt - res = {} - outputs = pipeline( - prompt="Sailing ship painting by Van Gogh", - num_images_per_prompt=num_images_per_prompt, - batch_size=batch_size, - num_inference_steps=30, - **res, - ) - self.assertGreaterEqual(outputs.throughput, 0.95 * baseline) + self.assertEqual(len(images), 1) + self.assertEqual(images[0].shape, (1024, 1024, 3)) + + # Test num_images_per_prompt=1 (default) for several prompts + num_prompts = 3 + images = sd_pipe([prompt] * num_prompts, num_inference_steps=2, output_type="np").images + + self.assertEqual(len(images), num_prompts) + self.assertEqual(images[-1].shape, (1024, 1024, 3)) + + # Test num_images_per_prompt for single prompt + num_images_per_prompt = 2 + images = sd_pipe( + prompt, num_inference_steps=2, output_type="np", num_images_per_prompt=num_images_per_prompt + ).images + + self.assertEqual(len(images), num_images_per_prompt) + self.assertEqual(images[-1].shape, (1024, 1024, 3)) + + # Test num_images_per_prompt for several prompts + num_prompts = 2 + images = sd_pipe( + [prompt] * num_prompts, + num_inference_steps=2, + output_type="np", + num_images_per_prompt=num_images_per_prompt, + ).images + + self.assertEqual(len(images), num_prompts * num_images_per_prompt) + self.assertEqual(images[-1].shape, (1024, 1024, 3)) + + os.environ.pop("PATCH_SDPA") + + def test_stable_diffusion_xl_optimized_fp8(self): + import habana_frameworks.torch.hpu as torch_hpu + + kwargs = {"timestep_spacing": "linspace"} + scheduler = GaudiEulerDiscreteScheduler.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler", **kwargs + ) + scheduler.hpu_opt = True + kwargs = { + "scheduler": scheduler, + "use_habana": True, + "use_hpu_graphs": True, + "gaudi_config": "Habana/stable-diffusion", + "torch_dtype": torch.bfloat16, + } + + os.environ["PATCH_SDPA"] = "1" + # Set QUANT_CONFIG environment variable + os.environ["QUANT_CONFIG"] = "./quantization/stable-diffusion-xl/quantize_config.json" + + from optimum.habana.diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_mlperf import ( + StableDiffusionXLPipeline_HPU, + ) + + model_name_or_path = "stabilityai/stable-diffusion-xl-base-1.0" + + sd_pipe = StableDiffusionXLPipeline_HPU.from_pretrained( + model_name_or_path, + **kwargs, + ) + sd_pipe.unet.set_default_attn_processor(sd_pipe.unet) + sd_pipe.to(torch.device("hpu")) + + quant_config_path = os.getenv("QUANT_CONFIG") + + original_dir = os.getcwd() + config_dir = Path(os.path.dirname(__file__)).parent / "examples" / "stable-diffusion" + os.chdir(config_dir) + + if quant_config_path: + import habana_frameworks.torch.core as htcore + from neural_compressor.torch.quantization import FP8Config, convert, prepare + + htcore.hpu_set_env() + + config = FP8Config.from_json_file(quant_config_path) + + if config.measure: + print("Running measurements") + sd_pipe.unet = prepare(sd_pipe.unet, config) + elif config.quantize: + print("Running quantization") + 
sd_pipe.unet = convert(sd_pipe.unet, config) + htcore.hpu_initialize(sd_pipe.unet, mark_only_scales_as_const=True) + + sd_pipe.unet = torch_hpu.wrap_in_hpu_graph(sd_pipe.unet) + sd_pipe.set_progress_bar_config(disable=None) + + prompt = "A painting of a squirrel eating a burger" + + # Test using quantization configuration + images = sd_pipe(prompt, num_inference_steps=2, output_type="np").images + + self.assertEqual(len(images), 1) + self.assertEqual(images[0].shape, (1024, 1024, 3)) + os.chdir(original_dir) + + os.environ.pop("PATCH_SDPA") + + @slow + def test_stable_diffusion_xl_generation_throughput(self): + prompts = [ + "Sailing ship painting by Van Gogh", + ] + num_images_per_prompt = 28 + batch_size = 7 + model_name = "stabilityai/stable-diffusion-xl-base-1.0" + scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") + + pipeline = GaudiStableDiffusionXLPipeline.from_pretrained( + model_name, + scheduler=scheduler, + use_habana=True, + use_hpu_graphs=True, + gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion"), + sdp_on_bf16=True, + ) + set_seed(27) + outputs = pipeline( + prompt=prompts, + num_images_per_prompt=num_images_per_prompt, + batch_size=batch_size, + num_inference_steps=30, + ) + + # Check expected number of output images + self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) - _sdxl_generation(self, scheduler, batch_size, num_images_per_prompt, baseline) + # Throughput regression test + self.assertGreaterEqual(outputs.throughput, 0.95 * SDXL_THROUGHPUT) class GaudiStableDiffusion3PipelineTester(TestCase): @@ -1632,6 +1698,32 @@ def test_fused_qkv_projections(self): "Original outputs should match when fused QKV projections are disabled." ) + @slow + @check_gated_model_access("stabilityai/stable-diffusion-3-medium-diffusers") + @pytest.mark.skipif(not IS_GAUDI2, reason="does not fit into Gaudi1 memory") + def test_sd3_inference(self): + repo_id = "stabilityai/stable-diffusion-3-medium-diffusers" + + pipe = self.pipeline_class.from_pretrained( + repo_id, + use_habana=True, + use_hpu_graphs=True, + gaudi_config="Habana/stable-diffusion", + torch_dtype=torch.bfloat16, + sdp_on_bf16=True, + ) + + outputs = pipe( + prompt="Sailing ship painting by Van Gogh", + num_inference_steps=28, + batch_size=1, + num_images_per_prompt=10, + output_type="np", + ) + + # Check expected performance of FLUX.1 dev img-to-img model + self.assertGreaterEqual(outputs.throughput, 0.95 * SD3_THROUGHPUT) + class GaudiStableDiffusionControlNetPipelineTester(TestCase): """ @@ -2383,13 +2475,18 @@ def test_depth2img_pipeline_hpu_graphs(self): assert images[0].shape == (32, 32, 3) @slow - def test_depth2img_pipeline_latency_bf16(self): + def test_depth2img_pipeline(self): gaudi_config = GaudiConfig(use_torch_autocast=True) model_name = "stabilityai/stable-diffusion-2-depth" scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") pipe = GaudiStableDiffusionDepth2ImgPipeline.from_pretrained( - model_name, gaudi_config=gaudi_config, scheduler=scheduler, use_habana=True, use_hpu_graphs=True + model_name, + scheduler=scheduler, + use_habana=True, + use_hpu_graphs=True, + gaudi_config=gaudi_config, + sdp_on_bf16=True, ) image = Image.open( requests.get( @@ -2399,7 +2496,6 @@ def test_depth2img_pipeline_latency_bf16(self): ) prompt = "A fancy meal with soup and pancakes" - start_time = time.time() outputs = pipe( prompt=prompt, image=image, @@ -2407,8 +2503,7 @@ def test_depth2img_pipeline_latency_bf16(self): 
num_inference_steps=50, output_type="np", ) - end_time = time.time() - latency = end_time - start_time + images = outputs.images clip_score = calculate_clip_score(np.expand_dims(image, axis=0), [prompt]) target_score = 22.76 @@ -2417,8 +2512,6 @@ def test_depth2img_pipeline_latency_bf16(self): self.assertEqual(images[0].shape, (512, 512, 3)) self.assertGreaterEqual(clip_score, 0.95 * target_score) - self.assertLessEqual(latency, 1.05 * DEPTH2IMG_GENERATION_LATENCY_BASELINE_BF16) - class TrainTextToImage(TestCase): """ @@ -2502,7 +2595,7 @@ class TrainControlNet(TestCase): Tests the train_controlnet.py script for Gaudi. """ - def test_train_controlnet_script(self): + def test_script_train_controlnet(self): path_to_script = ( Path(os.path.dirname(__file__)).parent / "examples" @@ -2521,7 +2614,7 @@ def test_train_controlnet_script(self): self.assertEqual(return_code, 0) @slow - @pytest.mark.skipif(hthpu.is_available() and hthpu.device_count() != 8, reason="system does not have 8 cards") + @check_8xhpu def test_train_controlnet(self): with tempfile.TemporaryDirectory() as tmpdir: path_to_script = ( @@ -2586,6 +2679,7 @@ def test_train_controlnet(self): use_habana=True, use_hpu_graphs=True, gaudi_config=GaudiConfig(use_habana_mixed_precision=False), + sdp_on_bf16=True, ) pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config) @@ -2874,13 +2968,10 @@ def test_stable_video_diffusion_single_video(self): components = self.get_dummy_components() gaudi_config = GaudiConfig(use_torch_autocast=False) sd_pipe_oh = GaudiStableVideoDiffusionPipeline(use_habana=True, gaudi_config=gaudi_config, **components) - sd_pipe_hf = StableVideoDiffusionPipeline(**components) + components2 = self.get_dummy_components() + sd_pipe_hf = StableVideoDiffusionPipeline(**components2) def _get_image_from_pipeline(pipeline, device=device): - for component in pipeline.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - pipeline.to(device) pipeline.set_progress_bar_config(disable=None) @@ -2893,7 +2984,7 @@ def _get_image_from_pipeline(pipeline, device=device): self.assertEqual(image.shape, (2, 3, 32, 32)) return image[0, -3:, -3:, -1] - image_slice_oh = _get_image_from_pipeline(sd_pipe_oh) + image_slice_oh = _get_image_from_pipeline(sd_pipe_oh, device="hpu").cpu() image_slice_hf = _get_image_from_pipeline(sd_pipe_hf) self.assertLess(np.abs(image_slice_oh.flatten() - image_slice_hf.flatten()).max(), 1e-2) @@ -2913,6 +3004,7 @@ def test_stable_video_diffusion_no_throughput_regression_bf16(self): use_hpu_graphs=True, gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion"), torch_dtype=torch.bfloat16, + sdp_on_bf16=True, ) set_seed(42) prompt_image = load_image(image_url) @@ -2924,9 +3016,11 @@ def test_stable_video_diffusion_no_throughput_regression_bf16(self): width=256, ) + # Check expected number of output frames self.assertEqual(len(outputs.frames[0]), 25) - if IS_GAUDI2: - self.assertGreaterEqual(outputs.throughput, 0.95 * 0.012) + + # Throughput regression test + self.assertGreaterEqual(outputs.throughput, 0.95 * SVD_THROUGHPUT) class GaudiStableVideoDiffusionControlNetPipelineTester(TestCase): @@ -3702,87 +3796,14 @@ def test_stable_diffusion_xl_img2img_euler(self): self.assertEqual(image.shape, (1, 32, 32, 3)) - expected_slice = np.array([0.4925, 0.5007, 0.6594, 0.5544, 0.4423, 0.5585, 0.4643, 0.5444, 0.5376]) + expected_slice = np.array([0.4664, 0.4886, 0.4403, 0.6902, 0.5592, 0.4534, 0.5931, 0.5951, 0.5224]) 
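The hard-coded reference slice above follows the usual diffusers-test pattern: only a small corner of the output image is pinned and compared within a loose tolerance. As a minimal, self-contained sketch of that check (the helper name `assert_slice_close` is illustrative, not part of the test suite), operating on a single HxWxC image:

import numpy as np

def assert_slice_close(image, expected_slice, atol=1e-2):
    # Pin the bottom-right 3x3 corner of the last channel and compare it to the reference values.
    image_slice = image[-3:, -3:, -1]
    max_diff = np.abs(image_slice.flatten() - expected_slice).max()
    assert max_diff < atol, f"max difference {max_diff} exceeds tolerance {atol}"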
self.assertLess(np.abs(image_slice.flatten() - expected_slice).max(), 1e-2) -class GaudiDeterministicImageGenerationTester(TestCase): +class GaudiCogVideoXPipelineTester(TestCase): """ - Test deterministic generation using text_to_image_generation.py. - """ - - @slow - def test_deterministic_image_generation(self): - path_to_script = ( - Path(os.path.dirname(__file__)).parent / "examples" / "stable-diffusion" / "text_to_image_generation.py" - ) - - with tempfile.TemporaryDirectory(): - test_args = f""" - python3 - {path_to_script} - --model_name_or_path CompVis/stable-diffusion-v1-4 - --num_images_per_prompt 20 - --batch_size 4 - --image_save_dir /tmp/stable_diffusion_images - --use_habana - --use_hpu_graphs - --gaudi_config Habana/stable-diffusion - --sdp_on_bf16 - --bf16 - --use_cpu_rng - """.split() - test_args.append("--prompts") - test_args.append("An image of a squirrel in Picasso style") - p = subprocess.Popen(test_args) - return_code = p.wait() - - # Ensure the run finished without any issue - self.assertEqual(return_code, 0) - - @slow - def test_deterministic_image_generation_no_throughput_regression_bf16(self): - kwargs = {"timestep_spacing": "linspace"} - scheduler = GaudiDDIMScheduler.from_pretrained( - "CompVis/stable-diffusion-v1-4", **kwargs, subfolder="scheduler" - ) - - kwargs = { - "scheduler": scheduler, - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": "Habana/stable-diffusion", - } - - pipeline = GaudiStableDiffusionPipeline.from_pretrained( - "CompVis/stable-diffusion-v1-4", - **kwargs, - ) - pipeline.unet.set_default_attn_processor(pipeline.unet) - - num_images_per_prompt = 20 - res = {} - generator = [set_seed(27) for i in range(num_images_per_prompt)] - outputs = pipeline( - prompt="An image of a squirrel in Picasso style", - num_images_per_prompt=num_images_per_prompt, - batch_size=4, - num_inference_steps=50, - guidance_scale=7.5, - negative_prompt=None, - eta=0.0, - output_type="pil", - generator=generator, - **res, - ) - - self.assertGreaterEqual(outputs.throughput, 0.95 * DETERMINISTIC_IMAGE_GENERATION_THROUGHPUT) - - -class GaudiCogVideoXPipelineTester(TestCase): - """ - Tests the TextToVideoSDPipeline for Gaudi. - Adapted from https://github.com/huggingface/diffusers/blob/v0.24.0-release/tests/pipelines/text_to_video_synthesis/test_text_to_video.py + Tests the TextToVideoSDPipeline for Gaudi. 
+ Adapted from https://github.com/huggingface/diffusers/blob/v0.24.0-release/tests/pipelines/text_to_video_synthesis/test_text_to_video.py """ def get_dummy_components(self): @@ -4017,6 +4038,9 @@ def test_text_to_video_default_case(self): @slow def test_stable_video_diffusion_no_latency_regression_bf16(self): + prompts = [ + "An astronaut riding a horse", + ] model_name = "ali-vilab/text-to-video-ms-1.7b" pipeline = GaudiTextToVideoSDPipeline.from_pretrained( model_name, @@ -4024,15 +4048,16 @@ def test_stable_video_diffusion_no_latency_regression_bf16(self): use_hpu_graphs=True, gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion"), torch_dtype=torch.bfloat16, + sdp_to_bf16=True, + ) + + set_seed(27) + outputs = pipeline( + prompt=prompts, ) - set_seed(42) - start_time = time.time() - prompt = "Spiderman is surfing" - outputs = pipeline(prompt, num_inference_steps=50, output_type="pil") - latency = time.time() - start_time - assert len(outputs.videos[0]) == 16 - assert latency < 1.05 * TEXT_TO_VIDEO_SYNTHESIS_BF16_BASELINE + # Check expected number of output frames + self.assertEqual(len(outputs.videos[0]), 16) """ @@ -4491,8 +4516,6 @@ def test_dict_tuple_outputs_equivalent(self, expected_max_difference=1e-4): def test_components_function(self): init_components = self.get_dummy_components() - # init_components = {k: v for k, v in init_components.items() if not isinstance(v, (str, int, float))} - pipe = self.pipeline_class(**init_components) init_components.pop("use_habana") init_components.pop("use_hpu_graphs") @@ -4502,103 +4525,6 @@ def test_components_function(self): self.assertTrue(hasattr(pipe, "components")) self.assertTrue(set(pipe.components.keys()) == set(init_components.keys())) - @skipIf(torch_device != "cuda", reason="float16 requires CUDA") - def test_float16_inference(self, expected_max_diff=5e-2): - components = self.get_dummy_components() - pipe = self.pipeline_class(**components) - for component in pipe.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - - pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - - components = self.get_dummy_components() - pipe_fp16 = self.pipeline_class(**components) - for component in pipe_fp16.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - - pipe_fp16.to(torch_device, torch.float16) - pipe_fp16.set_progress_bar_config(disable=None) - - inputs = self.get_dummy_inputs(torch_device) - # Reset generator in case it is used inside dummy inputs - if "generator" in inputs: - inputs["generator"] = self.get_generator(0) - - output = pipe(**inputs)[0] - - fp16_inputs = self.get_dummy_inputs(torch_device) - # Reset generator in case it is used inside dummy inputs - if "generator" in fp16_inputs: - fp16_inputs["generator"] = self.get_generator(0) - - output_fp16 = pipe_fp16(**fp16_inputs)[0] - - max_diff = np.abs(to_np(output) - to_np(output_fp16)).max() - self.assertLess(max_diff, expected_max_diff, "The outputs of the fp16 and fp32 pipelines are too different.") - - @skipIf(torch_device != "cuda", reason="float16 requires CUDA") - def test_save_load_float16(self, expected_max_diff=1e-2): - components = self.get_dummy_components() - for name, module in components.items(): - if hasattr(module, "half"): - components[name] = module.to(torch_device).half() - - pipe = self.pipeline_class(**components) - for component in pipe.components.values(): - if hasattr(component, 
"set_default_attn_processor"): - component.set_default_attn_processor() - pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - - inputs = self.get_dummy_inputs(torch_device) - output = pipe(**inputs)[0] - - with tempfile.TemporaryDirectory() as tmpdir: - pipe.save_pretrained(tmpdir) - pipe_loaded = self.pipeline_class.from_pretrained(tmpdir, torch_dtype=torch.float16) - for component in pipe_loaded.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - pipe_loaded.to(torch_device) - pipe_loaded.set_progress_bar_config(disable=None) - - for name, component in pipe_loaded.components.items(): - if hasattr(component, "dtype"): - self.assertTrue( - component.dtype == torch.float16, - f"`{name}.dtype` switched from `float16` to {component.dtype} after loading.", - ) - - inputs = self.get_dummy_inputs(torch_device) - output_loaded = pipe_loaded(**inputs)[0] - max_diff = np.abs(to_np(output) - to_np(output_loaded)).max() - self.assertLess( - max_diff, expected_max_diff, "The output of the fp16 pipeline changed after saving and loading." - ) - - @skipIf(torch_device != "cuda", reason="CUDA and CPU are required to switch devices") - def test_to_device(self): - components = self.get_dummy_components() - pipe = self.pipeline_class(**components) - pipe.set_progress_bar_config(disable=None) - - pipe.to("cpu") - model_devices = [component.device.type for component in components.values() if hasattr(component, "device")] - self.assertTrue(all(device == "cpu" for device in model_devices)) - - output_cpu = pipe(**self.get_dummy_inputs("cpu"))[0] - self.assertTrue(np.isnan(output_cpu).sum() == 0) - - pipe.to("cuda") - model_devices = [component.device.type for component in components.values() if hasattr(component, "device")] - self.assertTrue(all(device == "cuda" for device in model_devices)) - - output_cuda = pipe(**self.get_dummy_inputs("cuda"))[0] - self.assertTrue(np.isnan(to_np(output_cuda)).sum() == 0) - def test_to_dtype(self): components = self.get_dummy_components() pipe = self.pipeline_class(**components) @@ -4642,73 +4568,6 @@ def _test_attention_slicing_forward_pass( if test_mean_pixel_difference: assert_mean_pixel_difference(to_np(output_with_slicing[0]), to_np(output_without_slicing[0])) - @skipIf( - torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"), - reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher", - ) - def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4): - components = self.get_dummy_components() - pipe = self.pipeline_class(**components) - for component in pipe.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - - pipe.set_progress_bar_config(disable=None) - - generator_device = "cpu" - inputs = self.get_dummy_inputs(generator_device) - output_without_offload = pipe(**inputs)[0] - - pipe.enable_sequential_cpu_offload() - - inputs = self.get_dummy_inputs(generator_device) - output_with_offload = pipe(**inputs)[0] - - max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max() - self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results") - - @skipIf( - torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"), - reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher", - ) - def 
test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4): - generator_device = "cpu" - components = self.get_dummy_components() - pipe = self.pipeline_class(**components) - - for component in pipe.components.values(): - if hasattr(component, "set_default_attn_processor"): - component.set_default_attn_processor() - - pipe = pipe.to(torch_device) - pipe.set_progress_bar_config(disable=None) - - inputs = self.get_dummy_inputs(generator_device) - output_without_offload = pipe(**inputs)[0] - - pipe.enable_model_cpu_offload() - inputs = self.get_dummy_inputs(generator_device) - output_with_offload = pipe(**inputs)[0] - - max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max() - self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results") - offloaded_modules = [ - v - for k, v in pipe.components.items() - if isinstance(v, torch.nn.Module) and k not in pipe._exclude_from_cpu_offload - ] - ( - self.assertTrue(all(v.device.type == "cpu" for v in offloaded_modules)), - f"Not offloaded: {[v for v in offloaded_modules if v.device.type != 'cpu']}", - ) - - @skipIf( - torch_device != "cuda" or not is_xformers_available(), - reason="XFormers attention is only available with CUDA and `xformers` installed", - ) - def test_xformers_attention_forwardGenerator_pass(self): - self._test_xformers_attention_forwardGenerator_pass() - def _test_xformers_attention_forwardGenerator_pass( self, test_max_difference=True, test_mean_pixel_difference=True, expected_max_diff=1e-4 ): @@ -5187,20 +5046,10 @@ def assert_mean_pixel_difference(image, expected_image, expected_max_diff=10): VIDEO_TO_VIDEO_BATCH_PARAMS = frozenset(["prompt", "negative_prompt", "video"]) -""" -Copied from: https://github.com/huggingface/diffusers/blob/v0.26.3/tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py -- Modified pipeline to Gaudi pipeline. 
-- Modified the get_dummy_components to add the Gaudi pipeline parameters: use_habana, use_hpu_graphs, gaudi_config, bf16_full_eval -- Added testcases: - test_stable_diffusion_inpaint_no_safety_checker - test_stable_diffusion_inpaint_enable_safety_checker - test_stable_diffusion_inpaint_no_throughput_regression -""" - enable_full_determinism() -class StableDiffusionInpaintPipelineFastTests( +class StableDiffusionInpaintPipelineTests( PipelineLatentTesterMixin, PipelineKarrasSchedulerTesterMixin, PipelineTesterMixin, TestCase ): pipeline_class = GaudiStableDiffusionInpaintPipeline @@ -5314,314 +5163,91 @@ def test_stable_diffusion_inpaint(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) - -class StableDiffusionInpaintPipelineIntegrationTests(TestCase): - def tearDown(self): - # clean up the VRAM after each test - super().tearDown() - gc.collect() - - def create_inpaint_pipe( - self, - model_name="stabilityai/stable-diffusion-2-inpainting", - scheduler=None, - use_hpu_graphs=False, - gaudi_config="Habana/stable-diffusion", - disable_safety_checker=False, - torch_dtype=torch.bfloat16, - ): - if scheduler is None: - scheduler = GaudiDDIMScheduler.from_pretrained(model_name, subfolder="scheduler") - - kwargs = { - "scheduler": scheduler, - "use_habana": True, - "use_hpu_graphs": use_hpu_graphs, - "gaudi_config": gaudi_config, - } - - if disable_safety_checker is True: - kwargs["safety_checker"] = None - - sdi_pipe = GaudiStableDiffusionInpaintPipeline.from_pretrained(model_name, **kwargs).to(torch_dtype) - - sdi_pipe.set_progress_bar_config(disable=None) - - return sdi_pipe - @slow - def test_stable_diffusion_inpaint_pipeline(self): + def test_stable_diffusion_inpaint_no_throughput_regression(self): + """Test that stable diffusion inpainting no throughput regression autocast""" + + # Initialize inpaint parameters init_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/sd2-inpaint/init_image.png" + "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png" ) mask_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png" - ) - expected_image = load_numpy( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint" - "/yellow_cat_sitting_on_a_park_bench.npy" + "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png" ) - model_id = "stabilityai/stable-diffusion-2-inpainting" - init_kwargs = { - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": "Habana/stable-diffusion", - "torch_dtype": torch.float, - } - - pipe = GaudiStableDiffusionInpaintPipeline.from_pretrained(model_id, safety_checker=None, **init_kwargs) - pipe.set_progress_bar_config(disable=None) - pipe.enable_attention_slicing() - - prompt = "Face of a yellow cat, high resolution, sitting on a park bench" + prompts = [ + "concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k", + ] + model_name = "stabilityai/stable-diffusion-2-inpainting" + num_images_per_prompt = 12 + batch_size = 4 + pipeline = GaudiStableDiffusionInpaintPipeline.from_pretrained( + model_name, + use_habana=True, + use_hpu_graphs=True, + gaudi_config="Habana/stable-diffusion", + torch_dtype=torch.bfloat16, + sdp_on_bf16=True, + ) - generator = torch.manual_seed(0) - output = pipe( - 
prompt=prompt, + set_seed(27) + outputs = pipeline( + prompt=prompts, image=init_image, mask_image=mask_image, - generator=generator, - output_type="np", + num_images_per_prompt=num_images_per_prompt, + batch_size=batch_size, + throughput_warmup_steps=3, ) - image = output.images[0] - assert image.shape == (512, 512, 3) - # There is no difference in the experimental results observed by the human eye. - # np.abs(expected_image - image).max() = 0.31966144 - assert np.abs(expected_image - image).max() < 0.4 - - @slow - def test_stable_diffusion_inpaint_pipeline_bf16(self): - init_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/sd2-inpaint/init_image.png" - ) - mask_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png" - ) - expected_image = load_numpy( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint" - "/yellow_cat_sitting_on_a_park_bench_fp16.npy" - ) + # Check expected number of output images + self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) - model_id = "stabilityai/stable-diffusion-2-inpainting" - init_kwargs = { - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": "Habana/stable-diffusion-2", - "torch_dtype": torch.bfloat16, - } + # Throughput regression test + self.assertGreaterEqual(outputs.throughput, 0.95 * INPAINT_THROUGHPUT_BASELINE_BF16) - pipe = GaudiStableDiffusionInpaintPipeline.from_pretrained(model_id, safety_checker=None, **init_kwargs) - pipe.set_progress_bar_config(disable=None) - pipe.enable_attention_slicing() - prompt = "Face of a yellow cat, high resolution, sitting on a park bench" +class StableDiffusionXLInpaintPipelineTests(PipelineLatentTesterMixin, PipelineTesterMixin, TestCase): + pipeline_class = GaudiStableDiffusionXLInpaintPipeline + params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS + batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS + image_params = frozenset([]) + # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess + image_latents_params = frozenset([]) + callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union( + { + "add_text_embeds", + "add_time_ids", + "mask", + "masked_image_latents", + } + ) - generator = torch.manual_seed(0) - output = pipe( - prompt=prompt, - image=init_image, - mask_image=mask_image, - generator=generator, - output_type="np", + def get_dummy_components(self, skip_first_text_encoder=False, time_cond_proj_dim=None): + torch.manual_seed(0) + set_seed(0) + unet = UNet2DConditionModel( + block_out_channels=(32, 64), + layers_per_block=2, + sample_size=32, + in_channels=4, + out_channels=4, + time_cond_proj_dim=time_cond_proj_dim, + down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), + up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), + # SD2-specific config below + attention_head_dim=(2, 4), + use_linear_projection=True, + addition_embed_type="text_time", + addition_time_embed_dim=8, + transformer_layers_per_block=(1, 2), + projection_class_embeddings_input_dim=72, # 5 * 8 + 32 + cross_attention_dim=64 if not skip_first_text_encoder else 32, ) - image = output.images[0] - - assert image.shape == (512, 512, 3) - # The format of expected_image used for testing is only float16. There is no difference in the experimental results observed by the human eye. 
- # np.abs(expected_image - image).max() = 0.9626465 - assert np.abs(expected_image - image).max() < 0.97 - - @slow - def test_stable_diffusion_inpaint_no_safety_checker(self): - """Test that stable diffusion inpainting works without a saftey checker""" - from diffusers.utils import load_image - - # Create test inpaint pipeline - gaudi_config = GaudiConfig() - scheduler = GaudiDDIMScheduler( + scheduler = EulerDiscreteScheduler( beta_start=0.00085, beta_end=0.012, - beta_schedule="scaled_linear", - clip_sample=False, - set_alpha_to_one=False, - ) - sdi_pipe = self.create_inpaint_pipe( - gaudi_config=gaudi_config, scheduler=scheduler, disable_safety_checker=True - ) - - # Initialize inpaint parameters - init_image = load_image( - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png" - ) - mask_image = load_image( - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png" - ) - - self.assertIsInstance(sdi_pipe, GaudiStableDiffusionInpaintPipeline) - self.assertIsInstance(sdi_pipe.scheduler, GaudiDDIMScheduler) - self.assertIsNone(sdi_pipe.safety_checker) - - image = sdi_pipe("example prompt", image=init_image, mask_image=mask_image, num_inference_steps=2).images[0] - self.assertIsNotNone(image) - - # Check that there's no error when saving a pipeline with one of the models being None - with tempfile.TemporaryDirectory() as tmpdirname: - sdi_pipe.save_pretrained(tmpdirname) - sdi_pipe = GaudiStableDiffusionInpaintPipeline.from_pretrained( - tmpdirname, - use_habana=True, - gaudi_config=tmpdirname, - ) - - # Sanity check that the pipeline still works - self.assertIsNone(sdi_pipe.safety_checker) - image = sdi_pipe("example prompt", image=init_image, mask_image=mask_image, num_inference_steps=2).images[0] - self.assertIsNotNone(image) - - @slow - def test_stable_diffusion_inpaint_enable_safety_checker(self): - """Test that stable diffusion inpainting works with a saftey checker and it is loaded from_pretrained""" - from diffusers.utils import load_image - - # Create test inpaint pipeline - gaudi_config = GaudiConfig() - scheduler = GaudiDDIMScheduler( - beta_start=0.00085, - beta_end=0.012, - beta_schedule="scaled_linear", - clip_sample=False, - set_alpha_to_one=False, - ) - sdi_pipe = self.create_inpaint_pipe( - gaudi_config=gaudi_config, scheduler=scheduler, disable_safety_checker=False - ) - - # Initialize inpaint parameters - init_image = load_image( - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png" - ) - mask_image = load_image( - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png" - ) - - self.assertIsInstance(sdi_pipe, GaudiStableDiffusionInpaintPipeline) - self.assertIsInstance(sdi_pipe.scheduler, GaudiDDIMScheduler) - # self.assertIsNotNone(sdi_pipe.safety_checker) <--- The safety checker is not being found. 
- - image = sdi_pipe("example prompt", image=init_image, mask_image=mask_image, num_inference_steps=2).images[0] - self.assertIsNotNone(image) - - # Check that there's no error when saving a pipeline with one of the models being None - with tempfile.TemporaryDirectory() as tmpdirname: - sdi_pipe.save_pretrained(tmpdirname) - sdi_pipe = GaudiStableDiffusionInpaintPipeline.from_pretrained( - tmpdirname, - use_habana=True, - gaudi_config=tmpdirname, - ) - - # Sanity check that the pipeline still works - self.assertIsNone(sdi_pipe.safety_checker) - image = sdi_pipe("example prompt", image=init_image, mask_image=mask_image, num_inference_steps=2).images[0] - self.assertIsNotNone(image) - - @slow - def test_stable_diffusion_inpaint_no_throughput_regression(self): - """Test that stable diffusion inpainting no throughput regression autocast""" - from diffusers.utils import load_image - - # Initialize inpaint parameters - init_image = load_image( - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint.png" - ) - mask_image = load_image( - "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/inpaint_mask.png" - ) - - prompts = [ - "a black cat with glowing eyes, cute, adorable, disney, pixar, highly detailed, 8k", - "concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k", - ] - num_images_per_prompt = 10 - num_inference_steps = 10 - model_name = "stabilityai/stable-diffusion-2-inpainting" - - init_kwargs = { - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": "Habana/stable-diffusion", - "torch_dtype": torch.bfloat16, - } - sdi_pipe = GaudiStableDiffusionInpaintPipeline.from_pretrained(model_name, **init_kwargs) - - set_seed(0) - outputs = sdi_pipe( - prompt=prompts, - image=init_image, - mask_image=mask_image, - num_images_per_prompt=num_images_per_prompt, - throughput_warmup_steps=3, - num_inference_steps=num_inference_steps, - batch_size=4, - ) - - self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) - self.assertGreaterEqual(outputs.throughput, 0.95 * INPAINT_THROUGHPUT_BASELINE_BF16) - - -""" -Copied from: https://github.com/huggingface/diffusers/blob/v0.26.3/tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_inpaint.py -- Modified pipeline to Gaudi pipeline. 
-- Modified the get_dummy_components to add the Gaudi pipeline parameters: use_habana, use_hpu_graphs, gaudi_config, bf16_full_eval -- added test_stable_diffusion_xl_inpaint_no_throughput_regression -""" - - -class StableDiffusionXLInpaintPipelineFastTests(PipelineLatentTesterMixin, PipelineTesterMixin, TestCase): - pipeline_class = GaudiStableDiffusionXLInpaintPipeline - params = TEXT_GUIDED_IMAGE_INPAINTING_PARAMS - batch_params = TEXT_GUIDED_IMAGE_INPAINTING_BATCH_PARAMS - image_params = frozenset([]) - # TO-DO: update image_params once pipeline is refactored with VaeImageProcessor.preprocess - image_latents_params = frozenset([]) - callback_cfg_params = TEXT_TO_IMAGE_CALLBACK_CFG_PARAMS.union( - { - "add_text_embeds", - "add_time_ids", - "mask", - "masked_image_latents", - } - ) - - def get_dummy_components(self, skip_first_text_encoder=False, time_cond_proj_dim=None): - torch.manual_seed(0) - set_seed(0) - unet = UNet2DConditionModel( - block_out_channels=(32, 64), - layers_per_block=2, - sample_size=32, - in_channels=4, - out_channels=4, - time_cond_proj_dim=time_cond_proj_dim, - down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"), - up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"), - # SD2-specific config below - attention_head_dim=(2, 4), - use_linear_projection=True, - addition_embed_type="text_time", - addition_time_embed_dim=8, - transformer_layers_per_block=(1, 2), - projection_class_embeddings_input_dim=72, # 5 * 8 + 32 - cross_attention_dim=64 if not skip_first_text_encoder else 32, - ) - scheduler = EulerDiscreteScheduler( - beta_start=0.00085, - beta_end=0.012, - steps_offset=1, + steps_offset=1, beta_schedule="scaled_linear", timestep_spacing="leading", ) @@ -5830,10 +5456,6 @@ def test_attention_slicing_forward_pass(self): def test_inference_batch_single_identical(self): super().test_inference_batch_single_identical(expected_max_diff=3e-3) - # TODO(Patrick, Sayak) - skip for now as this requires more refiner tests - def test_save_load_optional_components(self): - pass - def test_stable_diffusion_xl_inpaint_negative_prompt_embeds(self): device = "cpu" components = self.get_dummy_components() @@ -5951,166 +5573,6 @@ def new_step(self, *args, **kwargs): # Currently cannot support the default HeunDiscreteScheduler # assert_run_mixture(steps, 0.33, HeunDiscreteScheduler) - @slow - def test_stable_diffusion_two_xl_mixture_of_denoiser(self): - components = self.get_dummy_components() - pipe_1 = GaudiStableDiffusionXLInpaintPipeline(**components) - pipe_1.unet.set_default_attn_processor() - pipe_2 = GaudiStableDiffusionXLInpaintPipeline(**components) - pipe_2.unet.set_default_attn_processor() - - def assert_run_mixture( - num_steps, split, scheduler_cls_orig, num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps - ): - inputs = self.get_dummy_inputs() - inputs["num_inference_steps"] = num_steps - - class scheduler_cls(scheduler_cls_orig): - pass - - pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) - pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) - - # Let's retrieve the number of timesteps we want to use - pipe_1.scheduler.set_timesteps(num_steps) - expected_steps = pipe_1.scheduler.timesteps.tolist() - - split_ts = num_train_timesteps - int(round(num_train_timesteps * split)) - - if pipe_1.scheduler.order == 2: - expected_steps_1 = list(filter(lambda ts: ts >= split_ts, expected_steps)) - expected_steps_2 = expected_steps_1[-1:] + list(filter(lambda ts: ts < split_ts, expected_steps)) - expected_steps = 
expected_steps_1 + expected_steps_2 - else: - expected_steps_1 = list(filter(lambda ts: ts >= split_ts, expected_steps)) - expected_steps_2 = list(filter(lambda ts: ts < split_ts, expected_steps)) - - # now we monkey patch step `done_steps` - # list into the step function for testing - done_steps = [] - old_step = copy.copy(scheduler_cls.step) - - def new_step(self, *args, **kwargs): - done_steps.append(args[1].cpu().item()) # args[1] is always the passed `t` - return old_step(self, *args, **kwargs) - - scheduler_cls.step = new_step - - inputs_1 = {**inputs, **{"denoising_end": split, "output_type": "latent"}} - latents = pipe_1(**inputs_1).images[0] - - assert expected_steps_1 == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" - - inputs_2 = {**inputs, **{"denoising_start": split, "image": latents}} - pipe_2(**inputs_2).images[0] - - assert expected_steps_2 == done_steps[len(expected_steps_1) :] - assert expected_steps == done_steps, f"Failure with {scheduler_cls.__name__} and {num_steps} and {split}" - - for steps in [5, 8, 20]: - for split in [0.33, 0.49, 0.71]: - for scheduler_cls in [ - GaudiDDIMScheduler, - GaudiEulerDiscreteScheduler, - GaudiEulerAncestralDiscreteScheduler, - DPMSolverMultistepScheduler, - UniPCMultistepScheduler, - # HeunDiscreteScheduler, - ]: - assert_run_mixture(steps, split, scheduler_cls) - - @slow - def test_stable_diffusion_three_xl_mixture_of_denoiser(self): - components = self.get_dummy_components() - pipe_1 = GaudiStableDiffusionXLInpaintPipeline(**components) - pipe_1.unet.set_default_attn_processor() - pipe_2 = GaudiStableDiffusionXLInpaintPipeline(**components) - pipe_2.unet.set_default_attn_processor() - pipe_3 = GaudiStableDiffusionXLInpaintPipeline(**components) - pipe_3.unet.set_default_attn_processor() - - def assert_run_mixture( - num_steps, - split_1, - split_2, - scheduler_cls_orig, - num_train_timesteps=pipe_1.scheduler.config.num_train_timesteps, - ): - inputs = self.get_dummy_inputs() - inputs["num_inference_steps"] = num_steps - - class scheduler_cls(scheduler_cls_orig): - pass - - pipe_1.scheduler = scheduler_cls.from_config(pipe_1.scheduler.config) - pipe_2.scheduler = scheduler_cls.from_config(pipe_2.scheduler.config) - pipe_3.scheduler = scheduler_cls.from_config(pipe_3.scheduler.config) - - # Let's retrieve the number of timesteps we want to use - pipe_1.scheduler.set_timesteps(num_steps) - expected_steps = pipe_1.scheduler.timesteps.tolist() - - split_1_ts = num_train_timesteps - int(round(num_train_timesteps * split_1)) - split_2_ts = num_train_timesteps - int(round(num_train_timesteps * split_2)) - - if pipe_1.scheduler.order == 2: - expected_steps_1 = list(filter(lambda ts: ts >= split_1_ts, expected_steps)) - expected_steps_2 = expected_steps_1[-1:] + list( - filter(lambda ts: ts >= split_2_ts and ts < split_1_ts, expected_steps) - ) - expected_steps_3 = expected_steps_2[-1:] + list(filter(lambda ts: ts < split_2_ts, expected_steps)) - expected_steps = expected_steps_1 + expected_steps_2 + expected_steps_3 - else: - expected_steps_1 = list(filter(lambda ts: ts >= split_1_ts, expected_steps)) - expected_steps_2 = list(filter(lambda ts: ts >= split_2_ts and ts < split_1_ts, expected_steps)) - expected_steps_3 = list(filter(lambda ts: ts < split_2_ts, expected_steps)) - - # now we monkey patch step `done_steps` - # list into the step function for testing - done_steps = [] - old_step = copy.copy(scheduler_cls.step) - - def new_step(self, *args, **kwargs): - done_steps.append(args[1].cpu().item()) # 
args[1] is always the passed `t` - return old_step(self, *args, **kwargs) - - scheduler_cls.step = new_step - - inputs_1 = {**inputs, **{"denoising_end": split_1, "output_type": "latent"}} - latents = pipe_1(**inputs_1).images[0] - - assert expected_steps_1 == done_steps, ( - f"Failure with {scheduler_cls.__name__} and {num_steps} and {split_1} and {split_2}" - ) - - inputs_2 = { - **inputs, - **{"denoising_start": split_1, "denoising_end": split_2, "image": latents, "output_type": "latent"}, - } - pipe_2(**inputs_2).images[0] - - assert expected_steps_2 == done_steps[len(expected_steps_1) :] - - inputs_3 = {**inputs, **{"denoising_start": split_2, "image": latents}} - pipe_3(**inputs_3).images[0] - - assert expected_steps_3 == done_steps[len(expected_steps_1) + len(expected_steps_2) :] - assert expected_steps == done_steps, ( - f"Failure with {scheduler_cls.__name__} and {num_steps} and {split_1} and {split_2}" - ) - - for steps in [7, 11, 20]: - for split_1, split_2 in zip([0.19, 0.32], [0.81, 0.68]): - for scheduler_cls in [ - GaudiDDIMScheduler, - GaudiEulerDiscreteScheduler, - GaudiEulerAncestralDiscreteScheduler, - DPMSolverMultistepScheduler, - UniPCMultistepScheduler, - # HeunDiscreteScheduler, - ]: - assert_run_mixture(steps, split_1, split_2, scheduler_cls) - def test_stable_diffusion_xl_multi_prompts(self): device = "cpu" components = self.get_dummy_components() @@ -6330,32 +5792,34 @@ def test_stable_diffusion_xl_inpaint_no_throughput_regression(self): ) prompts = [ - "a black cat with glowing eyes, cute, adorable, disney, pixar, highly detailed, 8k", "concept art digital painting of an elven castle, inspired by lord of the rings, highly detailed, 8k", ] model_name = "diffusers/stable-diffusion-xl-1.0-inpainting-0.1" - num_images_per_prompt = 10 - num_inference_steps = 10 - init_kwargs = { - "use_habana": True, - "use_hpu_graphs": True, - "gaudi_config": "Habana/stable-diffusion", - "torch_dtype": torch.bfloat16, - } - sdi_pipe = GaudiStableDiffusionXLInpaintPipeline.from_pretrained(model_name, **init_kwargs) + num_images_per_prompt = 12 + batch_size = 4 + pipeline = GaudiStableDiffusionXLInpaintPipeline.from_pretrained( + model_name, + use_habana=True, + use_hpu_graphs=True, + gaudi_config="Habana/stable-diffusion", + torch_dtype=torch.bfloat16, + sdp_on_bf16=True, + ) - set_seed(0) - outputs = sdi_pipe( + set_seed(27) + outputs = pipeline( prompt=prompts, image=init_image, mask_image=mask_image, num_images_per_prompt=num_images_per_prompt, + batch_size=batch_size, throughput_warmup_steps=3, - num_inference_steps=num_inference_steps, - batch_size=4, ) + # Check expected number of output images self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) + + # Throughput regression test self.assertGreaterEqual(outputs.throughput, 0.95 * INPAINT_XL_THROUGHPUT_BASELINE_BF16) @@ -6501,6 +5965,7 @@ def test_no_throughput_regression_bf16(self): use_habana=True, use_hpu_graphs=True, gaudi_config=gaudi_config, + sdp_on_bf16=True, ) outputs = pipe(batch_size=batch_size) self.assertGreaterEqual(outputs.throughput, 0.95 * THROUGHPUT_UNCONDITIONAL_IMAGE_BASELINE_BF16) @@ -6661,26 +6126,30 @@ def test_flux_prompt_embeds(self): @slow @pytest.mark.skipif(not IS_GAUDI2, reason="does not fit into Gaudi1 memory") def test_flux_inference(self): - repo_id = "black-forest-labs/FLUX.1-schnell" - - pipe = self.pipeline_class.from_pretrained( - repo_id, - torch_dtype=torch.bfloat16, + prompts = [ + "A cat holding a sign that says hello world", + ] + num_images_per_prompt = 10 + 
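Several of the tests above and below assert `outputs.throughput >= 0.95 * <BASELINE>`. A minimal sketch of that convention (the constant and function names here are illustrative, not taken from this repository): a run passes as long as the measured throughput stays within 5% of the recorded per-device baseline.

THROUGHPUT_TOLERANCE = 0.95  # allow up to a 5% slowdown before flagging a regression

def throughput_ok(measured, baseline, tolerance=THROUGHPUT_TOLERANCE):
    # True if the measured throughput is within the allowed fraction of the baseline.
    return measured >= tolerance * baseline

assert throughput_ok(0.96, 1.0)      # within tolerance: passes
assert not throughput_ok(0.90, 1.0)  # more than 5% slower: flagged as a regression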
batch_size = 1 + model_name = "black-forest-labs/FLUX.1-schnell" + pipeline = GaudiFluxPipeline.from_pretrained( + model_name, use_habana=True, use_hpu_graphs=True, gaudi_config="Habana/stable-diffusion", + sdp_on_bf16=True, ) - - generator = torch.Generator(device="cpu").manual_seed(0) - - outputs = pipe( - prompt="A photo of a cat", - num_inference_steps=5, - guidance_scale=5.0, - output_type="np", - generator=generator, + set_seed(27) + outputs = pipeline( + prompt=prompts, + num_images_per_prompt=num_images_per_prompt, + batch_size=batch_size, + num_inference_steps=4, ) + # Check expected number of output images + self.assertEqual(len(outputs.images), num_images_per_prompt * len(prompts)) + # Check expected performance of FLUX.1 schnell model self.assertGreaterEqual(outputs.throughput, 0.95 * FLUX_THROUGHPUT) @@ -6821,3 +6290,242 @@ def test_flux_prompt_embeds(self): max_diff = np.abs(output_with_prompt - output_with_embeds).max() assert max_diff < 1e-4 + + @slow + @check_gated_model_access("black-forest-labs/FLUX.1-dev") + @pytest.mark.skipif(not IS_GAUDI2, reason="does not fit into Gaudi1 memory") + def test_flux_img2img_inference(self): + repo_id = "black-forest-labs/FLUX.1-dev" + image_path = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png" + image = PIL.Image.open(requests.get(image_path, stream=True).raw) + image = PIL.ImageOps.exif_transpose(image) + image = image.convert("RGB") + + pipe = self.pipeline_class.from_pretrained( + repo_id, + torch_dtype=torch.bfloat16, + use_habana=True, + use_hpu_graphs=True, + gaudi_config="Habana/stable-diffusion", + sdp_on_bf16=True, + ) + + outputs = pipe( + image=image, + prompt="cat wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", + num_inference_steps=30, + guidance_scale=3.5, + strength=0.9, + batch_size=1, + num_images_per_prompt=10, + output_type="np", + ) + + # Check expected performance of FLUX.1 dev img-to-img model + self.assertGreaterEqual(outputs.throughput, 0.95 * FLUX_DEV_I2I_THROUGHPUT) + + +class I2VGenXLPipelineTests(TestCase): + pipeline_class = GaudiI2VGenXLPipeline + params = frozenset(["prompt", "negative_prompt", "image"]) + batch_params = frozenset(["prompt", "negative_prompt", "image", "generator"]) + # No `output_type`. 
+ required_optional_params = frozenset(["num_inference_steps", "generator", "latents", "return_dict"]) + + supports_dduf = False + test_layerwise_casting = True + + def get_dummy_components(self): + torch.manual_seed(0) + scheduler = GaudiDDIMScheduler( + beta_start=0.00085, + beta_end=0.012, + beta_schedule="scaled_linear", + clip_sample=False, + set_alpha_to_one=False, + ) + + torch.manual_seed(0) + unet = I2VGenXLUNet( + block_out_channels=(4, 8), + layers_per_block=1, + sample_size=32, + in_channels=4, + out_channels=4, + down_block_types=("CrossAttnDownBlock3D", "DownBlock3D"), + up_block_types=("UpBlock3D", "CrossAttnUpBlock3D"), + cross_attention_dim=4, + attention_head_dim=4, + num_attention_heads=None, + norm_num_groups=2, + ) + + torch.manual_seed(0) + vae = AutoencoderKL( + block_out_channels=(8,), + in_channels=3, + out_channels=3, + down_block_types=["DownEncoderBlock2D"], + up_block_types=["UpDecoderBlock2D"], + latent_channels=4, + sample_size=32, + norm_num_groups=2, + ) + torch.manual_seed(0) + text_encoder_config = CLIPTextConfig( + bos_token_id=0, + eos_token_id=2, + hidden_size=4, + intermediate_size=16, + layer_norm_eps=1e-05, + num_attention_heads=2, + num_hidden_layers=2, + pad_token_id=1, + vocab_size=1000, + hidden_act="gelu", + projection_dim=32, + ) + text_encoder = CLIPTextModel(text_encoder_config) + tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip") + + torch.manual_seed(0) + vision_encoder_config = CLIPVisionConfig( + hidden_size=4, + projection_dim=4, + num_hidden_layers=2, + num_attention_heads=2, + image_size=32, + intermediate_size=16, + patch_size=1, + ) + image_encoder = CLIPVisionModelWithProjection(vision_encoder_config) + + torch.manual_seed(0) + feature_extractor = CLIPImageProcessor(crop_size=32, size=32) + + components = { + "unet": unet, + "scheduler": scheduler, + "vae": vae, + "text_encoder": text_encoder, + "image_encoder": image_encoder, + "tokenizer": tokenizer, + "feature_extractor": feature_extractor, + "use_habana": True, + "use_hpu_graphs": True, + "gaudi_config": "Habana/stable-diffusion", + } + return components + + def get_dummy_inputs(self, device, seed=0): + if str(device).startswith("mps"): + generator = torch.manual_seed(seed) + else: + generator = torch.Generator(device=device).manual_seed(seed) + + input_image = floats_tensor((1, 3, 32, 32), rng=random.Random(seed)).to(device) + inputs = { + "prompt": "A painting of a squirrel eating a burger", + "image": input_image, + "generator": generator, + "num_inference_steps": 2, + "guidance_scale": 6.0, + "output_type": "pt", + "num_frames": 4, + "width": 32, + "height": 32, + } + return inputs + + def test_cfg(self): + sig = inspect.signature(self.pipeline_class.__call__) + + if "guidance_scale" not in sig.parameters: + return + + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device="cpu") + + inputs["guidance_scale"] = 1.0 + out_no_cfg = pipe(**inputs)[0] + + inputs["guidance_scale"] = 7.5 + out_cfg = pipe(**inputs)[0] + + assert out_cfg[0].shape == out_no_cfg[0].shape + + def test_text_to_video_default_case(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["output_type"] = "np" + frames = pipe(**inputs).frames + + 
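+        # `frames` is indexed as [video][frame]; with the dummy inputs above
+        # (num_frames=4, 32x32 resolution) each frame is a (32, 32, 3) array, so
+        # frames[0][0] below selects the first frame of the first generated video
+        # and the slice check pins its bottom-right corner.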
image_slice = frames[0][0][-3:, -3:, -1] + + assert frames[0][0].shape == (32, 32, 3) + expected_slice = np.array([0.5146, 0.6525, 0.6032, 0.5204, 0.5675, 0.4125, 0.3016, 0.5172, 0.4095]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + def test_num_videos_per_prompt(self): + device = "cpu" # ensure determinism for the device-dependent torch.Generator + components = self.get_dummy_components() + pipe = self.pipeline_class(**components) + pipe.set_progress_bar_config(disable=None) + + inputs = self.get_dummy_inputs(device) + inputs["output_type"] = "np" + frames = pipe(**inputs, num_videos_per_prompt=2).frames + + assert frames.shape == (2, 4, 32, 32, 3) + assert frames[0][0].shape == (32, 32, 3) + + image_slice = frames[0][0][-3:, -3:, -1] + expected_slice = np.array([0.5146, 0.6525, 0.6032, 0.5204, 0.5675, 0.4125, 0.3016, 0.5172, 0.4095]) + + assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2 + + @slow + def test_i2vgen_xl_bf16(self): + pipe = GaudiI2VGenXLPipeline.from_pretrained( + "ali-vilab/i2vgen-xl", + use_habana=True, + use_hpu_graphs=True, + gaudi_config=GaudiConfig.from_pretrained("Habana/stable-diffusion"), + sdp_on_bf16=True, + torch_dtype=torch.bfloat16, + ) + pipe.enable_model_cpu_offload(device=torch_device) + pipe.set_progress_bar_config(disable=None) + image = load_image( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true" + ) + + generator = torch.Generator("cpu").manual_seed(0) + num_frames = 3 + + output = pipe( + image=image, + prompt="my cat", + num_frames=num_frames, + generator=generator, + num_inference_steps=50, + output_type="np", + ) + + image = output.frames[0] + assert image.shape == (num_frames, 704, 1280, 3) + + image_slice = image[0, -3:, -3:, -1] + expected_slice = np.array( + [0.44921875, 0.3642578, 0.38671875, 0.46484375, 0.41210938, 0.45874023, 0.49536133, 0.4387207, 0.48242188] + ) + assert numpy_cosine_similarity_distance(image_slice.flatten(), expected_slice.flatten()) < 1e-3 + self.assertGreaterEqual(output.throughput, 0.95 * I2V_THROUGHPUT) diff --git a/tests/test_encoder_decoder.py b/tests/test_encoder_decoder.py index 308a3fe242..b9be7b77f6 100644 --- a/tests/test_encoder_decoder.py +++ b/tests/test_encoder_decoder.py @@ -11,41 +11,33 @@ from .test_examples import ACCURACY_PERF_FACTOR, TIME_PERF_FACTOR -if os.environ.get("GAUDI2_CI", "0") == "1": - # Gaudi2 CI baselines - MODELS_TO_TEST = { - "summarization": { - "bf16": [ - ("facebook/bart-large-cnn", "Habana/bart", 4.339, 28.9801, 2, 2), - ("t5-3b", "Habana/t5", 3.848, 21.8877, 2, 1), - ], - }, - "translation": { - "bf16": [ - ("t5-small", "Habana/t5", 11.648, 11.7277, 2, 1), - ], - }, - } -else: - # Gaudi1 CI baselines - MODELS_TO_TEST = { - "summarization": { - "bf16": [ - ("facebook/bart-large-cnn", "Habana/bart", 2.304, 29.174, 2, 2), - ("t5-3b", "Habana/t5", 1.005, 21.7286, 2, 1), - ], - }, - "translation": { - "bf16": [ - ("t5-small", "Habana/t5", 9.188, 11.6126, 2, 1), - ], - }, - } +MODELS_TO_TEST = { + "summarization": { + "bf16": [ + ("facebook/bart-large-cnn", "Habana/bart", 2, 2), + ("t5-3b", "Habana/t5", 2, 1), + ], + }, + "translation": { + "bf16": [ + ("t5-small", "Habana/t5", 2, 1), + ], + }, +} class TestEncoderDecoderModels: PATH_TO_EXAMPLE_DIR = Path(__file__).resolve().parent.parent / "examples" + @pytest.fixture(autouse=True) + def _pretest(self, baseline): + """ + This is automatically called before each test function is executed. 
+ + Collect custom fixtures (from conftest.py). + """ + self.baseline = baseline + def _install_requirements(self, task: str): cmd_line = f"pip install -r {self.PATH_TO_EXAMPLE_DIR / task / 'requirements.txt'}".split() p = subprocess.Popen(cmd_line) @@ -80,8 +72,6 @@ def _run_test( self, command: List[str], task: str, - baseline: float, - baseline_acc: float, ): with TemporaryDirectory() as tmp_dir: command.append(f"--output_dir {tmp_dir}") @@ -93,35 +83,36 @@ def _run_test( proc = subprocess.run(command) # Ensure the run finished without any issue - # Use try-except to avoid logging the token if used - try: - assert proc.returncode == 0 - except AssertionError as e: - if "'--token', 'hf_" in e.args[0]: - e.args = (f"The following command failed:\n{' '.join(command[:-2])}",) - raise + assert proc.returncode == 0 with open(Path(tmp_dir) / "predict_results.json") as fp: results = json.load(fp) + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Ensure performance requirements (throughput) are met - assert results["predict_samples_per_second"] >= (2 - TIME_PERF_FACTOR) * baseline + self.baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, + context=[device], + predict_samples_per_second=results["predict_samples_per_second"], + ) if task == "summarization": accuracy_metric = "predict_rougeLsum" elif task == "translation": accuracy_metric = "predict_bleu" - assert results[accuracy_metric] >= ACCURACY_PERF_FACTOR * baseline_acc + self.baseline.assertRef( + compare=lambda actual, ref: actual >= ACCURACY_PERF_FACTOR * ref, + context=[device], + **{accuracy_metric: results[accuracy_metric]}, + ) def _test_text_summarization( self, model_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, batch_size: int, num_beams: int, - token: str, deepspeed: bool = False, world_size: int = 8, ): @@ -159,17 +150,14 @@ def _test_text_summarization( if not deepspeed and model_name == "t5-3b": command.append("--bf16_full_eval") - self._run_test(command, task, baseline, baseline_acc) + self._run_test(command, task) def _test_text_translation( self, model_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, batch_size: int, num_beams: int, - token: str, deepspeed: bool = False, world_size: int = 8, ): @@ -213,36 +201,30 @@ def _test_text_translation( command_args=command_args, ) - self._run_test(command, task, baseline, baseline_acc) + self._run_test(command, task) @pytest.mark.parametrize( - "model_name, gaudi_config, baseline, baseline_acc, batch_size, num_beams", + "model_name, gaudi_config, batch_size, num_beams", MODELS_TO_TEST["summarization"]["bf16"], ) def test_text_summarization_bf16( self, model_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, batch_size: int, num_beams: int, - token: str, ): - self._test_text_summarization(model_name, gaudi_config, baseline, baseline_acc, batch_size, num_beams, token) + self._test_text_summarization(model_name, gaudi_config, batch_size, num_beams) @pytest.mark.parametrize( - "model_name, gaudi_config, baseline, baseline_acc, batch_size, num_beams", + "model_name, gaudi_config, batch_size, num_beams", MODELS_TO_TEST["translation"]["bf16"], ) def test_text_translation_bf16( self, model_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, batch_size: int, num_beams: int, - token: str, ): - self._test_text_translation(model_name, gaudi_config, baseline, baseline_acc, batch_size, num_beams, token) + 
self._test_text_translation(model_name, gaudi_config, batch_size, num_beams) diff --git a/tests/test_examples.py b/tests/test_examples.py index a1271d4da1..c0f1c2ca00 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -205,7 +205,7 @@ def is_valid_model_type(model_type: str) -> bool: "run_image2text_lora_finetune": _get_supported_models_for_script( MODELS_TO_TEST_MAPPING, MODEL_FOR_VISION_2_SEQ_MAPPING, - ["idefics2", "mllama"], + ["idefics2", "mllama", "llava"], ), } @@ -514,10 +514,12 @@ def test(self): extra_command_line_arguments.remove("--use_hpu_graphs_for_inference") if os.environ.get("DATA_CACHE", None) is not None and self.EXAMPLE_NAME == "run_clip": extra_command_line_arguments[0] = "--data_dir {}".format(os.environ["DATA_CACHE"]) - elif torch_compile and ( + + if torch_compile and ( model_name == "bert-large-uncased-whole-word-masking" or model_name == "roberta-large" or model_name == "albert-xxlarge-v1" + or model_name == "./clip-roberta" ): extra_command_line_arguments.append("--torch_compile_backend hpu_backend") extra_command_line_arguments.append("--torch_compile") @@ -530,6 +532,8 @@ def test(self): if self.EXAMPLE_NAME == "run_audio_classification": extra_command_line_arguments.append("--sdp_on_bf16") + if "wav2vec2" in model_name: + extra_command_line_arguments.append("--attn_implementation sdpa") if self.EXAMPLE_NAME == "run_image_classification": extra_command_line_arguments.append("--sdp_on_bf16") @@ -550,6 +554,11 @@ def test(self): if model_name == "openai/whisper-small": extra_command_line_arguments.append("--sdp_on_bf16") + if self.EXAMPLE_NAME == "run_speech_recognition_ctc": + if "wav2vec2" in model_name: + extra_command_line_arguments.append("--sdp_on_bf16") + extra_command_line_arguments.append("--attn_implementation sdpa") + if self.EXAMPLE_NAME == "run_clip": extra_command_line_arguments.append("--sdp_on_bf16") @@ -687,7 +696,7 @@ def _create_command_line( "--save_strategy no", ] - if "compile" in task: + if "compile" in task or "--torch_compile" in extra_command_line_arguments: cmd_line += ["--use_lazy_mode False"] elif self.EXAMPLE_NAME not in ["dpo", "ppo", "reward_modeling"]: cmd_line += ["--use_lazy_mode"] @@ -877,7 +886,7 @@ class MultiCardSeq2SeqQuestionAnsweringExampleTester( class MultiCardVisionLanguageExampleTester( - ExampleTesterBase, metaclass=ExampleTestMeta, example_name="run_clip", multi_card=True + ExampleTesterBase, metaclass=ExampleTestMeta, example_name="run_clip", multi_card=True, torch_compile=True ): TASK_NAME = "ydshieh/coco_dataset_script" diff --git a/tests/test_fp8_examples.py b/tests/test_fp8_examples.py index 27020a2b8f..4e2382b8b7 100644 --- a/tests/test_fp8_examples.py +++ b/tests/test_fp8_examples.py @@ -18,8 +18,6 @@ "mistralai/Mistral-7B-Instruct-v0.2", "tatsu-lab/alpaca", "", - 12.373, - 0.7538, "language-modeling", 8, 8, @@ -36,8 +34,7 @@ def _test_fp8_train( model_name: str, dataset_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, + baseline, task: str, batch_size_train: int, batch_size_eval: int, @@ -112,27 +109,34 @@ def _test_fp8_train( with open(Path(tmp_dir) / "all_results.json") as fp: results = json.load(fp) + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Ensure performance requirements (throughput) are met - assert results["train_samples_per_second"] >= (2 - TIME_PERF_FACTOR) * baseline - assert results["eval_accuracy"] >= ACCURACY_PERF_FACTOR * baseline_acc + baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) 
* ref, + context=[device], + train_samples_per_second=results["train_samples_per_second"], + ) + baseline.assertRef( + compare=lambda actual, ref: actual >= ACCURACY_PERF_FACTOR * ref, + context=[device], + eval_accuracy=results["eval_accuracy"], + ) @pytest.mark.parametrize( - "model_name, dataset_name, gaudi_config, baseline, baseline_acc, task, bs_train, bs_eval, script", + "model_name, dataset_name, gaudi_config, task, bs_train, bs_eval, script", MODELS_TO_TEST["fp8"], ) def test_fp8_train( model_name: str, dataset_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, task: str, bs_train: int, bs_eval: int, script: str, - token: str, + baseline, + token, ): - _test_fp8_train( - model_name, dataset_name, gaudi_config, baseline, baseline_acc, task, bs_train, bs_eval, script, token - ) + _test_fp8_train(model_name, dataset_name, gaudi_config, baseline, task, bs_train, bs_eval, script, token) diff --git a/tests/test_fsdp_examples.py b/tests/test_fsdp_examples.py index 180a2bb3f9..90931e1e25 100644 --- a/tests/test_fsdp_examples.py +++ b/tests/test_fsdp_examples.py @@ -17,8 +17,6 @@ ( "bert-base-uncased", "Habana/bert-base-uncased", - 2983.533, - 85.7077, "question-answering", 24, 8, @@ -28,8 +26,6 @@ ( "meta-llama/Llama-2-7b-hf", "", - 85.016, - 0.9093, "language-modeling", 8, 8, @@ -46,8 +42,7 @@ def _test_fsdp( model_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, + baseline, task: str, batch_size_train: int, batch_size_eval: int, @@ -150,27 +145,38 @@ def _test_fsdp( with open(Path(tmp_dir) / "all_results.json") as fp: results = json.load(fp) + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Ensure performance requirements (throughput) are met - assert results["train_samples_per_second"] >= (2 - TIME_PERF_FACTOR) * baseline + baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, + context=[device], + train_samples_per_second=results["train_samples_per_second"], + ) if model_name == "bert-base-uncased": - assert results["eval_f1"] >= ACCURACY_PERF_FACTOR * baseline_acc + baseline.assertRef( + compare=lambda actual, ref: actual >= ACCURACY_PERF_FACTOR * ref, + context=[device], + eval_f1=results["eval_f1"], + ) else: - assert results["train_loss"] <= (2 - ACCURACY_PERF_FACTOR) * baseline_acc + baseline.assertRef( + compare=lambda actual, ref: actual <= (2 - ACCURACY_PERF_FACTOR) * ref, + context=[device], + train_loss=results["train_loss"], + ) -@pytest.mark.parametrize( - "model_name, gaudi_config, baseline, baseline_acc, task, bs_train, bs_eval, script, policy", MODELS_TO_TEST["bf16"] -) +@pytest.mark.parametrize("model_name, gaudi_config, task, bs_train, bs_eval, script, policy", MODELS_TO_TEST["bf16"]) def test_fsdp_bf16( model_name: str, gaudi_config: str, - baseline: float, - baseline_acc: float, task: str, bs_train: int, bs_eval: int, script: str, policy: str, - token: str, + baseline, + token, ): - _test_fsdp(model_name, gaudi_config, baseline, baseline_acc, task, bs_train, bs_eval, script, policy, token) + _test_fsdp(model_name, gaudi_config, baseline, task, bs_train, bs_eval, script, policy, token) diff --git a/tests/test_image_to_text_example.py b/tests/test_image_to_text_example.py index 921f59ad68..51e99b8466 100644 --- a/tests/test_image_to_text_example.py +++ b/tests/test_image_to_text_example.py @@ -14,32 +14,34 @@ # Gaudi2 CI baselines MODELS_TO_TEST = { "bf16": [ - # ("llava-hf/llava-1.5-7b-hf", 1, 77.98733740859008), - # ("llava-hf/llava-1.5-13b-hf", 1, 
48.54364937033955), - ("llava-hf/llava-v1.6-mistral-7b-hf", 1, 33.17984878151546), - ("llava-hf/llava-v1.6-vicuna-7b-hf", 1, 35.00608681379742), - ("llava-hf/llava-v1.6-vicuna-13b-hf", 1, 23.527610042925), - ("google/paligemma-3b-mix-224", 1, 132.8949150246155), - ("HuggingFaceM4/idefics2-8b", 1, 21.89944593215077), - ("meta-llama/Llama-3.2-11B-Vision-Instruct", 1, 18.974541922240313), - ("tiiuae/falcon-11B-vlm", 1, 23.69260849957278), + # ("llava-hf/llava-1.5-7b-hf", 1), + # ("llava-hf/llava-1.5-13b-hf", 1), + ("llava-hf/llava-v1.6-mistral-7b-hf", 1), + ("llava-hf/llava-v1.6-vicuna-7b-hf", 1), + ("llava-hf/llava-v1.6-vicuna-13b-hf", 1), + ("google/paligemma-3b-mix-224", 1), + ("HuggingFaceM4/idefics2-8b", 1), + ("meta-llama/Llama-3.2-11B-Vision-Instruct", 1), + ("tiiuae/falcon-11B-vlm", 1), + ("Qwen/Qwen2-VL-2B-Instruct", 1), + ("Qwen/Qwen2-VL-7B-Instruct", 1), ], "fp8": [ - # ("llava-hf/llava-1.5-7b-hf", 1, 98.72578382705062), - # ("llava-hf/llava-1.5-13b-hf", 1, 67.20488222876344), - ("llava-hf/llava-v1.6-mistral-7b-hf", 1, 45.011551008367084), - ("llava-hf/llava-v1.6-vicuna-7b-hf", 1, 45.18544502949674), - ("llava-hf/llava-v1.6-vicuna-13b-hf", 1, 30.9535718774675), + # ("llava-hf/llava-1.5-7b-hf", 1), + # ("llava-hf/llava-1.5-13b-hf", 1), + ("llava-hf/llava-v1.6-mistral-7b-hf", 1), + ("llava-hf/llava-v1.6-vicuna-7b-hf", 1), + ("llava-hf/llava-v1.6-vicuna-13b-hf", 1), ], } else: # Gaudi1 CI baselines MODELS_TO_TEST = { "bf16": [ - ("llava-hf/llava-1.5-7b-hf", 1, 28.04096918512148), - ("llava-hf/llava-1.5-13b-hf", 1, 16.704731010481538), - ("llava-hf/llava-v1.6-mistral-7b-hf", 1, 10.759228696741), - ("llava-hf/llava-v1.6-vicuna-13b-hf", 1, 6.96732060769783), + ("llava-hf/llava-1.5-7b-hf", 1), + ("llava-hf/llava-1.5-13b-hf", 1), + ("llava-hf/llava-v1.6-mistral-7b-hf", 1), + ("llava-hf/llava-v1.6-vicuna-13b-hf", 1), ], "fp8": [], } @@ -47,7 +49,7 @@ def _test_image_to_text( model_name: str, - baseline: float, + baseline, token: str, batch_size: int = 1, fp8: bool = False, @@ -61,6 +63,7 @@ def _test_image_to_text( f"--model_name_or_path {model_name}", f"--batch_size {batch_size}", "--max_new_tokens 20", + "--ignore_eos", ] command += [ @@ -116,15 +119,21 @@ def _test_image_to_text( with open(Path(tmp_dir) / "results.json") as fp: results = json.load(fp) + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Ensure performance requirements (throughput) are met - assert results["throughput"] >= (2 - TIME_PERF_FACTOR) * baseline + baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, + context=[device], + throughput=results["throughput"], + ) -@pytest.mark.parametrize("model_name, batch_size, baseline", MODELS_TO_TEST["bf16"]) -def test_image_to_text_bf16(model_name: str, baseline: float, batch_size: int, token: str): +@pytest.mark.parametrize("model_name, batch_size", MODELS_TO_TEST["bf16"]) +def test_image_to_text_bf16(model_name: str, batch_size: int, baseline, token): _test_image_to_text(model_name, baseline, token, batch_size) -@pytest.mark.parametrize("model_name, batch_size, baseline", MODELS_TO_TEST["fp8"]) -def test_image_to_text_fp8(model_name: str, baseline: float, batch_size: int, token: str): +@pytest.mark.parametrize("model_name, batch_size", MODELS_TO_TEST["fp8"]) +def test_image_to_text_fp8(model_name: str, batch_size: int, baseline, token): _test_image_to_text(model_name, baseline, token, batch_size, fp8=True) diff --git a/tests/test_openclip_vqa.py b/tests/test_openclip_vqa.py index c0c3d38521..812db05645 100644 
--- a/tests/test_openclip_vqa.py +++ b/tests/test_openclip_vqa.py @@ -10,22 +10,12 @@ from .test_examples import TIME_PERF_FACTOR -if os.environ.get("GAUDI2_CI", "0") == "1": - # Gaudi2 CI baselines - MODELS_TO_TEST = { - "bf16": [ - ("laion/CLIP-ViT-g-14-laion2B-s12B-b42K", 1472), - ("microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224", 1816), - ], - } -else: - # Gaudi1 CI baselines - MODELS_TO_TEST = { - "bf16": [ - ("laion/CLIP-ViT-g-14-laion2B-s12B-b42K", 550), - ("microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224", 1200), - ], - } +MODELS_TO_TEST = { + "bf16": [ + "laion/CLIP-ViT-g-14-laion2B-s12B-b42K", + "microsoft/BiomedCLIP-PubMedBERT_256-vit_base_patch16_224", + ], +} def _install_requirements(): @@ -38,7 +28,7 @@ def _install_requirements(): assert return_code == 0 -def _test_openclip_vqa(model_name: str, baseline: float): +def _test_openclip_vqa(model_name: str, baseline): _install_requirements() command = ["python3"] path_to_example_dir = Path(__file__).resolve().parent.parent / "examples" @@ -72,10 +62,16 @@ def _test_openclip_vqa(model_name: str, baseline: float): with open(Path(tmp_dir) / "results.json") as fp: results = json.load(fp) + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Ensure performance requirements (throughput) are met - assert results["throughput"] >= (2 - TIME_PERF_FACTOR) * baseline + baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, + context=[device], + throughput=results["throughput"], + ) -@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST["bf16"]) -def test_openclip_vqa_bf16(model_name: str, baseline: float): +@pytest.mark.parametrize("model_name", MODELS_TO_TEST["bf16"]) +def test_openclip_vqa_bf16(model_name: str, baseline): _test_openclip_vqa(model_name, baseline) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 200f2a78a2..ca53718f54 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import operator import os import numpy as np @@ -27,20 +28,20 @@ MODELS_TO_TEST = { "text-to-speech": [ - ("microsoft/speecht5_tts", 16000), - ("facebook/hf-seamless-m4t-medium", 16000), - ("facebook/mms-tts-eng", 16000), + "microsoft/speecht5_tts", + "facebook/hf-seamless-m4t-medium", + "facebook/mms-tts-eng", ], "image-to-text": [ - ("Salesforce/blip-image-captioning-base", "a soccer player is playing a game on the app"), - ("nlpconnect/vit-gpt2-image-captioning", "a soccer game with a player jumping to catch"), + ("Salesforce/blip-image-captioning-base", 44), + ("nlpconnect/vit-gpt2-image-captioning", 44), ], } class TestGaudiPipeline: - @pytest.mark.parametrize("model, expected_result", MODELS_TO_TEST["image-to-text"]) - def test_image_to_text(self, model, expected_result): + @pytest.mark.parametrize("model, validate_length", MODELS_TO_TEST["image-to-text"]) + def test_image_to_text(self, model, validate_length, baseline): adapt_transformers_to_gaudi() MODEL_DTYPE_LIST = [torch.bfloat16, torch.float32] generate_kwargs = { @@ -60,10 +61,12 @@ def test_image_to_text(self, model, expected_result): generator.model = wrap_in_hpu_graph(generator.model) for i in range(3): output = generator(image, generate_kwargs=generate_kwargs) - assert output[0]["generated_text"].startswith(expected_result) - @pytest.mark.parametrize("model, expected_sample_rate", MODELS_TO_TEST["text-to-speech"]) - def test_text_to_speech(self, model, expected_sample_rate): + result = output[0]["generated_text"][:validate_length] + baseline.assertRef(compare=operator.eq, generated_text=result) + + @pytest.mark.parametrize("model", MODELS_TO_TEST["text-to-speech"]) + def test_text_to_speech(self, model, baseline): adapt_transformers_to_gaudi() MODEL_DTYPE_LIST = [torch.bfloat16, torch.float32] text = "hello, the dog is cooler" @@ -95,4 +98,5 @@ def test_text_to_speech(self, model, expected_sample_rate): for i in range(3): output = generator(text, forward_params=forward_params, generate_kwargs=generate_kwargs) assert isinstance(output["audio"], np.ndarray) - assert output["sampling_rate"] == expected_sample_rate + + baseline.assertRef(compare=operator.eq, sampling_rate=output["sampling_rate"]) diff --git a/tests/test_sentence_transformers.py b/tests/test_sentence_transformers.py index 90d97f3005..f9b3033a7f 100644 --- a/tests/test_sentence_transformers.py +++ b/tests/test_sentence_transformers.py @@ -9,45 +9,26 @@ from .test_examples import TIME_PERF_FACTOR -if os.environ.get("GAUDI2_CI", "0") == "1": - # Gaudi2 CI baselines - MODELS_TO_TEST = [ - ("sentence-transformers/all-mpnet-base-v2", 762.5595168883357), - ("sentence-transformers/multi-qa-mpnet-base-dot-v1", 545.3360251829846), - ("sentence-transformers/all-distilroberta-v1", 958.5097903298335), - ("sentence-transformers/all-MiniLM-L12-v2", 3614.2610109716247), - ("sentence-transformers/multi-qa-distilbert-cos-v1", 944.6166139694299), - ("sentence-transformers/all-MiniLM-L6-v2", 2615.6975354038477), - ("sentence-transformers/multi-qa-MiniLM-L6-cos-v1", 1208.3672807492396), - ("sentence-transformers/paraphrase-multilingual-mpnet-base-v2", 2392.1654748794062), - ("sentence-transformers/paraphrase-albert-small-v2", 3896.1911011860166), - ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 3558.0778715789693), - ("sentence-transformers/paraphrase-MiniLM-L3-v2", 5734.318427972881), - ("sentence-transformers/distiluse-base-multilingual-cased-v1", 3487.3319366004903), - ("sentence-transformers/distiluse-base-multilingual-cased-v2", 3807.2486282025716), - ] 
-else: - # Gaudi1 CI baselines - MODELS_TO_TEST = [ - ("sentence-transformers/all-mpnet-base-v2", 164.36556936723508), - ("sentence-transformers/multi-qa-mpnet-base-dot-v1", 116.82789535569364), - ("sentence-transformers/all-distilroberta-v1", 226.90237421623164), - ("sentence-transformers/all-MiniLM-L12-v2", 1252.6261862281467), - ("sentence-transformers/multi-qa-distilbert-cos-v1", 216.47035182888888), - ("sentence-transformers/all-MiniLM-L6-v2", 1109.160132821451), - ("sentence-transformers/multi-qa-MiniLM-L6-cos-v1", 471.14320842607674), - ("sentence-transformers/paraphrase-multilingual-mpnet-base-v2", 518.4762252952173), - ("sentence-transformers/paraphrase-albert-small-v2", 1139.806075824319), - ("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", 1253.06776127632), - ("sentence-transformers/paraphrase-MiniLM-L3-v2", 3029.398417051629), - ("sentence-transformers/distiluse-base-multilingual-cased-v1", 947.844857744754), - ("sentence-transformers/distiluse-base-multilingual-cased-v2", 947.7317550605878), - ] +MODELS_TO_TEST = [ + "sentence-transformers/all-mpnet-base-v2", + "sentence-transformers/multi-qa-mpnet-base-dot-v1", + "sentence-transformers/all-distilroberta-v1", + "sentence-transformers/all-MiniLM-L12-v2", + "sentence-transformers/multi-qa-distilbert-cos-v1", + "sentence-transformers/all-MiniLM-L6-v2", + "sentence-transformers/multi-qa-MiniLM-L6-cos-v1", + "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", + "sentence-transformers/paraphrase-albert-small-v2", + "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", + "sentence-transformers/paraphrase-MiniLM-L3-v2", + "sentence-transformers/distiluse-base-multilingual-cased-v1", + "sentence-transformers/distiluse-base-multilingual-cased-v2", +] def _test_sentence_transformers( model_name: str, - baseline: float, + baseline, ): model = SentenceTransformer(model_name) @@ -74,10 +55,17 @@ def _test_sentence_transformers( end_time = time.perf_counter() diff_time = end_time - start_time measured_throughput = len(sentences) / diff_time + + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Only assert the last measured throughtput as the first iteration is used as a warmup - assert measured_throughput >= (2 - TIME_PERF_FACTOR) * baseline + baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, + context=[device], + measured_throughput=measured_throughput, + ) -@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST) -def test_compute_embeddings_throughput(model_name: str, baseline: float): +@pytest.mark.parametrize("model_name", MODELS_TO_TEST) +def test_compute_embeddings_throughput(model_name: str, baseline): _test_sentence_transformers(model_name, baseline) diff --git a/tests/test_text_generation_example.py b/tests/test_text_generation_example.py index f782bd84d1..0c513d8bb1 100644 --- a/tests/test_text_generation_example.py +++ b/tests/test_text_generation_example.py @@ -1,4 +1,5 @@ import json +import operator import os import re import subprocess @@ -18,147 +19,142 @@ prev_quant_rank = 0 if os.environ.get("GAUDI2_CI", "0") == "1": - # Gaudi2 CI baselines + # Gaudi2 CI MODELS_TO_TEST = { "bf16_1x": [ - ("bigscience/bloomz-7b1", 1, False, 130.0472971205316, False), - ("gpt2-xl", 1, False, 281.8734689674413, False), - ("EleutherAI/gpt-j-6b", 1, False, 160.5823842101192, False), - ("EleutherAI/gpt-neox-20b", 1, False, 50.67672679310354, False), - ("meta-llama/Llama-2-7b-hf", 1, True, 141.25776956002076, True), - 
("tiiuae/falcon-40b", 1, True, 25.202450111088346, False), - ("bigcode/starcoder", 256, True, 6846.575763562658, True), - ("Salesforce/codegen2-1B", 1, False, 446.4029486883532, False), - ("mosaicml/mpt-30b", 1, False, 36.06464336116623, False), - ("mistralai/Mistral-7B-v0.1", 1, True, 130.2172236767782, True), - ("mistralai/Mixtral-8x7B-v0.1", 1, False, 23.7931001677926, True), - ("microsoft/phi-2", 1, False, 224.72307766211117, False), - ("meta-llama/Meta-Llama-3-8B", 1, True, 129, False), - ("meta-llama/Llama-2-7b-hf", 512, True, 12808, False), - ("meta-llama/Llama-2-7b-hf", 512, False, 8711, False), # in some cases like TGI, reuse_cache isn't used - ("stabilityai/stablelm-2-12b", 1, False, 74.8904496532218, False), - ("codellama/CodeLlama-34b-hf", 1, True, 32.644, False), - ("bigcode/starcoder2-3b", 1, False, 261.07213776344133, True), - ("adept/persimmon-8b-base", 4, False, 366.73968820698406, False), - # ("Qwen/Qwen1.5-7B", 4, False, 490.8621617893209, False), - ("google/gemma-7b", 1, False, 109.70751574382221, True), - ("google/gemma-2-9b", 1, False, 92.302359446567, True), - ("google/gemma-2-27b", 1, False, 36.578709544111, True), - ("state-spaces/mamba-130m-hf", 1536, False, 5385.511100161605, False), - # ("Deci/DeciLM-7B", 1, False, 115, False), - ("Qwen/Qwen2-7B", 256, False, 8870.945160540245, True), - ("Qwen/Qwen1.5-MoE-A2.7B", 1, True, 44.25834541569395, False), - # ("EleutherAI/gpt-neo-2.7B", 1, False, 257.2476416844122, False), - # ("facebook/xglm-1.7B", 1, False, 357.46365062825083, False), - # ("CohereForAI/c4ai-command-r-v01", 1, False, 29.50315234651154, False), - ("tiiuae/falcon-mamba-7b", 1, False, 47.1464839567739, False), - ("openbmb/MiniCPM3-4B", 1, False, 65.116, False), - ("baichuan-inc/Baichuan2-7B-Chat", 1, True, 108, False), - ("baichuan-inc/Baichuan2-13B-Chat", 1, False, 66, False), - ("deepseek-ai/DeepSeek-V2-Lite", 1, False, 35, False), - ("THUDM/chatglm3-6b", 1, True, 150, False), + ("bigscience/bloomz-7b1", 1, False, False), + ("gpt2-xl", 1, False, False), + ("EleutherAI/gpt-j-6b", 1, False, False), + ("EleutherAI/gpt-neox-20b", 1, False, False), + ("meta-llama/Llama-2-7b-hf", 1, True, True), + ("tiiuae/falcon-40b", 1, True, False), + ("bigcode/starcoder", 256, True, True), + ("Salesforce/codegen2-1B", 1, False, False), + ("mosaicml/mpt-30b", 1, False, False), + ("mistralai/Mistral-7B-v0.1", 1, True, True), + ("mistralai/Mixtral-8x7B-v0.1", 1, False, True), + ("microsoft/phi-2", 1, False, False), + ("meta-llama/Meta-Llama-3-8B", 1, True, False), + ("meta-llama/Llama-2-7b-hf", 512, True, False), + ("meta-llama/Llama-2-7b-hf", 512, False, False), # in some cases like TGI, reuse_cache isn't used + ("stabilityai/stablelm-2-12b", 1, False, False), + ("codellama/CodeLlama-34b-hf", 1, True, False), + ("bigcode/starcoder2-3b", 1, False, True), + ("adept/persimmon-8b-base", 4, False, False), + # ("Qwen/Qwen1.5-7B", 4, False, False), + ("google/gemma-7b", 1, False, True), + ("google/gemma-2-9b", 1, False, True), + ("google/gemma-2-27b", 1, False, True), + ("state-spaces/mamba-130m-hf", 1536, False, False), + # ("Deci/DeciLM-7B", 1, False, False), + ("Qwen/Qwen2-7B", 256, False, True), + ("Qwen/Qwen1.5-MoE-A2.7B", 1, True, False), + # ("EleutherAI/gpt-neo-2.7B", 1, False, False), + # ("facebook/xglm-1.7B", 1, False, False), + # ("CohereForAI/c4ai-command-r-v01", 1, False, False), + ("tiiuae/falcon-mamba-7b", 1, False, False), + ("openbmb/MiniCPM3-4B", 1, False, False), + ("baichuan-inc/Baichuan2-7B-Chat", 1, True, False), + ("baichuan-inc/Baichuan2-13B-Chat", 1, 
False, False), + ("deepseek-ai/DeepSeek-V2-Lite", 1, False, False), + ("THUDM/chatglm2-6b", 1, True, False), + ("THUDM/chatglm3-6b", 1, True, False), + ("Qwen/Qwen2.5-7B", 4, False, False), ], "fp8": [ - ("tiiuae/falcon-180B", 4, 950, True, 128, 128, 2506.68), - ("meta-llama/Llama-2-7b-hf", 1, 1230, False, 128, 128, 13152.7), - ("meta-llama/Llama-2-7b-hf", 1, 163, False, 128, 2048, 4774.7), - ("meta-llama/Llama-2-7b-hf", 1, 94, False, 2048, 128, 1293.3), - ("meta-llama/Llama-2-7b-hf", 1, 81, False, 2048, 2048, 1942.9), - ("meta-llama/Llama-2-70b-hf", 4, 3042, False, 128, 128, 5374.6), - ("meta-llama/Llama-2-70b-hf", 4, 750, False, 128, 2048, 7422.4), - ("meta-llama/Llama-2-70b-hf", 4, 207, False, 2048, 128, 568.5), - ("meta-llama/Llama-2-70b-hf", 8, 172, False, 2048, 2048, 4656.2), - ("mistralai/Mistral-7B-Instruct-v0.2", 1, 896, True, 128, 128, 17068.965283763682), - # ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 128, 2048, 6979.225194247115), - # ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 2048, 128, 1681.4401450088983), - ("mistralai/Mistral-7B-Instruct-v0.2", 1, 44, True, 2048, 2048, 3393.149396451692), - ("mistralai/Mixtral-8x7B-v0.1", 1, 1, True, 128, 128, 40.94), - ("mistralai/Mixtral-8x7B-v0.1", 2, 768, True, 128, 128, 3428.65), - # ("mistralai/Mixtral-8x7B-v0.1", 2, 96, True, 128, 2048, 2570.34), - # ("mistralai/Mixtral-8x7B-v0.1", 2, 96, True, 2048, 128, 379.03), - ("mistralai/Mixtral-8x7B-v0.1", 2, 48, True, 2048, 2048, 1147.50), - ("microsoft/phi-2", 1, 1, True, 128, 128, 254.08932787178165), + ("tiiuae/falcon-180B", 4, 950, True, 128, 128), + ("meta-llama/Llama-2-7b-hf", 1, 1230, False, 128, 128), + ("meta-llama/Llama-2-7b-hf", 1, 163, False, 128, 2048), + ("meta-llama/Llama-2-7b-hf", 1, 94, False, 2048, 128), + ("meta-llama/Llama-2-7b-hf", 1, 81, False, 2048, 2048), + ("meta-llama/Llama-2-70b-hf", 4, 3042, False, 128, 128), + ("meta-llama/Llama-2-70b-hf", 4, 750, False, 128, 2048), + ("meta-llama/Llama-2-70b-hf", 4, 207, False, 2048, 128), + ("meta-llama/Llama-2-70b-hf", 8, 172, False, 2048, 2048), + ("mistralai/Mistral-7B-Instruct-v0.2", 1, 896, True, 128, 128), + # ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 128, 2048), + # ("mistralai/Mistral-7B-Instruct-v0.2", 1, 120, True, 2048, 128), + ("mistralai/Mistral-7B-Instruct-v0.2", 1, 44, True, 2048, 2048), + ("mistralai/Mixtral-8x7B-v0.1", 1, 1, True, 128, 128), + ("mistralai/Mixtral-8x7B-v0.1", 2, 768, True, 128, 128), + # ("mistralai/Mixtral-8x7B-v0.1", 2, 96, True, 128, 2048), + # ("mistralai/Mixtral-8x7B-v0.1", 2, 96, True, 2048, 128), + ("mistralai/Mixtral-8x7B-v0.1", 2, 48, True, 2048, 2048), + ("microsoft/phi-2", 1, 1, True, 128, 128), ], "load_quantized_model_with_autogptq": [ - ("TheBloke/Llama-2-7b-Chat-GPTQ", 1, 10, False, 128, 2048, 456.7), + ("TheBloke/Llama-2-7b-Chat-GPTQ", 1, 10, False, 128, 2048), + ], + "load_quantized_model_with_autoawq": [ + ("TheBloke/Llama-2-7b-Chat-AWQ", 1, 10, False, 128, 2048), ], "deepspeed": [ - ("bigscience/bloomz", 8, 1, 36.77314954096159), - # ("meta-llama/Llama-2-70b-hf", 8, 1, 64.10514998902435), - ("meta-llama/Meta-Llama-3-70B-Instruct", 8, 1, 64), - ("facebook/opt-66b", 2, 1, 28.48069266504111), - ("google/gemma-2-9b", 8, 1, 110.12610917383735), - ("google/gemma-2-27b", 8, 1, 87.578709544111), + ("bigscience/bloomz", 8, 1), + # ("meta-llama/Llama-2-70b-hf", 8, 1), + ("meta-llama/Meta-Llama-3-70B-Instruct", 8, 1), + ("facebook/opt-66b", 2, 1), + ("google/gemma-2-9b", 8, 1), + ("Qwen/Qwen2.5-72B", 2, 1), + ("google/gemma-2-27b", 8, 1), ], 
"torch_compile": [ - ("meta-llama/Llama-2-7b-hf", 102.27823420713148), + "meta-llama/Llama-2-7b-hf", ], "torch_compile_distributed": [ - ("meta-llama/Llama-2-7b-hf", 39.72973199515235), + "meta-llama/Llama-2-7b-hf", ], "distributed_tp": [ - ("meta-llama/Llama-2-7b-hf", 1345.2369318328463), + "meta-llama/Llama-2-7b-hf", ], "contrastive_search": [ - ("gpt2-xl", 1, False, 51.61471298016438), + ("gpt2-xl", 1, False), ], "beam_search": [ - ("Qwen/Qwen2-7b-Instruct", 1, True, 91.24938949709826), + ("Qwen/Qwen2-7b-Instruct", 1, True), ], } - MODEL_OUTPUTS = { - "bigcode/starcoder": 'def print_hello_world():\n print("Hello World")\n\ndef print_hello_world_twice():\n print_hello_world()\n print_hello_world()\n\ndef print_hello_world_thrice():\n print_hello_world()\n print_hello_world()\n print_hello_world()\n\ndef print_hello_world_four_times():\n print_hello_world()\n print_hello_world()\n print_hello_world()\n ', - "bigcode/starcoder2-3b": 'def print_hello_world():\n print("Hello World")\n\ndef print_hello_world_with_name(name):\n print("Hello World, " + name)\n\ndef print_hello_world_with_name_and_age(name, age):\n print("Hello World, " + name + ", " + str(age))\n\ndef print_hello_world_with_name_and_age_and_gender(name, age, gender):\n print("Hello', - "google/gemma-7b": "DeepSpeed is a machine learning framework that enables training of large-scale models on commodity hardware. It is designed to be a drop-in replacement for PyTorch, and it is compatible with the existing PyTorch ecosystem. DeepSpeed is designed to be easy to use, and it provides a number of features that make it easy to train large-scale models. DeepSpeed is designed to be scalable, and it can be used to train models on a single machine or on a cluster of machines. DeepSpeed is designed to be efficient,", - "google/gemma-2-9b": "DeepSpeed is a machine learning framework that enables training of large-scale deep learning models on a single GPU or across multiple GPUs. It is designed to be easy to use and highly scalable, making it a powerful tool for researchers and practitioners working with large-scale deep learning models.\n\nDeepSpeed is built on top of PyTorch, a popular deep learning framework, and provides a set of tools and libraries that make it easy to train large-scale models. It includes features such as zero-shot inference, which allows models to be", - "google/gemma-2-27b": "DeepSpeed is a machine learning framework that enables you to train models with trillions of parameters and beyond, using model parallelism to partition large models over multiple GPUs.\n\nThe following is a brief introduction to the DeepSpeed model parallel training.\n\n
 1. Introduction 
\n\nThe DeepSpeed model parallel training is a simple and effective way to train large models. It is a framework that enables you to train models with trillions of parameters and beyond.\n\nDeepSpeed is a distributed deep learning optimization toolkit that makes it easy and efficient", - "meta-llama/Llama-2-7b-hf": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. DeepSpeed is designed to be easy to use and to provide a high level of flex", - "mistralai/Mistral-7B-v0.1": "DeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system.\n\nDeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system", - "mistralai/Mixtral-8x7B-v0.1": "DeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## Introduction\n\nDeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## What is DeepSpeed", - "Qwen/Qwen2-7B": "DeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to provide high performance. DeepSpeed is built on top of PyTorch and TensorFlow, and it supports a wide range of models, including transformers, convolutional neural networks, and recurrent neural networks.\nDeepSpeed is a machine learning framework that provides a unified interface for training deep learning models. It is designed to be easy to use and to provide high performance. 
DeepSpeed is built on top of Py", - } else: - # Gaudi1 CI baselines + # Gaudi1 CI MODELS_TO_TEST = { "bf16_1x": [ - ("bigscience/bloomz-7b1", 1, False, 41.7555095197846, False), - ("gpt2-xl", 1, False, 142.11481820425706, False), + ("bigscience/bloomz-7b1", 1, False, False), + ("gpt2-xl", 1, False, False), # TODO: fix OPT 6.7B # ("facebook/opt-6.7b", 0.0), - ("EleutherAI/gpt-j-6b", 1, True, 156.2893125740893, False), - ("meta-llama/Llama-2-7b-hf", 1, True, 44.39616259946937, False), - ("tiiuae/falcon-7b", 1, True, 44.82870145718665, False), - ("bigcode/starcoder", 1, False, 15.945023767901013, False), - ("Salesforce/codegen2-1B", 1, False, 155.32071248826423, False), - ("mosaicml/mpt-7b", 1, False, 45.45168927038262, False), - ("mistralai/Mistral-7B-v0.1", 1, True, 41.21906841459711, False), - ("microsoft/phi-2", 1, False, 92.53083167241344, False), - ("google/gemma-7b", 1, False, 28.84284625836978, False), - ("stabilityai/stablelm-2-12b", 1, False, 26.80858949645992, False), - ("Qwen/Qwen1.5-7B", 1, False, 39.29068423087616, False), - ("adept/persimmon-8b-base", 1, False, 34.53559807384106, False), - ("bigcode/starcoder2-3b", 1, False, 82.09655684566117, False), - ("state-spaces/mamba-130m-hf", 224, False, 794.542, False), + ("EleutherAI/gpt-j-6b", 1, True, False), + ("meta-llama/Llama-2-7b-hf", 1, True, False), + ("tiiuae/falcon-7b", 1, True, False), + ("bigcode/starcoder", 1, False, False), + ("Salesforce/codegen2-1B", 1, False, False), + ("mosaicml/mpt-7b", 1, False, False), + ("mistralai/Mistral-7B-v0.1", 1, True, False), + ("microsoft/phi-2", 1, False, False), + ("google/gemma-7b", 1, False, False), + ("stabilityai/stablelm-2-12b", 1, False, False), + ("Qwen/Qwen1.5-7B", 1, False, False), + ("adept/persimmon-8b-base", 1, False, False), + ("bigcode/starcoder2-3b", 1, False, False), + ("state-spaces/mamba-130m-hf", 224, False, False), ], "fp8": [], "load_quantized_model_with_autogptq": [], + "load_quantized_model_with_autoawq": [], "deepspeed": [ - ("bigscience/bloomz-7b1", 8, 1, 31.994268212011505), + ("bigscience/bloomz-7b1", 8, 1), ], "torch_compile": [], "torch_compile_distributed": [], "distributed_tp": [], "contrastive_search": [ - ("gpt2-xl", 1, False, 34.48141280163397), + ("gpt2-xl", 1, False), ], "beam_search": [], } - MODEL_OUTPUTS = {} def _test_text_generation( model_name: str, - baseline: float, + baseline, token: str, batch_size: int = 1, reuse_cache: bool = False, @@ -167,6 +163,7 @@ def _test_text_generation( torch_compile: bool = False, fp8: bool = False, load_quantized_model_with_autogptq: bool = False, + load_quantized_model_with_autoawq: bool = False, max_input_tokens: int = 0, max_output_tokens: int = 100, parallel_strategy: str = None, @@ -300,6 +297,8 @@ def _test_text_generation( ] if load_quantized_model_with_autogptq: command += ["--load_quantized_model_with_autogptq"] + if load_quantized_model_with_autoawq: + command += ["--load_quantized_model_with_autoawq"] if parallel_strategy is not None: command += [ f"--parallel_strategy={parallel_strategy}", @@ -367,21 +366,23 @@ def _test_text_generation( with open(Path(tmp_dir) / "results.json") as fp: results = json.load(fp) + device = "gaudi2" if os.environ.get("GAUDI2_CI", "0") == "1" else "gaudi1" + # Ensure performance requirements (throughput) are met - assert results["throughput"] >= (2 - TIME_PERF_FACTOR) * baseline + baseline.assertRef( + compare=lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, + context=[device], + throughput=results["throughput"], + ) # Verify output for 1 HPU, BF16 if 
check_output: - assert model_name in MODEL_OUTPUTS, ( - f"Failed functional testing, missing expected output in MODEL_OUTPUTS for model {model_name}" - ) - expected_output = MODEL_OUTPUTS[model_name] - assert results["output"][0][0] == expected_output + baseline.assertRef(compare=operator.eq, context=[device], output=results["output"][0][0]) -@pytest.mark.parametrize("model_name, batch_size, reuse_cache, baseline, check_output", MODELS_TO_TEST["bf16_1x"]) +@pytest.mark.parametrize("model_name, batch_size, reuse_cache, check_output", MODELS_TO_TEST["bf16_1x"]) def test_text_generation_bf16_1x( - model_name: str, baseline: float, batch_size: int, reuse_cache: bool, token: str, check_output: bool + model_name: str, batch_size: int, reuse_cache: bool, check_output: bool, baseline, token ): _test_text_generation( model_name=model_name, @@ -395,17 +396,17 @@ def test_text_generation_bf16_1x( @pytest.mark.skipif(condition=not bool(int(os.environ.get("GAUDI2_CI", "0"))), reason="Skipping test for G1") @pytest.mark.parametrize( - "model_name, world_size, batch_size, reuse_cache, input_len, output_len, baseline", MODELS_TO_TEST["fp8"] + "model_name, world_size, batch_size, reuse_cache, input_len, output_len", MODELS_TO_TEST["fp8"] ) def test_text_generation_fp8( model_name: str, - baseline: float, world_size: int, batch_size: int, reuse_cache: bool, input_len: int, output_len: int, - token: str, + baseline, + token, ): deepspeed = True if world_size > 1 else False _test_text_generation( @@ -424,18 +425,18 @@ def test_text_generation_fp8( @pytest.mark.skipif(condition=not bool(int(os.environ.get("GAUDI2_CI", "0"))), reason="Skipping test for G1") @pytest.mark.parametrize( - "model_name, world_size, batch_size, reuse_cache, input_len, output_len, baseline", + "model_name, world_size, batch_size, reuse_cache, input_len, output_len", MODELS_TO_TEST["load_quantized_model_with_autogptq"], ) def test_text_generation_gptq( model_name: str, - baseline: float, world_size: int, batch_size: int, reuse_cache: bool, input_len: int, output_len: int, - token: str, + baseline, + token, ): deepspeed = True if world_size > 1 else False _test_text_generation( @@ -453,27 +454,58 @@ def test_text_generation_gptq( ) -@pytest.mark.parametrize("model_name, world_size, batch_size, baseline", MODELS_TO_TEST["deepspeed"]) -def test_text_generation_deepspeed(model_name: str, baseline: float, world_size: int, batch_size: int, token: str): +@pytest.mark.skipif(condition=not bool(int(os.environ.get("GAUDI2_CI", "0"))), reason="Skipping test for G1") +@pytest.mark.parametrize( + "model_name, world_size, batch_size, reuse_cache, input_len, output_len", + MODELS_TO_TEST["load_quantized_model_with_autoawq"], +) +def test_text_generation_awq( + model_name: str, + world_size: int, + batch_size: int, + reuse_cache: bool, + input_len: int, + output_len: int, + baseline, + token, +): + deepspeed = True if world_size > 1 else False + _test_text_generation( + model_name, + baseline, + token, + deepspeed=deepspeed, + world_size=world_size, + fp8=False, + load_quantized_model_with_autoawq=True, + batch_size=batch_size, + reuse_cache=reuse_cache, + max_input_tokens=input_len, + max_output_tokens=output_len, + ) + + +@pytest.mark.parametrize("model_name, world_size, batch_size", MODELS_TO_TEST["deepspeed"]) +def test_text_generation_deepspeed(model_name: str, world_size: int, batch_size: int, baseline, token): _test_text_generation(model_name, baseline, token, deepspeed=True, world_size=world_size, batch_size=batch_size) 
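# Illustrative sketch only: the hunks above swap hardcoded per-device throughput/accuracy
# numbers for a `baseline` pytest fixture backed by the JSON files added under
# tests/baselines/fixture/ later in this series. The helper below only approximates the
# comparison semantics of baseline.assertRef as it is used in these tests; the helper name,
# the per-device reference layout, and the 1.05 tolerance are assumptions for illustration,
# not the fixture's actual implementation or API.

TIME_PERF_FACTOR = 1.05  # assumed tolerance, mirroring the (2 - TIME_PERF_FACTOR) * ref pattern above

def _assert_ref_sketch(refs: dict, device: str, compare, **metrics):
    # Compare each measured metric against the reference value stored for this device.
    for name, actual in metrics.items():
        ref = refs[device][name]
        assert compare(actual, ref), f"{name}: got {actual}, reference {ref} on {device}"

# Hypothetical usage with a made-up reference value:
_refs = {"gaudi2": {"throughput": 130.0}}
_assert_ref_sketch(_refs, "gaudi2", lambda actual, ref: actual >= (2 - TIME_PERF_FACTOR) * ref, throughput=135.2)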
@pytest.mark.skipif(condition=not bool(int(os.environ.get("GAUDI2_CI", "0"))), reason="Skipping test for G1") -@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST["torch_compile"]) -def test_text_generation_torch_compile(model_name: str, baseline: float, token: str): +@pytest.mark.parametrize("model_name", MODELS_TO_TEST["torch_compile"]) +def test_text_generation_torch_compile(model_name: str, baseline, token): _test_text_generation(model_name, baseline, token, torch_compile=True) @pytest.mark.skipif(condition=not bool(int(os.environ.get("GAUDI2_CI", "0"))), reason="Skipping test for G1") -@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST["torch_compile_distributed"]) -def test_text_generation_torch_compile_distributed(model_name: str, baseline: float, token: str): +@pytest.mark.parametrize("model_name", MODELS_TO_TEST["torch_compile_distributed"]) +def test_text_generation_torch_compile_distributed(model_name: str, baseline, token): world_size = 8 _test_text_generation(model_name, baseline, token, deepspeed=True, world_size=world_size, torch_compile=True) @pytest.mark.skipif(condition=not bool(int(os.environ.get("GAUDI2_CI", "0"))), reason="Skipping test for G1") -@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST["distributed_tp"]) -def test_text_generation_distributed_tp(model_name: str, baseline: float, token: str): +@pytest.mark.parametrize("model_name", MODELS_TO_TEST["distributed_tp"]) +def test_text_generation_distributed_tp(model_name: str, baseline, token): world_size = 8 _test_text_generation( model_name, @@ -487,16 +519,14 @@ def test_text_generation_distributed_tp(model_name: str, baseline: float, token: ) -@pytest.mark.parametrize("model_name, batch_size, reuse_cache, baseline", MODELS_TO_TEST["contrastive_search"]) -def test_text_generation_contrastive_search( - model_name: str, baseline: float, batch_size: int, reuse_cache: bool, token: str -): +@pytest.mark.parametrize("model_name, batch_size, reuse_cache", MODELS_TO_TEST["contrastive_search"]) +def test_text_generation_contrastive_search(model_name: str, batch_size: int, reuse_cache: bool, baseline, token): _test_text_generation(model_name, baseline, token, batch_size, reuse_cache, contrastive_search=True) @pytest.mark.skipif(condition=not bool(int(os.environ.get("GAUDI2_CI", "0"))), reason="Skipping test for G1") -@pytest.mark.parametrize("model_name, batch_size, reuse_cache, baseline", MODELS_TO_TEST["beam_search"]) -def test_text_generation_beam_search(model_name: str, baseline: float, batch_size: int, reuse_cache: bool, token: str): +@pytest.mark.parametrize("model_name, batch_size, reuse_cache", MODELS_TO_TEST["beam_search"]) +def test_text_generation_beam_search(model_name: str, batch_size: int, reuse_cache: bool, baseline, token): _test_text_generation(model_name, baseline, token, batch_size, reuse_cache, num_beams=3) _test_text_generation(model_name, baseline, token, batch_size, reuse_cache, num_beams=3, num_return_sequences=2) diff --git a/tests/utils.py b/tests/utils.py index f8de616149..6024a573df 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -47,7 +47,7 @@ ], "wav2vec2": [ ("facebook/wav2vec2-base", "Habana/wav2vec2"), - # ("facebook/wav2vec2-large-lv60", "Habana/wav2vec2"), + ("facebook/wav2vec2-large-lv60", "Habana/wav2vec2"), ], "swin": [("microsoft/swin-base-patch4-window7-224-in22k", "Habana/swin")], "clip": [("./clip-roberta", "Habana/clip")], @@ -65,6 +65,7 @@ "mllama": [("meta-llama/Llama-3.2-11B-Vision-Instruct", "Habana/gpt2")], "gemma": 
[("google/gemma-2b-it", "Habana/gpt2")], "chatglm": [("THUDM/chatglm3-6b", "Habana/gpt2")], + "llava": [("llava-hf/llava-1.5-7b-hf", "Habana/gpt2")], } MODELS_TO_TEST_FOR_QUESTION_ANSWERING = [ From be813545325bc81f06cf2941e1621dda406cc15b Mon Sep 17 00:00:00 2001 From: root Date: Mon, 24 Feb 2025 14:44:40 +0800 Subject: [PATCH 31/32] add conflict free cogvideox support. --- .../GaudiNIC/.deepspeed_env | 5 + .../nli/requirements.txt | 2 + .../paraphrases/requirements.txt | 1 + .../sts/requirements.txt | 2 + .../stable-diffusion-3/measure_config.json | 5 + .../stable-diffusion-3/quantize_config.json | 6 + .../measure/fp8_hooks_maxabs.json | 18871 ++++++++++++++++ .../measure/fp8_hooks_maxabs.npz | Bin 0 -> 263025 bytes .../stable-diffusion-xl/measure_config.json | 6 + .../stable-diffusion-xl/quantize_config.json | 7 + .../text_to_video_generation.py | 216 + .../training/download_train_datasets.py | 55 + .../quantization_config/pow2_quant.json | 7 + .../quantization_config/weight_opt_quant.json | 7 + examples/text-generation/requirements_awq.txt | 2 + examples/video-comprehension/README.md | 43 + examples/video-comprehension/requirements.txt | 2 + examples/video-comprehension/run_example.py | 278 + optimum/habana/AutoAWQ/gemm_hpu.py | 153 + optimum/habana/accelerate/utils/modeling.py | 52 + .../pipelines/i2vgen_xl/pipeline_i2vgen_xl.py | 668 + optimum/habana/quantizers/bitsandbytes.py | 265 + .../habana/transformers/integrations/awq.py | 216 + .../modeling_utils_transformers.py | 89 + .../models/deepseek_v3/__init__.py | 2 + .../deepseek_v3/configuration_deepseek_v3.py | 217 + .../deepseek_v3/modeling_deepseek_v3.py | 1913 ++ .../transformers/models/qwen2_vl/__init__.py | 9 + .../models/qwen2_vl/modeling_qwen2_vl.py | 755 + .../models/video_llava/__init__.py | 1 + .../video_llava/modeling_video_llava.py | 411 + .../models/xlm_roberta/__init__.py | 1 + .../xlm_roberta/modeling_xlm_roberta.py | 102 + tests/Habana_Validated_Models.md | 136 + .../fixture/tests/test_encoder_decoder.json | 32 + .../fixture/tests/test_fp8_examples.json | 8 + .../fixture/tests/test_fsdp_examples.json | 14 + .../tests/test_image_to_text_example.json | 94 + .../fixture/tests/test_openclip_vqa.json | 18 + .../fixture/tests/test_pipeline.json | 17 + .../tests/test_sentence_transformers.json | 106 + .../tests/test_text_generation_example.json | 444 + tests/baselines/llava_1_5_7b_hf.json | 38 + tests/test_bnb_inference.py | 66 + tests/test_bnb_qlora.py | 152 + tests/test_video_llava.py | 77 + 46 files changed, 25571 insertions(+) create mode 100644 examples/multi-node-training/GaudiNIC/.deepspeed_env create mode 100644 examples/sentence-transformers-training/nli/requirements.txt create mode 100644 examples/sentence-transformers-training/paraphrases/requirements.txt create mode 100644 examples/sentence-transformers-training/sts/requirements.txt create mode 100644 examples/stable-diffusion/quantization/stable-diffusion-3/measure_config.json create mode 100644 examples/stable-diffusion/quantization/stable-diffusion-3/quantize_config.json create mode 100644 examples/stable-diffusion/quantization/stable-diffusion-xl/measure/fp8_hooks_maxabs.json create mode 100644 examples/stable-diffusion/quantization/stable-diffusion-xl/measure/fp8_hooks_maxabs.npz create mode 100644 examples/stable-diffusion/quantization/stable-diffusion-xl/measure_config.json create mode 100644 examples/stable-diffusion/quantization/stable-diffusion-xl/quantize_config.json create mode 100755 examples/stable-diffusion/text_to_video_generation.py create mode 
100755 examples/stable-diffusion/training/download_train_datasets.py create mode 100644 examples/text-generation/quantization_config/pow2_quant.json create mode 100644 examples/text-generation/quantization_config/weight_opt_quant.json create mode 100644 examples/text-generation/requirements_awq.txt create mode 100644 examples/video-comprehension/README.md create mode 100644 examples/video-comprehension/requirements.txt create mode 100644 examples/video-comprehension/run_example.py create mode 100644 optimum/habana/AutoAWQ/gemm_hpu.py create mode 100644 optimum/habana/accelerate/utils/modeling.py create mode 100644 optimum/habana/diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py create mode 100644 optimum/habana/quantizers/bitsandbytes.py create mode 100644 optimum/habana/transformers/integrations/awq.py create mode 100644 optimum/habana/transformers/modeling_utils_transformers.py create mode 100644 optimum/habana/transformers/models/deepseek_v3/__init__.py create mode 100644 optimum/habana/transformers/models/deepseek_v3/configuration_deepseek_v3.py create mode 100644 optimum/habana/transformers/models/deepseek_v3/modeling_deepseek_v3.py create mode 100644 optimum/habana/transformers/models/qwen2_vl/__init__.py create mode 100644 optimum/habana/transformers/models/qwen2_vl/modeling_qwen2_vl.py create mode 100644 optimum/habana/transformers/models/video_llava/__init__.py create mode 100644 optimum/habana/transformers/models/video_llava/modeling_video_llava.py create mode 100644 optimum/habana/transformers/models/xlm_roberta/__init__.py create mode 100644 optimum/habana/transformers/models/xlm_roberta/modeling_xlm_roberta.py create mode 100644 tests/Habana_Validated_Models.md create mode 100644 tests/baselines/fixture/tests/test_encoder_decoder.json create mode 100644 tests/baselines/fixture/tests/test_fp8_examples.json create mode 100644 tests/baselines/fixture/tests/test_fsdp_examples.json create mode 100644 tests/baselines/fixture/tests/test_image_to_text_example.json create mode 100644 tests/baselines/fixture/tests/test_openclip_vqa.json create mode 100644 tests/baselines/fixture/tests/test_pipeline.json create mode 100644 tests/baselines/fixture/tests/test_sentence_transformers.json create mode 100644 tests/baselines/fixture/tests/test_text_generation_example.json create mode 100644 tests/baselines/llava_1_5_7b_hf.json create mode 100644 tests/test_bnb_inference.py create mode 100644 tests/test_bnb_qlora.py create mode 100644 tests/test_video_llava.py diff --git a/examples/multi-node-training/GaudiNIC/.deepspeed_env b/examples/multi-node-training/GaudiNIC/.deepspeed_env new file mode 100644 index 0000000000..0fa8686f68 --- /dev/null +++ b/examples/multi-node-training/GaudiNIC/.deepspeed_env @@ -0,0 +1,5 @@ +GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so +HABANA_LOGS=/var/log/habana_logs/ +HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw +HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins +DATA_LOADER_AEON_LIB_PATH=/usr/lib/habanalabs/libaeon.so diff --git a/examples/sentence-transformers-training/nli/requirements.txt b/examples/sentence-transformers-training/nli/requirements.txt new file mode 100644 index 0000000000..680dc8a2bb --- /dev/null +++ b/examples/sentence-transformers-training/nli/requirements.txt @@ -0,0 +1,2 @@ +datasets +peft diff --git a/examples/sentence-transformers-training/paraphrases/requirements.txt b/examples/sentence-transformers-training/paraphrases/requirements.txt new file mode 100644 index 0000000000..aee11b288a --- /dev/null +++ 
b/examples/sentence-transformers-training/paraphrases/requirements.txt @@ -0,0 +1 @@ +datasets diff --git a/examples/sentence-transformers-training/sts/requirements.txt b/examples/sentence-transformers-training/sts/requirements.txt new file mode 100644 index 0000000000..680dc8a2bb --- /dev/null +++ b/examples/sentence-transformers-training/sts/requirements.txt @@ -0,0 +1,2 @@ +datasets +peft diff --git a/examples/stable-diffusion/quantization/stable-diffusion-3/measure_config.json b/examples/stable-diffusion/quantization/stable-diffusion-3/measure_config.json new file mode 100644 index 0000000000..ebf3baa292 --- /dev/null +++ b/examples/stable-diffusion/quantization/stable-diffusion-3/measure_config.json @@ -0,0 +1,5 @@ +{ + "method": "HOOKS", + "mode": "MEASURE", + "dump_stats_path": "quantization/stable-diffusion-3/measure_all/fp8" +} \ No newline at end of file diff --git a/examples/stable-diffusion/quantization/stable-diffusion-3/quantize_config.json b/examples/stable-diffusion/quantization/stable-diffusion-3/quantize_config.json new file mode 100644 index 0000000000..1fa98ebce0 --- /dev/null +++ b/examples/stable-diffusion/quantization/stable-diffusion-3/quantize_config.json @@ -0,0 +1,6 @@ +{ + "method": "HOOKS", + "mode": "QUANTIZE", + "scale_method": "maxabs_hw_opt_weight", + "dump_stats_path": "quantization/stable-diffusion-3/measure_all/fp8" +} \ No newline at end of file diff --git a/examples/stable-diffusion/quantization/stable-diffusion-xl/measure/fp8_hooks_maxabs.json b/examples/stable-diffusion/quantization/stable-diffusion-xl/measure/fp8_hooks_maxabs.json new file mode 100644 index 0000000000..62c76a2685 --- /dev/null +++ b/examples/stable-diffusion/quantization/stable-diffusion-xl/measure/fp8_hooks_maxabs.json @@ -0,0 +1,18871 @@ +{ + "GlobalRank": null, + "LocalRank": null, + "Mode": "DynamicRange", + "Nodes": { + "conv_in": { + "inputs": [ + [ + [ + 7.46875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.4765625 + ] + ] + } + }, + "time_embedding.linear_1": { + "inputs": [ + [ + [ + 1.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.171875 + ] + ] + } + }, + "time_embedding.linear_2": { + "inputs": [ + [ + [ + 3.671875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1318359375 + ] + ] + } + }, + "add_embedding.linear_1": { + "inputs": [ + [ + [ + 6.25 + ] + ] + ], + "params": { + "weight": [ + [ + 1.390625 + ] + ] + } + }, + "add_embedding.linear_2": { + "inputs": [ + [ + [ + 4.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.484375 + ] + ] + } + }, + "down_blocks.0.resnets.0.conv1": { + "inputs": [ + [ + [ + 5.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.57421875 + ] + ] + } + }, + "down_blocks.0.resnets.0.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.57421875 + ] + ] + } + }, + "down_blocks.0.resnets.0.conv2": { + "inputs": [ + [ + [ + 12.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.486328125 + ] + ] + } + }, + "down_blocks.0.resnets.1.conv1": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.65234375 + ] + ] + } + }, + "down_blocks.0.resnets.1.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.294921875 + ] + ] + } + }, + "down_blocks.0.resnets.1.conv2": { + "inputs": [ + [ + [ + 11.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.41796875 + ] + ] + } + }, + "down_blocks.0.downsamplers.0.conv": { + "inputs": [ + [ + [ + 13.8125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.234375 + ] + ] + } + }, + 
"down_blocks.1.attentions.0.proj_in": { + "inputs": [ + [ + [ + 6.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.177734375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 8.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2265625 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 8.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.19921875 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 8.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2314453125 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.65625 + ] + ], + [ + [ + 9.4375 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.34375 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 270.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 9.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.287109375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1279296875 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 20.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10302734375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 107.5 + ] + ], + [ + [ + 6.84375 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.125 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 976.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 6.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2255859375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 23.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.3125 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 9.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1865234375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 9.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1826171875 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 9.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1826171875 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.3359375 + ] + ] + } + }, + 
"down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.0 + ] + ], + [ + [ + 9.0 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.65625 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 266.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 11.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.33203125 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.154296875 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2412109375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 97.0 + ] + ], + [ + [ + 5.375 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 17.375 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2288.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 7.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.240234375 + ] + ] + } + }, + "down_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 28.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.291015625 + ] + ] + } + }, + "down_blocks.1.attentions.0.proj_out": { + "inputs": [ + [ + [ + 22.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1083984375 + ] + ] + } + }, + "down_blocks.1.attentions.1.proj_in": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1748046875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 6.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.193359375 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 6.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1923828125 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 6.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.248046875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.1875 + ] + ], + [ + [ + 8.3125 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.125 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 168.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": { + 
"inputs": [ + [ + [ + 9.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1337890625 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1533203125 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0986328125 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 15.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1884765625 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 135.0 + ] + ], + [ + [ + 4.78125 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 15.125 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1936.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 5.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1953125 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 27.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2578125 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 8.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.193359375 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 8.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.201171875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 8.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1591796875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.35546875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.5 + ] + ], + [ + [ + 9.875 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.3125 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 182.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 10.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1201171875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09716796875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 17.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1943359375 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 152.0 + ] + ], + [ + [ + 4.9375 + ] + ] + ] + }, + 
"down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 19.625 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2080.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.201171875 + ] + ] + } + }, + "down_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 28.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.22265625 + ] + ] + } + }, + "down_blocks.1.attentions.1.proj_out": { + "inputs": [ + [ + [ + 26.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "down_blocks.1.resnets.0.conv1": { + "inputs": [ + [ + [ + 9.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.8203125 + ] + ] + } + }, + "down_blocks.1.resnets.0.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.625 + ] + ] + } + }, + "down_blocks.1.resnets.0.conv2": { + "inputs": [ + [ + [ + 7.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.271484375 + ] + ] + } + }, + "down_blocks.1.resnets.0.conv_shortcut": { + "inputs": [ + [ + [ + 33.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.251953125 + ] + ] + } + }, + "down_blocks.1.resnets.1.conv1": { + "inputs": [ + [ + [ + 5.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 1.1953125 + ] + ] + } + }, + "down_blocks.1.resnets.1.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.326171875 + ] + ] + } + }, + "down_blocks.1.resnets.1.conv2": { + "inputs": [ + [ + [ + 7.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.322265625 + ] + ] + } + }, + "down_blocks.1.downsamplers.0.conv": { + "inputs": [ + [ + [ + 46.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.25390625 + ] + ] + } + }, + "down_blocks.2.attentions.0.proj_in": { + "inputs": [ + [ + [ + 8.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.3359375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 3.390625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1455078125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 3.390625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 3.390625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.4375 + ] + ], + [ + [ + 7.15625 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.21875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 144.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 6.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09521484375 + ] + ] + } + }, + 
"down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2333984375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.119140625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08203125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 104.0 + ] + ], + [ + [ + 6.9375 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 21.125 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1776.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 2.703125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2158203125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 27.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.171875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 4.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.134765625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 4.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 4.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1279296875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 3.921875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1689453125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.15625 + ] + ], + [ + [ + 4.875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 3.921875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 101.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 8.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10400390625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2353515625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 15.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.05322265625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 95.0 + ] + ], + [ + [ + 5.53125 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 21.75 + ] + ] + ] + }, + 
"down_blocks.2.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1240.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 26.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1748046875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_q": { + "inputs": [ + [ + [ + 5.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_k": { + "inputs": [ + [ + [ + 5.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1357421875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_v": { + "inputs": [ + [ + [ + 5.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.142578125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1533203125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.96875 + ] + ], + [ + [ + 6.21875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.5625 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 135.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_q": { + "inputs": [ + [ + [ + 8.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09912109375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1630859375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12353515625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn2.to_out.0": { + "inputs": [ + [ + [ + 18.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0439453125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 126.0 + ] + ], + [ + [ + 5.59375 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.5 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 884.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.296875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2119140625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.2.ff.net.2": { + "inputs": [ + [ + [ + 16.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_q": { + "inputs": [ + [ + [ + 6.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_k": { + "inputs": [ + [ + [ + 6.375 + ] + ] + ], + "params": { + "weight": [ + [ + 
0.14453125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_v": { + "inputs": [ + [ + [ + 6.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1513671875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.25 + ] + ], + [ + [ + 6.09375 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.75 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 127.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_q": { + "inputs": [ + [ + [ + 8.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10888671875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.169921875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11767578125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn2.to_out.0": { + "inputs": [ + [ + [ + 18.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.049072265625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 122.0 + ] + ], + [ + [ + 5.34375 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 25.875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2208.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.390625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.193359375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.3.ff.net.2": { + "inputs": [ + [ + [ + 51.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.13671875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_q": { + "inputs": [ + [ + [ + 5.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.123046875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_k": { + "inputs": [ + [ + [ + 5.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1279296875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_v": { + "inputs": [ + [ + [ + 5.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.119140625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1337890625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.25 + ] + ], + [ + [ + 7.03125 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.71875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 129.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] 
+ ] + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_q": { + "inputs": [ + [ + [ + 7.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0810546875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1357421875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1044921875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn2.to_out.0": { + "inputs": [ + [ + [ + 22.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.04638671875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 110.0 + ] + ], + [ + [ + 4.875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 23.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1240.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.296875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.4.ff.net.2": { + "inputs": [ + [ + [ + 37.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1455078125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_q": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.130859375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_k": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.126953125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_v": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1259765625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12451171875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.625 + ] + ], + [ + [ + 5.875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.375 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 108.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_q": { + "inputs": [ + [ + [ + 7.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08349609375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09716796875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09228515625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn2.to_out.0": { + "inputs": [ + [ + [ + 20.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.040771484375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 65.0 + ] + ], + [ + [ + 3.859375 
+ ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 26.25 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 932.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.703125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1533203125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.5.ff.net.2": { + "inputs": [ + [ + [ + 61.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_q": { + "inputs": [ + [ + [ + 5.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1240234375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_k": { + "inputs": [ + [ + [ + 5.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_v": { + "inputs": [ + [ + [ + 5.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1220703125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1357421875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.1875 + ] + ], + [ + [ + 6.40625 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.46875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 128.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_q": { + "inputs": [ + [ + [ + 8.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.080078125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06884765625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.049560546875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn2.to_out.0": { + "inputs": [ + [ + [ + 1.765625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0260009765625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 43.25 + ] + ], + [ + [ + 2.9375 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 692.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.18359375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.6.ff.net.2": { + "inputs": [ + [ + [ + 61.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_q": { + "inputs": [ + [ + [ + 4.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 
+ ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_k": { + "inputs": [ + [ + [ + 4.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11962890625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_v": { + "inputs": [ + [ + [ + 4.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11767578125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11328125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 4.96875 + ] + ], + [ + [ + 5.53125 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.21875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 83.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_q": { + "inputs": [ + [ + [ + 7.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0751953125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08740234375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0693359375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.039794921875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 42.75 + ] + ], + [ + [ + 2.6875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 24.125 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 628.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.7.ff.net.2": { + "inputs": [ + [ + [ + 16.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.154296875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_q": { + "inputs": [ + [ + [ + 4.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1171875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_k": { + "inputs": [ + [ + [ + 4.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11865234375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_v": { + "inputs": [ + [ + [ + 4.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11083984375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.169921875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 4.78125 + ] + ], + [ + [ + 4.78125 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.03125 
+ ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 76.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_q": { + "inputs": [ + [ + [ + 6.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.068359375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.083984375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.059326171875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn2.to_out.0": { + "inputs": [ + [ + [ + 24.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.039794921875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 49.75 + ] + ], + [ + [ + 2.890625 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 24.25 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 908.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1455078125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.8.ff.net.2": { + "inputs": [ + [ + [ + 29.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1796875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_q": { + "inputs": [ + [ + [ + 5.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.130859375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_k": { + "inputs": [ + [ + [ + 5.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.126953125 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_v": { + "inputs": [ + [ + [ + 5.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1240234375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 4.96875 + ] + ], + [ + [ + 4.875 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.0625 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 88.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_q": { + "inputs": [ + [ + [ + 6.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07275390625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.087890625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08984375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn2.to_out.0": { + "inputs": [ + [ + [ + 32.0 + ] + ] + ], + "params": { + "weight": [ 
+ [ + 0.134765625 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 63.5 + ] + ], + [ + [ + 2.59375 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 32.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1024.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "down_blocks.2.attentions.0.transformer_blocks.9.ff.net.2": { + "inputs": [ + [ + [ + 20.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.17578125 + ] + ] + } + }, + "down_blocks.2.attentions.0.proj_out": { + "inputs": [ + [ + [ + 31.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11962890625 + ] + ] + } + }, + "down_blocks.2.attentions.1.proj_in": { + "inputs": [ + [ + [ + 9.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.490234375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 3.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.140625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 3.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.142578125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 3.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12255859375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2294921875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.6875 + ] + ], + [ + [ + 7.21875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 203.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 8.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10498046875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2060546875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2021484375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 23.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 131.0 + ] + ], + [ + [ + 5.34375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 23.375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1584.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + 
"down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.96875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.177734375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 16.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.251953125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 4.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 4.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 4.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1787109375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.9375 + ] + ], + [ + [ + 6.59375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.25 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 107.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 7.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2392578125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1474609375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 5.96875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07568359375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 117.0 + ] + ], + [ + [ + 7.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 19.75 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1424.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.140625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1787109375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 26.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16015625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_q": { + "inputs": [ + [ + [ + 3.890625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_k": { + "inputs": [ + [ + [ + 3.890625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_v": { + "inputs": [ + [ + [ + 3.890625 + ] + ] + ], + "params": { + "weight": [ + [ + 
0.1435546875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1728515625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.3125 + ] + ], + [ + [ + 5.28125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.625 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 96.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_q": { + "inputs": [ + [ + [ + 9.8125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.134765625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.224609375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn2.to_out.0": { + "inputs": [ + [ + [ + 17.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07568359375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 134.0 + ] + ], + [ + [ + 6.21875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 19.875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.2.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1200.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.46875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.162109375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.2.ff.net.2": { + "inputs": [ + [ + [ + 33.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.142578125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_q": { + "inputs": [ + [ + [ + 3.640625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_k": { + "inputs": [ + [ + [ + 3.640625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_v": { + "inputs": [ + [ + [ + 3.640625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1337890625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.15625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.34375 + ] + ], + [ + [ + 5.125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.46875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 80.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_q": { + "inputs": [ + [ + [ + 11.375 + ] + ] + ], + "params": { + "weight": [ 
+ [ + 0.1328125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1806640625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.142578125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn2.to_out.0": { + "inputs": [ + [ + [ + 7.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07373046875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 142.0 + ] + ], + [ + [ + 5.5625 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.3.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1440.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.796875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.3.ff.net.2": { + "inputs": [ + [ + [ + 26.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1640625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_q": { + "inputs": [ + [ + [ + 4.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_k": { + "inputs": [ + [ + [ + 4.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1318359375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_v": { + "inputs": [ + [ + [ + 4.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn1.to_out.0": { + "inputs": [ + [ + [ + 3.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1513671875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 4.4375 + ] + ], + [ + [ + 4.9375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 3.859375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 79.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_q": { + "inputs": [ + [ + [ + 10.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1201171875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.212890625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.142578125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn2.to_out.0": { + "inputs": [ + [ + [ + 14.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.064453125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 139.0 + ] + ], + [ + [ + 4.53125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 
24.875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.4.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1864.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.15625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1923828125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.4.ff.net.2": { + "inputs": [ + [ + [ + 23.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.166015625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_q": { + "inputs": [ + [ + [ + 5.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_k": { + "inputs": [ + [ + [ + 5.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1201171875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_v": { + "inputs": [ + [ + [ + 5.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12451171875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn1.to_out.0": { + "inputs": [ + [ + [ + 3.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 4.75 + ] + ], + [ + [ + 4.875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 3.859375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 72.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_q": { + "inputs": [ + [ + [ + 9.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.126953125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.21484375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.13671875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06396484375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 168.0 + ] + ], + [ + [ + 4.59375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 26.875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.5.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1512.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1640625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.5.ff.net.2": { + "inputs": [ + [ + [ + 21.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_q": { + "inputs": [ + [ + [ + 5.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_k": { + "inputs": [ + [ + [ + 5.65625 + ] + ] + ], + 
"params": { + "weight": [ + [ + 0.123046875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_v": { + "inputs": [ + [ + [ + 5.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1201171875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn1.to_out.0": { + "inputs": [ + [ + [ + 3.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 4.78125 + ] + ], + [ + [ + 4.6875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.21875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 91.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_q": { + "inputs": [ + [ + [ + 8.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09033203125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.169921875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11328125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn2.to_out.0": { + "inputs": [ + [ + [ + 28.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0576171875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 156.0 + ] + ], + [ + [ + 4.0625 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 30.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.6.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1728.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1572265625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.6.ff.net.2": { + "inputs": [ + [ + [ + 17.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_q": { + "inputs": [ + [ + [ + 5.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.130859375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_k": { + "inputs": [ + [ + [ + 5.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_v": { + "inputs": [ + [ + [ + 5.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12255859375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 4.875 + ] + ], + [ + [ + 5.34375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.46875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 80.0 + ] + ] + ], + 
"outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_q": { + "inputs": [ + [ + [ + 6.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1396484375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.123046875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn2.to_out.0": { + "inputs": [ + [ + [ + 25.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06298828125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 142.0 + ] + ], + [ + [ + 4.0625 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 34.25 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.7.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2112.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.96875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1591796875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.7.ff.net.2": { + "inputs": [ + [ + [ + 24.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_q": { + "inputs": [ + [ + [ + 5.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.13671875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_k": { + "inputs": [ + [ + [ + 5.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1328125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_v": { + "inputs": [ + [ + [ + 5.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1416015625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.09375 + ] + ], + [ + [ + 5.1875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.3125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 110.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_q": { + "inputs": [ + [ + [ + 5.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0986328125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1630859375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.126953125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn2.to_out.0": { + "inputs": [ + [ + [ + 25.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0595703125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 151.0 
+ ] + ], + [ + [ + 4.3125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 30.125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.8.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 3280.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16796875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.8.ff.net.2": { + "inputs": [ + [ + [ + 55.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1533203125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_q": { + "inputs": [ + [ + [ + 4.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.130859375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_k": { + "inputs": [ + [ + [ + 4.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_v": { + "inputs": [ + [ + [ + 4.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1328125 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1240234375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.40625 + ] + ], + [ + [ + 5.28125 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.6875 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 96.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_q": { + "inputs": [ + [ + [ + 4.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08349609375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11962890625 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08984375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn2.to_out.0": { + "inputs": [ + [ + [ + 26.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.055419921875 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 119.0 + ] + ], + [ + [ + 4.375 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 39.5 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.9.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2640.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.177734375 + ] + ] + } + }, + "down_blocks.2.attentions.1.transformer_blocks.9.ff.net.2": { + "inputs": [ + [ + [ + 29.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16015625 + ] + ] + } + }, + "down_blocks.2.attentions.1.proj_out": { + "inputs": [ + [ + [ + 19.875 + ] + ] + ], + "params": { + "weight": [ + [ + 
0.07666015625 + ] + ] + } + }, + "down_blocks.2.resnets.0.conv1": { + "inputs": [ + [ + [ + 5.96875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.58203125 + ] + ] + } + }, + "down_blocks.2.resnets.0.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.65625 + ] + ] + } + }, + "down_blocks.2.resnets.0.conv2": { + "inputs": [ + [ + [ + 7.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.29296875 + ] + ] + } + }, + "down_blocks.2.resnets.0.conv_shortcut": { + "inputs": [ + [ + [ + 100.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.265625 + ] + ] + } + }, + "down_blocks.2.resnets.1.conv1": { + "inputs": [ + [ + [ + 5.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.494140625 + ] + ] + } + }, + "down_blocks.2.resnets.1.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.66796875 + ] + ] + } + }, + "down_blocks.2.resnets.1.conv2": { + "inputs": [ + [ + [ + 8.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.3984375 + ] + ] + } + }, + "up_blocks.0.attentions.0.proj_in": { + "inputs": [ + [ + [ + 7.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.341796875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 3.859375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 3.859375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1572265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 3.859375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2001953125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.625 + ] + ], + [ + [ + 8.9375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.71875 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 304.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 6.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09521484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1708984375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2109375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 4.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.052734375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 85.0 + ] + ], + [ + [ + 4.25 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 33.25 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 804.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + 
"up_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 2.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.173828125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 26.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.392578125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 4.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 4.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1728515625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 4.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1552734375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1806640625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.5 + ] + ], + [ + [ + 7.4375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.84375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 193.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 7.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1708984375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.13671875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 20.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.05908203125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 88.0 + ] + ], + [ + [ + 5.40625 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 844.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1953125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 24.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_q": { + "inputs": [ + [ + [ + 4.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1640625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_k": { + "inputs": [ + [ + [ + 4.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1650390625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_v": { + "inputs": [ + [ + [ + 4.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + 
"up_blocks.0.attentions.0.transformer_blocks.2.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.228515625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.25 + ] + ], + [ + [ + 7.8125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.1875 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 160.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_q": { + "inputs": [ + [ + [ + 10.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1396484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.29296875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.51953125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn2.to_out.0": { + "inputs": [ + [ + [ + 14.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06640625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 108.0 + ] + ], + [ + [ + 5.9375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.25 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 868.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.703125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.193359375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.2.ff.net.2": { + "inputs": [ + [ + [ + 25.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2392578125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_q": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1552734375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_k": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_v": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn1.to_out.0": { + "inputs": [ + [ + [ + 8.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.220703125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.46875 + ] + ], + [ + [ + 8.0625 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 8.125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 186.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_q": { + "inputs": [ + [ + [ + 13.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_k": { + 
"inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1953125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1533203125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn2.to_out.0": { + "inputs": [ + [ + [ + 17.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0703125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 127.0 + ] + ], + [ + [ + 5.125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 18.5 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 944.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.15625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1728515625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.3.ff.net.2": { + "inputs": [ + [ + [ + 25.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.197265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_q": { + "inputs": [ + [ + [ + 5.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1591796875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_k": { + "inputs": [ + [ + [ + 5.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_v": { + "inputs": [ + [ + [ + 5.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1513671875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.318359375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.1875 + ] + ], + [ + [ + 6.84375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.6875 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 153.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_q": { + "inputs": [ + [ + [ + 15.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12060546875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1689453125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn2.to_out.0": { + "inputs": [ + [ + [ + 16.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.072265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 106.5 + ] + ], + [ + [ + 4.625 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 940.0 + ] + ] + 
], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1845703125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.4.ff.net.2": { + "inputs": [ + [ + [ + 42.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2275390625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_q": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_k": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.154296875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_v": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.294921875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.625 + ] + ], + [ + [ + 6.6875 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.65625 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 161.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_q": { + "inputs": [ + [ + [ + 16.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1162109375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.17578125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1279296875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn2.to_out.0": { + "inputs": [ + [ + [ + 12.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06787109375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 96.5 + ] + ], + [ + [ + 4.5 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 22.5 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1144.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.5.ff.net.2": { + "inputs": [ + [ + [ + 36.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.197265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_q": { + "inputs": [ + [ + [ + 7.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1396484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_k": { + "inputs": [ + [ + [ + 7.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_v": { + "inputs": [ + [ + [ + 7.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 
0.146484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.353515625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.875 + ] + ], + [ + [ + 6.6875 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.25 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 149.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_q": { + "inputs": [ + [ + [ + 17.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11572265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.177734375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.123046875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn2.to_out.0": { + "inputs": [ + [ + [ + 15.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.061767578125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 98.0 + ] + ], + [ + [ + 3.84375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 25.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1976.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.46875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1689453125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.6.ff.net.2": { + "inputs": [ + [ + [ + 38.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.216796875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_q": { + "inputs": [ + [ + [ + 8.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1572265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_k": { + "inputs": [ + [ + [ + 8.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1630859375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_v": { + "inputs": [ + [ + [ + 8.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.30859375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.15625 + ] + ], + [ + [ + 7.53125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.78125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 182.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_q": { + "inputs": [ + [ + [ + 17.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09375 + ] + ] + } + }, + 
"up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1220703125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn2.to_out.0": { + "inputs": [ + [ + [ + 12.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.052734375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 93.5 + ] + ], + [ + [ + 3.875 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 30.625 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1656.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.96875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.236328125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.7.ff.net.2": { + "inputs": [ + [ + [ + 35.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.337890625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_q": { + "inputs": [ + [ + [ + 8.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1416015625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_k": { + "inputs": [ + [ + [ + 8.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_v": { + "inputs": [ + [ + [ + 8.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.134765625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.3359375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.34375 + ] + ], + [ + [ + 6.34375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.78125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 138.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_q": { + "inputs": [ + [ + [ + 17.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0859375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12158203125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0947265625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn2.to_out.0": { + "inputs": [ + [ + [ + 23.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09130859375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 138.0 + ] + ], + [ + [ + 4.5 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 39.25 + ] + ] + ] + }, + 
"up_blocks.0.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 4768.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.29296875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.8.ff.net.2": { + "inputs": [ + [ + [ + 65.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.33984375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_q": { + "inputs": [ + [ + [ + 9.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16015625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_k": { + "inputs": [ + [ + [ + 9.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_v": { + "inputs": [ + [ + [ + 9.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.302734375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.34375 + ] + ], + [ + [ + 6.65625 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.34375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 141.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_q": { + "inputs": [ + [ + [ + 21.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11181640625 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0810546875 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn2.to_out.0": { + "inputs": [ + [ + [ + 31.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.080078125 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 91.0 + ] + ], + [ + [ + 4.84375 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 31.125 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2928.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2021484375 + ] + ] + } + }, + "up_blocks.0.attentions.0.transformer_blocks.9.ff.net.2": { + "inputs": [ + [ + [ + 61.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.474609375 + ] + ] + } + }, + "up_blocks.0.attentions.0.proj_out": { + "inputs": [ + [ + [ + 146.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.109375 + ] + ] + } + }, + "up_blocks.0.attentions.1.proj_in": { + "inputs": [ + [ + [ + 6.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.28515625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q": { + 
"inputs": [ + [ + [ + 4.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1533203125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 4.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 4.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.162109375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.31640625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.28125 + ] + ], + [ + [ + 6.34375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.1875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 136.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 7.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09521484375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1640625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1328125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 4.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 79.5 + ] + ], + [ + [ + 4.21875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 560.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.220703125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 25.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.259765625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 4.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 4.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1572265625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 4.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1455078125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1845703125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.78125 + ] + ], + [ + [ + 6.03125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ 
+ [ + [ + 1.0 + ] + ], + [ + [ + 6.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 113.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 7.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1591796875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1982421875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 15.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06884765625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 65.0 + ] + ], + [ + [ + 5.46875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 25.125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 512.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2890625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 58.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.248046875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_q": { + "inputs": [ + [ + [ + 4.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.154296875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_k": { + "inputs": [ + [ + [ + 4.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_v": { + "inputs": [ + [ + [ + 4.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.185546875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.15625 + ] + ], + [ + [ + 5.3125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.28125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 85.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_q": { + "inputs": [ + [ + [ + 8.8125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1630859375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.203125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.181640625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn2.to_out.0": { + "inputs": [ + [ + [ + 14.8125 + ] + ] + ], + "params": { + "weight": 
[ + [ + 0.0703125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 126.0 + ] + ], + [ + [ + 4.59375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 24.375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.2.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 772.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.859375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2236328125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.2.ff.net.2": { + "inputs": [ + [ + [ + 102.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.20703125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_q": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1669921875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_k": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1474609375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_v": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16796875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2216796875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.78125 + ] + ], + [ + [ + 5.90625 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.21875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 118.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_q": { + "inputs": [ + [ + [ + 8.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.140625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.201171875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1689453125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06787109375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 154.0 + ] + ], + [ + [ + 5.28125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 18.25 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.3.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1392.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.275390625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.3.ff.net.2": { + "inputs": [ + [ + [ + 105.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2431640625 + ] + ] + } + }, + 
"up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_q": { + "inputs": [ + [ + [ + 4.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_k": { + "inputs": [ + [ + [ + 4.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1572265625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_v": { + "inputs": [ + [ + [ + 4.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2353515625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.40625 + ] + ], + [ + [ + 5.90625 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.625 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 100.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_q": { + "inputs": [ + [ + [ + 11.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1259765625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.193359375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn2.to_out.0": { + "inputs": [ + [ + [ + 10.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06396484375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 118.5 + ] + ], + [ + [ + 4.09375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 19.375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.4.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 880.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2099609375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.4.ff.net.2": { + "inputs": [ + [ + [ + 38.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1982421875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_q": { + "inputs": [ + [ + [ + 5.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1455078125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_k": { + "inputs": [ + [ + [ + 5.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1474609375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_v": { + "inputs": [ + [ + [ + 5.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2490234375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.75 + ] + ], + [ + [ + 5.96875 + ] + ] + ] + }, + 
"up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 115.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_q": { + "inputs": [ + [ + [ + 11.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1669921875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0732421875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 117.5 + ] + ], + [ + [ + 4.6875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 23.5 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.5.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1392.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.26171875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.5.ff.net.2": { + "inputs": [ + [ + [ + 35.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1923828125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_q": { + "inputs": [ + [ + [ + 6.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.162109375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_k": { + "inputs": [ + [ + [ + 6.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.150390625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_v": { + "inputs": [ + [ + [ + 6.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.345703125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.9375 + ] + ], + [ + [ + 6.34375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.6875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 96.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_q": { + "inputs": [ + [ + [ + 11.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1220703125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1337890625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.142578125 + ] + ] + } + }, + 
"up_blocks.0.attentions.1.transformer_blocks.6.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0634765625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 199.0 + ] + ], + [ + [ + 4.46875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 22.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.6.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1944.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.0.proj": { + "inputs": [ + [ + [ + 5.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.19140625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.6.ff.net.2": { + "inputs": [ + [ + [ + 58.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.17578125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_q": { + "inputs": [ + [ + [ + 5.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_k": { + "inputs": [ + [ + [ + 5.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.162109375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_v": { + "inputs": [ + [ + [ + 5.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.267578125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.78125 + ] + ], + [ + [ + 5.90625 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.4375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 116.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_q": { + "inputs": [ + [ + [ + 10.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1005859375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1279296875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn2.to_out.0": { + "inputs": [ + [ + [ + 15.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.059326171875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 144.0 + ] + ], + [ + [ + 3.59375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 25.75 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.7.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2272.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.7.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2275390625 + ] + ] + } + }, + 
"up_blocks.0.attentions.1.transformer_blocks.7.ff.net.2": { + "inputs": [ + [ + [ + 58.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.19140625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_q": { + "inputs": [ + [ + [ + 7.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_k": { + "inputs": [ + [ + [ + 7.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_v": { + "inputs": [ + [ + [ + 7.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2099609375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.78125 + ] + ], + [ + [ + 5.84375 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.46875 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 108.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_q": { + "inputs": [ + [ + [ + 10.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09423828125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11572265625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn2.to_out.0": { + "inputs": [ + [ + [ + 16.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11474609375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 123.5 + ] + ], + [ + [ + 4.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 28.25 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.8.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2480.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.8125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.228515625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.8.ff.net.2": { + "inputs": [ + [ + [ + 60.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.27734375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_q": { + "inputs": [ + [ + [ + 7.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_k": { + "inputs": [ + [ + [ + 7.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1572265625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_v": { + "inputs": [ + [ + [ + 7.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.25 + ] + ] + } + }, + 
"up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.90625 + ] + ], + [ + [ + 6.15625 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 8.8125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 84.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_q": { + "inputs": [ + [ + [ + 12.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1220703125 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.103515625 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn2.to_out.0": { + "inputs": [ + [ + [ + 4.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.076171875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 97.5 + ] + ], + [ + [ + 5.125 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 34.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.9.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2016.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1875 + ] + ] + } + }, + "up_blocks.0.attentions.1.transformer_blocks.9.ff.net.2": { + "inputs": [ + [ + [ + 50.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.37890625 + ] + ] + } + }, + "up_blocks.0.attentions.1.proj_out": { + "inputs": [ + [ + [ + 74.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11865234375 + ] + ] + } + }, + "up_blocks.0.attentions.2.proj_in": { + "inputs": [ + [ + [ + 6.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.251953125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 3.453125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1630859375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 3.453125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1982421875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 3.453125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.130859375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.5390625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.4375 + ] + ], + [ + [ + 7.875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.8125 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 185.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 9.4375 + ] + ] + ], + 
"params": { + "weight": [ + [ + 0.10009765625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.341796875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.259765625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 2.734375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2392578125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 116.5 + ] + ], + [ + [ + 4.59375 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 31.125 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 952.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.19140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 22.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.365234375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 3.671875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 3.671875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.171875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 3.671875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.126953125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.26171875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.0 + ] + ], + [ + [ + 7.46875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.125 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 140.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 9.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1689453125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.55078125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.640625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 12.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07373046875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 80.5 + ] + ], + [ + [ + 6.3125 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 42.0 + ] + ] + ] + }, + 
"up_blocks.0.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 916.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.234375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.185546875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 24.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.244140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_q": { + "inputs": [ + [ + [ + 4.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_k": { + "inputs": [ + [ + [ + 4.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1591796875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_v": { + "inputs": [ + [ + [ + 4.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1328125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.59375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.263671875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.5 + ] + ], + [ + [ + 7.53125 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.90625 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 156.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_q": { + "inputs": [ + [ + [ + 9.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.5 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.412109375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08056640625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 98.0 + ] + ], + [ + [ + 5.65625 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 49.5 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.2.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 748.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.46875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.173828125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.2.ff.net.2": { + "inputs": [ + [ + [ + 16.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2392578125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_q": { + "inputs": [ + [ + [ + 4.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1650390625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_k": { + "inputs": [ + [ + [ + 4.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1796875 + ] + ] + } + }, + 
"up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_v": { + "inputs": [ + [ + [ + 4.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.34765625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.84375 + ] + ], + [ + [ + 7.125 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 147.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_q": { + "inputs": [ + [ + [ + 10.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1513671875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.515625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.4921875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn2.to_out.0": { + "inputs": [ + [ + [ + 11.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06494140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 105.5 + ] + ], + [ + [ + 4.375 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 19.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.3.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1004.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.671875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1982421875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.3.ff.net.2": { + "inputs": [ + [ + [ + 20.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2578125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_q": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.162109375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_k": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.169921875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_v": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.494140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.6875 + ] + ], + [ + [ + 7.53125 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.4375 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 140.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + 
"up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_q": { + "inputs": [ + [ + [ + 11.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10791015625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1728515625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.18359375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn2.to_out.0": { + "inputs": [ + [ + [ + 11.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07958984375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 76.0 + ] + ], + [ + [ + 3.75 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 17.25 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.4.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 600.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.859375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.19921875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.4.ff.net.2": { + "inputs": [ + [ + [ + 19.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.28125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_q": { + "inputs": [ + [ + [ + 5.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1650390625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_k": { + "inputs": [ + [ + [ + 5.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_v": { + "inputs": [ + [ + [ + 5.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.337890625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 5.84375 + ] + ], + [ + [ + 6.65625 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.5625 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 130.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_q": { + "inputs": [ + [ + [ + 12.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0947265625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.134765625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10205078125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn2.to_out.0": { + "inputs": [ + [ + [ + 10.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.05810546875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 79.0 + ] + ], + [ + [ + 3.796875 + ] + ] + ] + }, + 
"up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 25.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.5.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 832.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.703125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1826171875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.5.ff.net.2": { + "inputs": [ + [ + [ + 15.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.19140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_q": { + "inputs": [ + [ + [ + 5.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1826171875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_k": { + "inputs": [ + [ + [ + 5.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.18359375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_v": { + "inputs": [ + [ + [ + 5.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1376953125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn1.to_out.0": { + "inputs": [ + [ + [ + 4.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.30859375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.53125 + ] + ], + [ + [ + 6.46875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 4.875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 131.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_q": { + "inputs": [ + [ + [ + 12.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0810546875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.11865234375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08154296875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn2.to_out.0": { + "inputs": [ + [ + [ + 13.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.04638671875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 60.25 + ] + ], + [ + [ + 3.21875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 34.25 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.6.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1248.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.734375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.18359375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.6.ff.net.2": { + "inputs": [ + [ + [ + 22.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.220703125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_q": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16796875 + ] + ] + } + }, + 
"up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_k": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.19140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_v": { + "inputs": [ + [ + [ + 6.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1513671875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.32421875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.0 + ] + ], + [ + [ + 7.6875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 5.5625 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 161.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_q": { + "inputs": [ + [ + [ + 13.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0869140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1318359375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08642578125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn2.to_out.0": { + "inputs": [ + [ + [ + 10.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0419921875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 61.75 + ] + ], + [ + [ + 3.25 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 20.5 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.7.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1424.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.828125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1669921875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.7.ff.net.2": { + "inputs": [ + [ + [ + 30.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.38671875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_q": { + "inputs": [ + [ + [ + 7.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1806640625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_k": { + "inputs": [ + [ + [ + 7.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.208984375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_v": { + "inputs": [ + [ + [ + 7.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1337890625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2119140625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.28125 + ] + ], + [ + [ + 7.96875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.4375 + ] + ] + ] + }, + 
"up_blocks.0.attentions.2.transformer_blocks.8.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 190.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_q": { + "inputs": [ + [ + [ + 15.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08447265625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0751953125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn2.to_out.0": { + "inputs": [ + [ + [ + 5.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.043701171875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 70.5 + ] + ], + [ + [ + 3.4375 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 38.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.8.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1584.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.703125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1669921875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.8.ff.net.2": { + "inputs": [ + [ + [ + 25.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.298828125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_q": { + "inputs": [ + [ + [ + 6.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1640625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_k": { + "inputs": [ + [ + [ + 6.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.251953125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_v": { + "inputs": [ + [ + [ + 6.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2470703125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.34375 + ] + ], + [ + [ + 6.84375 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 132.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_q": { + "inputs": [ + [ + [ + 18.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1787109375 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.134765625 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07421875 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn2.to_out.0": { + "inputs": [ + [ + [ + 4.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07763671875 + ] + ] + } + }, + 
"up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 80.0 + ] + ], + [ + [ + 4.21875 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 46.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.9.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 2816.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1845703125 + ] + ] + } + }, + "up_blocks.0.attentions.2.transformer_blocks.9.ff.net.2": { + "inputs": [ + [ + [ + 26.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.5625 + ] + ] + } + }, + "up_blocks.0.attentions.2.proj_out": { + "inputs": [ + [ + [ + 121.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12109375 + ] + ] + } + }, + "up_blocks.0.resnets.0.conv1": { + "inputs": [ + [ + [ + 6.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.435546875 + ] + ] + } + }, + "up_blocks.0.resnets.0.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.62109375 + ] + ] + } + }, + "up_blocks.0.resnets.0.conv2": { + "inputs": [ + [ + [ + 10.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.384765625 + ] + ] + } + }, + "up_blocks.0.resnets.0.conv_shortcut": { + "inputs": [ + [ + [ + 82.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09228515625 + ] + ] + } + }, + "up_blocks.0.resnets.1.conv1": { + "inputs": [ + [ + [ + 6.96875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.53515625 + ] + ] + } + }, + "up_blocks.0.resnets.1.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.76171875 + ] + ] + } + }, + "up_blocks.0.resnets.1.conv2": { + "inputs": [ + [ + [ + 9.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.50390625 + ] + ] + } + }, + "up_blocks.0.resnets.1.conv_shortcut": { + "inputs": [ + [ + [ + 84.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.162109375 + ] + ] + } + }, + "up_blocks.0.resnets.2.conv1": { + "inputs": [ + [ + [ + 4.8125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.91015625 + ] + ] + } + }, + "up_blocks.0.resnets.2.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.82421875 + ] + ] + } + }, + "up_blocks.0.resnets.2.conv2": { + "inputs": [ + [ + [ + 6.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.26953125 + ] + ] + } + }, + "up_blocks.0.resnets.2.conv_shortcut": { + "inputs": [ + [ + [ + 100.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1396484375 + ] + ] + } + }, + "up_blocks.0.upsamplers.0.conv": { + "inputs": [ + [ + [ + 55.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.400390625 + ] + ] + } + }, + "up_blocks.1.attentions.0.proj_in": { + "inputs": [ + [ + [ + 5.21875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.26171875 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 6.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.20703125 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 6.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.197265625 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 6.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1923828125 + ] + ] + } + }, + 
"up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.34765625 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 10.0 + ] + ], + [ + [ + 9.125 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.53125 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 202.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 11.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1376953125 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2236328125 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0751953125 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 16.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1650390625 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 91.5 + ] + ], + [ + [ + 4.21875 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 22.875 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 800.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 6.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1982421875 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 22.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.380859375 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 9.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.201171875 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 9.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2578125 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 9.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1953125 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.380859375 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.75 + ] + ], + [ + [ + 11.0 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.90625 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 360.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 13.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1357421875 + ] + ] + } + }, + 
"up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.244140625 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10498046875 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 27.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.314453125 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 159.0 + ] + ], + [ + [ + 4.75 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 27.5 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1336.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 7.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1962890625 + ] + ] + } + }, + "up_blocks.1.attentions.0.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 41.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.201171875 + ] + ] + } + }, + "up_blocks.1.attentions.0.proj_out": { + "inputs": [ + [ + [ + 26.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.23828125 + ] + ] + } + }, + "up_blocks.1.attentions.1.proj_in": { + "inputs": [ + [ + [ + 5.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.205078125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 6.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.23046875 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 6.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.228515625 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 6.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.173828125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 5.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.328125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.625 + ] + ], + [ + [ + 8.6875 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.21875 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 236.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 10.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1357421875 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1982421875 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0751953125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 21.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm1": 
{ + "inputs": [ + [ + [ + 108.0 + ] + ], + [ + [ + 4.5625 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 21.625 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1008.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 5.71875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.251953125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 42.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2578125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 7.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2099609375 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 7.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2138671875 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 7.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2099609375 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 8.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.326171875 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 9.3125 + ] + ], + [ + [ + 9.0 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 8.375 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 408.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 10.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1328125 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.22265625 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12451171875 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 22.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.291015625 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 141.0 + ] + ], + [ + [ + 4.71875 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 22.875 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1416.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 5.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.248046875 + ] + ] + } + }, + "up_blocks.1.attentions.1.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 90.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2421875 + ] + ] + } + }, + "up_blocks.1.attentions.1.proj_out": { + "inputs": [ + [ + [ + 26.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16796875 + ] + ] + } + }, + 
"up_blocks.1.attentions.2.proj_in": { + "inputs": [ + [ + [ + 6.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2255859375 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 6.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.23046875 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 6.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2314453125 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 6.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1630859375 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.298828125 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.75 + ] + ], + [ + [ + 9.5 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.6875 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 211.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 11.4375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1318359375 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.255859375 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12353515625 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 24.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.203125 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 170.0 + ] + ], + [ + [ + 4.59375 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 24.375 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 936.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 6.46875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.396484375 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 28.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.318359375 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 6.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2236328125 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 6.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.263671875 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 6.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1748046875 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.03125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.427734375 + ] + ] + } + }, + 
"up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.5625 + ] + ], + [ + [ + 7.21875 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 8.125 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 168.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 10.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.29296875 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1591796875 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 37.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.298828125 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 117.0 + ] + ], + [ + [ + 3.921875 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 37.5 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1136.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 5.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.23828125 + ] + ] + } + }, + "up_blocks.1.attentions.2.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 37.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.30859375 + ] + ] + } + }, + "up_blocks.1.attentions.2.proj_out": { + "inputs": [ + [ + [ + 22.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1591796875 + ] + ] + } + }, + "up_blocks.1.resnets.0.conv1": { + "inputs": [ + [ + [ + 6.75 + ] + ] + ], + "params": { + "weight": [ + [ + 1.0078125 + ] + ] + } + }, + "up_blocks.1.resnets.0.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1552734375 + ] + ] + } + }, + "up_blocks.1.resnets.0.conv2": { + "inputs": [ + [ + [ + 7.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.59375 + ] + ] + } + }, + "up_blocks.1.resnets.0.conv_shortcut": { + "inputs": [ + [ + [ + 143.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2236328125 + ] + ] + } + }, + "up_blocks.1.resnets.1.conv1": { + "inputs": [ + [ + [ + 5.28125 + ] + ] + ], + "params": { + "weight": [ + [ + 1.015625 + ] + ] + } + }, + "up_blocks.1.resnets.1.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.470703125 + ] + ] + } + }, + "up_blocks.1.resnets.1.conv2": { + "inputs": [ + [ + [ + 7.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.5546875 + ] + ] + } + }, + "up_blocks.1.resnets.1.conv_shortcut": { + "inputs": [ + [ + [ + 48.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.24609375 + ] + ] + } + }, + "up_blocks.1.resnets.2.conv1": { + "inputs": [ + [ + [ + 5.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.64453125 + ] + ] + } + }, + "up_blocks.1.resnets.2.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.7578125 + ] + 
] + } + }, + "up_blocks.1.resnets.2.conv2": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.263671875 + ] + ] + } + }, + "up_blocks.1.resnets.2.conv_shortcut": { + "inputs": [ + [ + [ + 33.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1259765625 + ] + ] + } + }, + "up_blocks.1.upsamplers.0.conv": { + "inputs": [ + [ + [ + 21.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1923828125 + ] + ] + } + }, + "up_blocks.2.resnets.0.conv1": { + "inputs": [ + [ + [ + 8.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.87890625 + ] + ] + } + }, + "up_blocks.2.resnets.0.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.296875 + ] + ] + } + }, + "up_blocks.2.resnets.0.conv2": { + "inputs": [ + [ + [ + 12.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.90625 + ] + ] + } + }, + "up_blocks.2.resnets.0.conv_shortcut": { + "inputs": [ + [ + [ + 68.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.3515625 + ] + ] + } + }, + "up_blocks.2.resnets.1.conv1": { + "inputs": [ + [ + [ + 10.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.703125 + ] + ] + } + }, + "up_blocks.2.resnets.1.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1572265625 + ] + ] + } + }, + "up_blocks.2.resnets.1.conv2": { + "inputs": [ + [ + [ + 15.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.66015625 + ] + ] + } + }, + "up_blocks.2.resnets.1.conv_shortcut": { + "inputs": [ + [ + [ + 28.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.2314453125 + ] + ] + } + }, + "up_blocks.2.resnets.2.conv1": { + "inputs": [ + [ + [ + 9.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.78515625 + ] + ] + } + }, + "up_blocks.2.resnets.2.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1845703125 + ] + ] + } + }, + "up_blocks.2.resnets.2.conv2": { + "inputs": [ + [ + [ + 9.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.9140625 + ] + ] + } + }, + "up_blocks.2.resnets.2.conv_shortcut": { + "inputs": [ + [ + [ + 14.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.25 + ] + ] + } + }, + "mid_block.attentions.0.proj_in": { + "inputs": [ + [ + [ + 10.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.298828125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn1.to_q": { + "inputs": [ + [ + [ + 3.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.140625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn1.to_k": { + "inputs": [ + [ + [ + 3.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn1.to_v": { + "inputs": [ + [ + [ + 3.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1328125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1923828125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.625 + ] + ], + [ + [ + 8.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 6.625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.0.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 266.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.0.attn2.to_q": { + "inputs": [ + [ + [ + 
7.28125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07470703125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12109375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1064453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0": { + "inputs": [ + [ + [ + 19.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.046875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 83.0 + ] + ], + [ + [ + 4.53125 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 22.625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.0.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1168.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj": { + "inputs": [ + [ + [ + 2.765625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1806640625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.0.ff.net.2": { + "inputs": [ + [ + [ + 18.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1630859375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn1.to_q": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn1.to_k": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1533203125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn1.to_v": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1494140625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.4375 + ] + ], + [ + [ + 9.25 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.8125 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.1.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 247.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.1.attn2.to_q": { + "inputs": [ + [ + [ + 7.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07958984375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1259765625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1064453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn2.to_out.0": { + "inputs": [ + [ + [ + 19.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.042236328125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 83.5 + ] + ], + [ + [ + 5.125 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 23.75 + ] + ] + ] + }, + 
"mid_block.attentions.0.transformer_blocks.1.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 988.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.1.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16015625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.1.ff.net.2": { + "inputs": [ + [ + [ + 19.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.142578125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn1.to_q": { + "inputs": [ + [ + [ + 5.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.140625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn1.to_k": { + "inputs": [ + [ + [ + 5.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1767578125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn1.to_v": { + "inputs": [ + [ + [ + 5.84375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.34375 + ] + ], + [ + [ + 7.625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.25 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.2.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 170.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.2.attn2.to_q": { + "inputs": [ + [ + [ + 7.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.083984375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1220703125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn2.to_out.0": { + "inputs": [ + [ + [ + 18.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0439453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 72.0 + ] + ], + [ + [ + 3.9375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 24.75 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.2.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 780.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.2.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1611328125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.2.ff.net.2": { + "inputs": [ + [ + [ + 83.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn1.to_q": { + "inputs": [ + [ + [ + 6.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1376953125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn1.to_k": { + "inputs": [ + [ + [ + 6.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1513671875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn1.to_v": { + "inputs": 
[ + [ + [ + 6.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.90625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1337890625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 7.5 + ] + ], + [ + [ + 6.625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.90625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.3.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 140.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.3.attn2.to_q": { + "inputs": [ + [ + [ + 6.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06298828125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.10791015625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07958984375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn2.to_out.0": { + "inputs": [ + [ + [ + 23.375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.03515625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 62.0 + ] + ], + [ + [ + 3.984375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 24.25 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.3.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1088.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.3.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.765625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.154296875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.3.ff.net.2": { + "inputs": [ + [ + [ + 32.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn1.to_q": { + "inputs": [ + [ + [ + 5.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1416015625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn1.to_k": { + "inputs": [ + [ + [ + 5.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1455078125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn1.to_v": { + "inputs": [ + [ + [ + 5.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.15234375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn1.to_out.0": { + "inputs": [ + [ + [ + 8.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1708984375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 8.25 + ] + ], + [ + [ + 7.65625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 8.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.4.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 159.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.4.attn2.to_q": { + "inputs": [ + [ + [ + 6.1875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0576171875 + ] + ] + } + 
}, + "mid_block.attentions.0.transformer_blocks.4.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.09619140625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0693359375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn2.to_out.0": { + "inputs": [ + [ + [ + 22.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0322265625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 60.5 + ] + ], + [ + [ + 3.4375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 32.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.4.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 928.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.4.ff.net.0.proj": { + "inputs": [ + [ + [ + 3.640625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.158203125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.4.ff.net.2": { + "inputs": [ + [ + [ + 23.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn1.to_q": { + "inputs": [ + [ + [ + 4.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.126953125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn1.to_k": { + "inputs": [ + [ + [ + 4.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn1.to_v": { + "inputs": [ + [ + [ + 4.65625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.138671875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.150390625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.96875 + ] + ], + [ + [ + 7.59375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.75 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.5.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 146.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.5.attn2.to_q": { + "inputs": [ + [ + [ + 5.09375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0556640625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08642578125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.06591796875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn2.to_out.0": { + "inputs": [ + [ + [ + 27.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.033935546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 47.5 + ] + ], + [ + [ + 3.09375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 27.25 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.5.attn2.attention_module.softmax": { + 
"inputs": [ + [ + [ + 852.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.5.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.16796875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.5.ff.net.2": { + "inputs": [ + [ + [ + 26.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1552734375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn1.to_q": { + "inputs": [ + [ + [ + 4.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1357421875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn1.to_k": { + "inputs": [ + [ + [ + 4.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1318359375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn1.to_v": { + "inputs": [ + [ + [ + 4.78125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.123046875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn1.to_out.0": { + "inputs": [ + [ + [ + 6.46875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1240234375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.4375 + ] + ], + [ + [ + 6.09375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.0625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.6.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 121.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.6.attn2.to_q": { + "inputs": [ + [ + [ + 4.8125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.059814453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08349609375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.055908203125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn2.to_out.0": { + "inputs": [ + [ + [ + 31.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.030517578125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 61.75 + ] + ], + [ + [ + 3.484375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 31.5 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.6.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1288.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.6.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.6.ff.net.2": { + "inputs": [ + [ + [ + 40.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1396484375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn1.to_q": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.12890625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn1.to_k": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.126953125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn1.to_v": { + "inputs": [ + [ + [ + 4.9375 + ] + ] + ], + "params": { + 
"weight": [ + [ + 0.1416015625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn1.to_out.0": { + "inputs": [ + [ + [ + 7.34375 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.8125 + ] + ], + [ + [ + 6.65625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 7.59375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.7.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 122.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.7.attn2.to_q": { + "inputs": [ + [ + [ + 4.5625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.054443359375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.07958984375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.05029296875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn2.to_out.0": { + "inputs": [ + [ + [ + 28.75 + ] + ] + ], + "params": { + "weight": [ + [ + 0.031982421875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 57.25 + ] + ], + [ + [ + 2.875 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 30.25 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.7.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 912.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.7.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1455078125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.7.ff.net.2": { + "inputs": [ + [ + [ + 28.625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn1.to_q": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.134765625 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn1.to_k": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1318359375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn1.to_v": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn1.to_out.0": { + "inputs": [ + [ + [ + 8.0625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1484375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.84375 + ] + ], + [ + [ + 7.8125 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 8.125 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.8.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 142.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.8.attn2.to_q": { + "inputs": [ + [ + [ + 4.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.05517578125 + ] + ] + } + }, + 
"mid_block.attentions.0.transformer_blocks.8.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0830078125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.052734375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn2.to_out.0": { + "inputs": [ + [ + [ + 24.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.0296630859375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 61.0 + ] + ], + [ + [ + 2.984375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 26.75 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.8.attn2.attention_module.softmax": { + "inputs": [ + [ + [ + 1584.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.8.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.40625 + ] + ] + ], + "params": { + "weight": [ + [ + 0.17578125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.8.ff.net.2": { + "inputs": [ + [ + [ + 32.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1435546875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn1.to_q": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1298828125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn1.to_k": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1240234375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn1.to_v": { + "inputs": [ + [ + [ + 4.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1396484375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn1.to_out.0": { + "inputs": [ + [ + [ + 8.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.14453125 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm1": { + "inputs": [ + [ + [ + 6.40625 + ] + ], + [ + [ + 7.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 8.4375 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.9.attn1.attention_module.softmax": { + "inputs": [ + [ + [ + 117.5 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.9.attn2.to_q": { + "inputs": [ + [ + [ + 4.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.046630859375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn2.to_k": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.08740234375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn2.to_v": { + "inputs": [ + [ + [ + 852.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.04248046875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn2.to_out.0": { + "inputs": [ + [ + [ + 21.0 + ] + ] + ], + "params": { + "weight": [ + [ + 0.026123046875 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm1": { + "inputs": [ + [ + [ + 63.0 + ] + ], + [ + [ + 3.046875 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.bmm2": { + "inputs": [ + [ + [ + 1.0 + ] + ], + [ + [ + 28.625 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.9.attn2.attention_module.softmax": { + 
"inputs": [ + [ + [ + 1848.0 + ] + ] + ], + "outputs": [ + [ + [ + 1.0 + ] + ] + ] + }, + "mid_block.attentions.0.transformer_blocks.9.ff.net.0.proj": { + "inputs": [ + [ + [ + 4.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1552734375 + ] + ] + } + }, + "mid_block.attentions.0.transformer_blocks.9.ff.net.2": { + "inputs": [ + [ + [ + 50.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.146484375 + ] + ] + } + }, + "mid_block.attentions.0.proj_out": { + "inputs": [ + [ + [ + 33.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.1396484375 + ] + ] + } + }, + "mid_block.resnets.0.conv1": { + "inputs": [ + [ + [ + 5.5 + ] + ] + ], + "params": { + "weight": [ + [ + 0.41015625 + ] + ] + } + }, + "mid_block.resnets.0.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.53515625 + ] + ] + } + }, + "mid_block.resnets.0.conv2": { + "inputs": [ + [ + [ + 8.3125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.71484375 + ] + ] + } + }, + "mid_block.resnets.1.conv1": { + "inputs": [ + [ + [ + 6.53125 + ] + ] + ], + "params": { + "weight": [ + [ + 0.482421875 + ] + ] + } + }, + "mid_block.resnets.1.time_emb_proj": { + "inputs": [ + [ + [ + 7.6875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.640625 + ] + ] + } + }, + "mid_block.resnets.1.conv2": { + "inputs": [ + [ + [ + 10.875 + ] + ] + ], + "params": { + "weight": [ + [ + 0.61328125 + ] + ] + } + }, + "conv_out": { + "inputs": [ + [ + [ + 8.25 + ] + ] + ], + "params": { + "weight": [ + [ + 0.21875 + ] + ] + } + } + } +} \ No newline at end of file diff --git a/examples/stable-diffusion/quantization/stable-diffusion-xl/measure/fp8_hooks_maxabs.npz b/examples/stable-diffusion/quantization/stable-diffusion-xl/measure/fp8_hooks_maxabs.npz new file mode 100644 index 0000000000000000000000000000000000000000..ad009dc7682bac5ee8a0400e28b5f75db42b1f6a GIT binary patch literal 263025 zcmb@P1)Ln!()JS|NN{(zV1XLWfxj< z=vd2bz?lAHZSWAiyv~48h#wUdwPI%cqEtDXCHI;c8X@6H3OlTWBlkyX2N z>)Uy$+MvPt?&F4Ztxmc1WC+}Nwdz!d9x|aibyl6GTi?mI?#yd@RlD}<*KdPA`Ckcr zd*;P%ZthudfOF#|@A7{u^lL#i`x-G!I>ksV*1rKrx@ZR@Fs5!2Ft#F*-AMRoRWTTjU= zBgTx|B_Cg%V~6UT!#fQx1W~^SDu|iXIjl8<% zq>E^%u2obU%OaXm5j{m&$*Rpk*{eo5vQq7xSNnJoYyVZmI-0R=R$VVPBS^{Zb7Zi*b{zx@jVRMzSYzC`QXQ062YU@e{;FZ9 zW(>=!!(+yX+Kiu?sypY^k*Z;S-iN_scOBDz;Ha^KMvm{kLU$W7e$0?uo=*23)lo%t zbXm%nRLWQpkISmnAhsIuot5hNyqbF{yZlv3i)QSaRdVNx>r#x%98d@CGDdF?3-2hiv#RmJHQW>>H&H6KriW_ze+k-GbUu!Lt@6E zf0cAtUOoJOd3PGBM-Q z#EesGGk$KWo|acnpL8ibYX70mD5__crJR*YIa|c%WYu$n_`JW~mGkrJ1zyU9f0c5P zW?Y`Gd1!Znyb&|)#tpN=l?3_1bH6IJFoNmzv}!!Gk(mfKgEonYcqy7Re#B=zt&yn z$m(xJ_4l&QKT@54a#ZWYpDvko2Gg-^GJ+`BCMU{m3a~8Ox+t60SDR81G*e+Sn;Hz7 zX_WE>f5Rs0N|f8QVBK0;Y&vKPn;w)Rn*paL%4U>LK{H`9n^{9@fA!cbN_jz(H`}a4 zxy=T;uGy6hU2`acW=?EobAdrKw^B|sSCh>{l-sj2hB?;ONmkY*}~*8?z&3(+(#?69#L-V zgHqS;|KESR@neVPqX+C}8$exHU$8t+H^fQ*2{w{Xk^QilZLA?l;3i6WkrR5`rbM}A zAbSe!XSf8keX-4WX$RX}gMAZYTgZoVOKfIafx)TW*I#cM+fF#Q1qTl3_=kH#`QdKc zK~vcFU@3A3oK(<`@+qi4HnW{HB#9iLl#86H*#;8jHVE{Kaj>#+F%D4#%}{J+!@!^! 
zu9VX(-ee<)a@!fK=L59VHPS{xQ`ji5)HNC>=^7)Sg2rMq8>b;jS5+x5Xp25(M7fOz zU01Gb=-NdQG%eW7b_IiGH>I5B_$J$(D7QVpx^=bKp3oGw7g*{laFVXQD24P6H*g63dsW)r}mIYcR^nY_skCCcqEu%3mX{O{!p!{JaB zb_7^zI}#^pJ4!x99gWTG7!66}e=&NH#`(Rr3$q{6eYnVq9l5_+yuUg3+q z>^!2}&IjY8^8yWE?uFRQE&^lj#Y*|yS1Wc2QEr!l^(+i+t81{!;4SQO9T9a`U_;%N zdXWmR!e(~0Qc2x4N_mAW4A&Cnb{!b%uGawO-hj>QMlj~yq?FI)6u6lvw_Ct^7KS#| zrG?>Es0;fKSe~c1;iU8QcKH-}2R5@iH6#hVODQih?_+lp<#rD^aKO&K7~HD?wZ-5* z`S9G2&Fld%cpg+L-l&O0xjh7GF=*_-DmZj#cmDCAwlA@VVJhqqu+;V_PO9lK`PB3{ zHnS%*Bx!q6DX-~~iakY?+tZ+5gU=`%*Wj~?pm`3P+4Eq~yr7iR3~RC%iE?`h)HS$T z$E7vc%a9cI3RtRo6(^~BO+Mwkj?L^14N0orRLaY_v)SGv%I$5?RlTEZsCrisH1Ao5|YN%sz&uuus5J*QYp1*Jtu6=yPmlUua0u^`%l? z(5%h&6;Wmr zVBNY}Y#L|^>k5{-ro~CRrjt)W(_=H6K|_+R8I=lMGZE!BGw8ZzQ8xa2&8i5R*|3?- z4hGE}N`sp*@haD1a731 z7kN-`>qnH^#$c@8n`l67_1;uIJQ+5#&A{NSv+Xq`3EM#_FQ}>6b|lKJKj;_WPRhmwI6x6J1F@M60)u9-Qcg2> zlMNxtZ75jJhh}XnHw>D>hJ&T95jaWL&hjZ}BsQ~A8j^I4R>})vD>sHHx3Qq>8mDaN zsw#rUu$heqgCQaYY+Jpv?n&Ry)-20DwGOc zdlTih59qq~RW@|(rwE$;v6&qJ2F-y=In4r1b`Vi+2ZQx22CcSo6QC;W5U|vCC{EIL zn0$&l9Glq@8j`dfsgxJBZL=Lkl-tpuYdc2S&~~gMXpX~Xc03p~Cn)7KyEfU0M7f;= z*0UJ2u^lM?jqhZ53p+(eT|kuEg>_vzyI4m=-6hyicd1^a!ppFkU9MD8 zcZE`3A&bG4M7doBhPtaYfVtOTGrJayxz{P>a}RH}>xpu^0jy^+XhU6E3~q$Fu$#d0 zJiQqwou{|Rr^s8enf*sYlEB-P@*-zk+ioYy?G7*&gF7{#wiw(cAD+9hncV{h&%H{; z8+9L1Zuf&7+sgIm{@+`<2Vg4fL9o;|5hvC3kbG)-7@OH68j`dS)$yY19c6ye=8T#8ti#U3VQ)8RlSIlRJ|mh za$d$}_KJoiRj(@L<-FBwuMy?;I_Rq2P&QP(sR)|4u$jFL2F*K4In9Yp_AXIw?}1Xa z=%hEO<+pzPrEKprzOWC#Qox5eNx(<)Ir7KY%s$bOB;Zq}{Kz*q+h;_%eGa;SFO&@d zUn+v;D{N+8gF*9+Qcm+mqkT)1+jpP@EHWtpJ<`{f_C3Q3`vENL{}CtE|C4+U{4+MQ zUo<4u|Ep4d;QK1}8&Ph*gI@n1%0~TvDuSlde2F+vyHJTNhY;q#LxdT=IB){h5 zyUa;{&1qerDQrrx)HM}O(lxbw3YrF+Syv57x~5gibv;wD>4y$FR0PdT zSiZRfgJu?`oMxRSo0W)f?!dZrjkMXJDQpg~)HNqg(lwWS3Yr_6**qGObj_<&=$elx zxA{TWwScmrYe7ZOEQIA-J1}S#QOar7Zn8y*_|^`rTUU!M4ozW8fTgY_agwfX@+oL3 zY-USqNYb^8QlV>EqTH4PT~~KyL)Y?(pjiRSmvmszG$`dX{JFa#5ns}Q(lyDw^uL{l zzL#DZs=`(QOKq#-ByFq7r>NDjnXREAN!yxAc~Q?+Y%QYP8bR0Aq-<#GsR)`1HnV0h zXnHB-GSnuo-x?W#)82!PN{gKszkXN_^*5ENrQh5*41LiLsQrZV5#dwoTTd{`4n_AHnUSSBBQyOxNr<-mFtgEsbK3wWwA81I@_MuW<)8!TWh$y#@LB9q+Q8uo@PZdG)88)-e z!Jzp~}C|{!q$k4s5bNiE`^i4@SpUgY?h-`o*eshNdvJ z>Xf=B$4R=TkWWEfu$fJ%AxYO%N_jz@n{8^M+@=9tS65|2*R+bDnGT!T^kC4;pp?_F zm79?$x0%4Yb&a%{p($(@u+%jxPSQ1-dl)~&0>7KWyHnUYUBxze!DKF~7imgVJ+v=ceTSM8oAy>vv@H+56R`qoD;QsLTIe$%N`61uKZUg6ZuwjL3`=>%ha z+du=D+ZW4sa$wBeNGYGYK(qBD;yXF8W2^V1)HT>9@D{eIj)=Mp8|pUGi&VHdHnT01 zO6s;$$}42`-ijzUHNl0tZ8U(n+hQ}@4ve|mE9G-Psn`xgx$OvcZ1w&}b!qkP4|QQX zf#rER04JTN1LafXAZ%uXH6#ffqLdf8N^cuVl-n>cR`1~&P+Pr6$cJZVY-S_D;2EV< zyiucxavKA7Y%AwtYg@Uo&=fWfEQM8ZQb8u4g2rPr%QYkk+eIlaXwhbCAzkCWh0Gru? z8j^G!q?8xLR_kwr_*P)7_ISiZG;b71lp_J3i*JMW$<#rTUx2_gD8k)k6 z0ZU!S;v`+i$)}*>v6-EqAxYPXN`)Sq$3P4wPHDtKlu|8Xb|3%WJXmae19yq{8d5 zncbjN5_+RjUg2E5?Ixn!ZU*Dy@)iwX?yXq9lLKS!ZA$svvnqBw5#Pyy^(+Q$t81`3 z;VtYg9T9bRV?*6NdXWn6#b$P&Qc2zYN_mAW1`iPB_8=JQCTak4AHrt#Fc@z zapcLB@*_XhY*P^B)&+C{Qz{z*rcwmW)Y#0X0fVNiQcg2llTAyM+jO7={JmYf$KTtv zZF)u)HUn6eJ|j*leJ1%Fb7pL2vuH?q?`KuYk2z1X%|?{l?4XxEhq6)noQj~C3!B;8 zV9?B?l+$EAZC;|><^xsw;{WHp|Etsdzq*EPer6Q509Z;`5GP4kNIu727@OH58j>U| zs+1r5hl(vml-uHs{x9!-gDuC%!n%WH z>C5A!(pQkrF?(P$YtWEX`ie^VF~6(WN<_J>40`FSC>y1(stB6Zu$iq62F)5uInCis zwkA<-Yk~jv?w7Z<7(bj_g$&}{Z)8ScO<*aZCr*-3k*JOQ(a@!D`^t)f{fVYhpSy(@?EPZ2~RQe|J zIp(I=%rXs0rEjK`AM=iiZBCTi7ND2DrLs}_R*Im}?Lqja3=EoWm2#R>`KFABZ_2<) zzx({FSnYls<*f~D2WAwuBUno4kCPy2kstB6hu$k=+2F)HyIn9MlwkJ_;dx0u_;s5i=(?&-$|NW;hf2;rBehPCYG=-f7 zmb%WyNxIIFPeJEm`C+1lBwgn#bphU91S2OR$+;3I@$(N;%E! 
zO?Ej^ZdZVH>l$fSLQ~jPV5#eBoTTd-`4n_5HnZzABWBg*Y|ux?!~b_Xvz0O-0NR5o-?R0PdKShgl$&^)4))9|;`qeN^?K_k9m`}@D2{_on_{`b?@;bmW|3!_x*}}LX_L5U|U<}`qVYpXYdyGxsHgsFR-EROT9>iUt#&V zqEboSH%fVhY?;3$;^&HBsQX?6nEL}Zvme2j`;$^Wms8+pqTGG~+uAbMp)PHie}%fR z-@x)b{T(Nrr+>(&$Um`}b>b#wQDkR=T9GsLw#kTcn;eWS^As9T+cI~N56_g?%%%c^ zXKJP5jhcoix2|AY|9<+vimm9o)kHXT?Bn;s_>G=qE!nh~4XOd68@%gwBm7xZGq zW+BRLR?siN*_4e7aCSw|%z@2pPB3WZQp#!WYqYtEa+?RN=R>ph@2B%ZQ`mf9scU|m zq-z2B6to~VvxPJy=~`GRFNl9XU4$sNMM2lKn6ja3aYfK9fz515Flf3dAL zmImwA)ndy)Q`oX#scSi$q^rAp3R)hU*$NtxboEdwbTts=wj$`dR#G-}t*i)|Rj}NE z1Pq$hlyaKy8f|qVZa@Oovlz7c_tQ0@;+`X5sjU$wX={>CQ9ZHTb3{Xuwq~WgD1Lj~ zi->!UfUd2NvY~BlMbNB+&1_vTXx3B8X)bBB^@(!Z0IX**Xmu_A*Bt`-!duveIwBvJ z8)4()vY%e0!i}-~wo$1hbW^3g!l!y!M#OI$!T7k`TmzW91vaxS!I-<1Qa<;Mis^ob zxord1vlz6kuEDm2x3KMWMAU7M4Rt%{MJn78%WoT%O6qn}$}40s7(m2t8^KUFNCTKV z7@OG;Fy;cnjM;K0#R;daV5#bGoTTaq`IK`cHnXENB&j-DDKF>c zW;=!`w_`z9b)2%H>Uc%aoPf>jL@;PhQp#yoXtI-uaytd=*lLiz>Mef?b1F22od%Y= zPRB{Q&X7+*XJRuuOGA>bvz79K4yo8VM7f;{x~}t-4PECeg60BjW*34%bCFU`)3wPi zCd%y+ux?!=?NVq8y9_LKU5=AuEb_`m4+l;S1T2|t|7|pTF`Y}r)=oDUJ*1m zV7cE27&JF2}DeFHv-nJtHo}GhI@;ErLNm>lCIn3Q_vk)e#fXGN!MLUg|54a z_#GqYy6#mrbls;2n)|WbS_BN52bFRf?(Z{^h+B(*($)U0T&wfYw{j0d#l1zqQrn|A zN!w%cDe7@7_ZHERr0q$iyr@07w+IpU76Dz`Gs=dxXB9#79F}{FfI;(uQcklR_ZA`I z-Xfs1wSOy@w6(pJdl_DCH=-l5zP*Z#_3bsiNQJLsx!s6TN$8tOd4(5N>@6a0Hv-1` z_KpTH_g!pe?}0J*eWiTv85R40D7O#6j;-F4QrBP~!CTnJIwI;m!G^j|^&%C1hUG_$ zN+oq)DCHHhdVfj8j~KyF_q7Hv_Zw_x--0psJEeRsr@;3_x%~ikZ1w&}b!qkf5$eKz z0?YICXPk7N{vw|uf5m3@n}#HTzboZMKGMtnAj<7eFjntQ-2W}{be0d#WZ29m2ZLt{ zrQ(h1LX_K-V8^y{F1EInn+lr3rUpx4)8M3ny2_`ZX|b72ry)t$^h$X_otkY1qTFT# z{Q{gx*|-2_Rs_u~*vw`HgJw3RoaXsPo1G}PIly{8G;3SAIiV?RF0j-!H%`(uk9-Q6 z7n|998j^I)uapdYxvc`$vlz76%B>1jVXJ|qw$*Wxwl(Ba)SB4L*3yuqtx+j2YWHSqBFe2N=-MjE zhPGx!(DcG`%MmbW`Y7cz+cw(TMBH)&tY637gpfFy;
uK1GheX1242B!MH9@*+>~W21<2 z8x6){Fh&DvTe-3F;Tea`tO^E?DHU(jc%s~Luwz@f_`OK&r!c#~R9FjGYTFej)wG*@ zYT6x}*&Z5_wC$;s*VLL_JH)zONe zIR=~Av0%^~rES;NRypUl-mVh-MU8Fh0qjs5m@TF z7$@nvL_P&wip}gY4N1B#S1NQ}L6qB-pzFFy+0b>hB51C`a=#HUXs%PrX{K$m>xsDE z2w1nS7P}D|?kxhAx^Bivx^9tALAPT09ixUMUAHL}x^5@pcZ{Ixx>MQEb(bP&?#6O! z5in@(Rmy2*X|nr>xU~owK@-dEB63Y+*<@JwN1oH+8&ZmQ4eFew}^%$ZI3GD zMRDtz$B4MM2=9Gh6XVAO>Aawfid@OrFRALLWykJ!w9 z(vT$ZXQjMIT1tN*%I#M$R`1_5ptgGdE+3vhu$lb{22ZD{gNHY&Gf{4nfgRh*x!BrP zZgOY}n*uC_b-_smO(~y(ro!^X0u4#Rrcug;&CqOJiFjfG=ojF0%Ekpay&`C4z-Bfh z7&J2}vf*p;_C?%?eFnvw@|q*>RGtIpkB&oLHV%pdm@u+)8;tY~|)5 z;)w;I>zYs5&^5mzXcoX`wjdZZ3n}F^OE%fUM7b>j)~&0>7KNs;#lTY6;y6jy67nf% zNi0t*(2%5SDWyW!(nLJ10CZi;DjT|%Qv^+SY-Y=YL9>EVPV;-C^&rZv0jy^+XtkAF z5vsyg0!wWx<0NgX$fu}Pv6-!=AxYcnN_kNyHQO3QxvdGhwzZTEZHF(|Jze_+dhZRL8yTUZ|*k&nx@vGH-aj$Wj~b+MVPr&JQUzEWP{ zZ@p{-qTKp|@o~AK1~7LcY-atyn7gr3KKJ^HZ9;kr-N|Pc{*4=MGnDcHdI5Bz+p;xkvsIU;Y7KO0An%OSp#Z|!ASY= zjKXF%8VsH>O2r#BmMFJzV8^y{4S&1YTNS3l3@o*c$4NEi@~LSTY-TMQlC%Z>5}On!wEDb z>AF~{&~*tB4<`U!*Ja9vuFDlca|JfDE5V?-N-3w|hlp1b<#r8Nx2_hu7MjAY14~`k z<0M@-$fuwiv6f&7Dd)4gXMb7g26^ zgVNRhtz4_~(6@5;KvmeiV5#jsoTTl3`4sg4HnRscBx#$dloxe(#U3Kc?P1WhJ)&%A zdsGoLk72n*2^cg_DCIQsHrbOz+@b`Ow)Su3lD4+Da!}?$pb?;z9-Me~`3g5%>Yyzc{x(}4{3R%5BB;wfwV5s|81DN{>HnUH`nERPh zKKG@HeNL3y7huO$?|)R6R_`yNF6=9?JWs#IN&gAHkx!A|Vl(?rLz2MnmGUD0>}5X? z<@O^OtM^YDP+Pr!mJiP_*vx(fgXcG;;*I*9D7Qbrj&0>!Y;7y|Cp3le%wZSS87CDq znS2VG9Glq`8j^%{QOXONs@bL_%55spFTkmljSFxZMbLD`W;QJtG}9^NG;cQA^hCMM z0M_%NS=-9Z2u)!#f#rX&nQ@Y?S>#jDtk}$E(~zWVcBQ-^wsLb2?gbj_^@ znt8C9%?k$2d`dY@_a>X4D7OW`x^=bKg3uJU5LoJ37$@mkL_P&Aip^{>4N1BdS1NQZ zL6qB)pzG?UZ0K4_5j0Cnq`%8ny(veIilRUgY_&1t+sN@Lsi%cV5zMKPSVyO zpQ2X8@_YgfN!nIc%8NRl=Mxa|d;-w5t)^^fTU`+}YhW{56AYTQlyaK&8?BKjw;(_%n25qQIi@^?1)3XPb=V^bO zbe`@cpCSif>DkkeByf;YUS!v`Z7>l%dtfXELp7kb7z~pS&u}bFdtmVFtW>;FBZ+9* z13R{rTk&sOxzRAu)CZQ@#^R)!#>uCqDwd``4N2O@E9EuOeU%f@)Cc-C*rIG)gS#q% zW;ZNNePGb+p_J2b_o_XKXzBxX4Yq$P7t$K6fP`K@uvE1VPExh6e9GAmORt}XBvl6} z<>mCF*N=!^KhRYjtZb;7pa_~nu=M(YL35Z=PBR_7enj;8fgM{7R%*SKI}#dt0Krn% z(Kt!hG4d(sSS&q&8j^G!uap;bPQ^|jq6ZLkT_-6Ux=vOE%_&%V0KuR+O)01Ot= zq6ZMHTh~ZC6B>E|!BW@RI7!zz@+s(CEIoi4l60M~ROq^Zh#o-DbzP)v=(<=DG?!rM z0R)5QGNqh`A0l2(L=PZXx2_hu5*m5{!BW@NI7!zv@+s(AEIoi4l5}0KROq^ah#o-D zb={s0R)5QKT0_b?Gd*T(E|udSNpeetK+-#`^Y@UZldOu`~rLm4rU4lvl`B?l~fw0>N0{UeEyMzKG52 zB{1f`td!4vyJD{p<@PGrvDJH0>Kg1dcnf=7M?~El*iiSTUZldeusoYUsif{5rMyB` z?{|rKHUSvw-q!%;et^yFLonukq?FI)6!@4Zw@<*1t=|8rF0J05LS5KrV0oT?j+4&Q zFXU6?m)Oj{(vT$ZYo)x%&b{p$qTId(WA*+{18S@H_wwQS0ZU^b7(72I6>rqfL^KA1 z9ox#e*xFX^S7>Mg1WRGRVrc^G+Nz+=1hs0ll+!fP2S`L8AXv|0(8hM4+{ FHM0uA|IEH*!Z|?(u-8s6H8N|Qb}mDQeNR} zy{s1zO@Uy1T=vlb=B|z9rj=mKT~{fe%dhR%BjTo&U_FaL+v*x@19*7~xsHgs4Y8qa zBfUt4{jfZRT&bjP6Q#UD7K2TRcnUcf>Ne8==5CJVrj=mK-BKx^drPxzMZ`@j!Fm>h zHq@oXU>m3l+ZHU()9rB5dAhxPirfL4*^U~L1ol_Ti{uBEI}y>k2F7AAPy=d#?l9B z!H%s4E4SXt9SRLSfMBWXaGa#;2>BFrB$ggP4N1C=R>}+F!{8VqdH_M!b)2%H>v%=b zoPebV5Dc1=lyaIW=>a672N0}V*GM}R8hQZ1QrGD?N!Jbe{!>AFHb1zm}y z2T((juB(*_UDpuN0|>gV>y!;$*DHeN1}r^*V9?y8l+$#k2at#!Kv25czm;os9{N`9 zR;cI$1WRqV;UsOh%crP2u=D|HNYZwfQeM<$JkNlLK0wg5-K%V9yH61`_hab;1cT;5 zrJUx^Mw>`PA0Q}g?cd5JZEbJm9)_2uKpm0w?NMy3Z;$CkDtsJEQ=n2w=#xr$g*VU? 
zNJLX080*_J8o=CVv6(#w#@y$X^0~KH>;R#20 zRQMW}XBjA!)V-mUSIFx9CK1mv07Ko|8o=Clu$jFJ#@zRm^11xR_I;wSbRL<@O~QtM^wLP+PsfmJiQ2 zSQ-Pt;Q3Cec%!~2qA?Kc*jCQP*0yp#LPHZESPJ_YCl&OId@+RexBxpVf@U%-J%C`)Orez1T;6D1i0A-o^EZRMtdh8{q$)HMxG z($!Ty1x<^k2T((juIZKXg4oKZk(iR9{ChBFP0ua4N1D@S1NQZKtvB9=(-kCHgqkl2%1H(^ZrO-; zAn4jwP&TynPy|f_mOel*XjW3nX_joXm5JyB1nXH0+Sm@1Te(%?r72KH0Z5jW8U>sbui zR@Y$bz+2e5IwI=U!-l%`^&%B+fX%G0Qc2y0N_mAW1{)FO)(;GI8*2b_H^FAMDHwAz zrF`x)&9)g)ZkvPkECy|;ON+r4P#3l(Se~a_;iU6)Yxxwp4K}lFH6#h#PAM;PL?7Fp zD7PKJSPXX5fZAfvUp_oLVQCBmgJ+;p@kR|IqA?Kc*j8@kzis7)z(gM)SZW)FlWH0+ zpPEKs=>yb|q-~^9UeoA`jUu8C5cF$sjIwbJj#UKBI4o^|V9=OSPD97bcp};WL0yCG z-^zuw2HOP^Ism~^)vh>6)o$`BXLl?efEtoi?WvTPvr8}Ai--PgP`r0aC0Lf08Y^ZnvqM*V&4oIR{G*AQ&{~DdjYs=m8|62N0}VSBqT; z4LyKhsq12#r0Wv-6m%(;9zYFAx-M5LbX`G24t zvA#W|7pd@JEKPw*C83Whvdfncm}PiO#hpTuVN6c}@#R?6o-Ua@D0a(fo+ z*y=qgbq)3$yoEilBckpFY^ZxtFH+%4SneRIR8sefQeGje_p3zQK@<#iuWJBv-@s<} zCKz+yQp)EZU9q=`a(f5t*y{a{>eA}{F4Tp+2bSmQ`#9-5{XjlNeu(8Jq8gF}eyo%i z*{QdELc~o(!C1XN(}3FQ{keR2zQEEL2nNqrO2r%XH4%-0V8^y{F1EIn`xY9S0Krn& z_c*DbALLWek64-jH6#i9S*e<^X8VPRCP2_Hz~7XO3-EVE(ENd=2M`RJPSXX=1&!93 zh#o+&o)691R&H`==m7*vU0raJt|{eH&{SA@05v4(nno$t#a6B>5j}vQ>zYp4&^5gx zXlB6D0|*AqOiDS;k@Nr((E|w9t*gamg@ztLu+%j>PSQ1pdr0@0KuSHNGYf3+h_|D(E|w9vlz76$}I{NeSl!8ZE>8WZ3+1l zwIr54Kn+RSmQu=#dZ(u?O++6c=-QT5Hnc6L2%7F#`T)V8SwShMd44VHK|~)QSkGe6 z#&)3G%B=`5O@TThAD1g*l!XEA77U4!+4x3JziBI^2JL*3eXkqXzratBeRlDhSj@(NiD z)+gc)qF|`&s{zd25S!UXV9f2Ol+V4m*)}H1Z4cTRxJWn^nN$2V2 z@+oo)EH@F=kR)&`rE35A`qLa+v$Amwj#LEA zC@gJ&V9<)+rO0yX$@9|gbqNkR5czasmkS3&MsIw05v43+Epnp zXV+e~8xb9VpsU(L*-*8oB53x)(g6qt&E85m4ZTN+1M>AFBZ1zm`x2T((ju8WllU6&Bi0|>gV z%ajdWmn(wi3M@Te=FDOJoK&HwNTLq2$tHe$4T05kWW!J zV(A0akfiNqrM#$WIW5Dc0-m2#So8tpD3`T#*`>)YNu%#`<=@UZlbYurviKm4r@I$}2pZra&T^0>N0{9?<~iK8nrk zF)-#nu9VN^re#kM<@O}lvDJH0>Kg1Rcnf=4M?~E-*iiSZUZle3u-rjZsif`&rMyB` z?-z-6}2L^UJ{d|xRq@}ge$0TDM51!ML8NCRrC_s8<#`2?{ z(lv{G3Yry551@u5U9&3{y5=CF2M}~!b156T=2isFJXm@F!JwH>DW^HA(dH+j2N0}h zF=(}wTM#Px0Krn*!Z=CWBJwF}Q7nCc8j`dvu9O$GNwY0ML?0mN+PWzl+LlrT&C*!< z0KuSHRw<`hls-Tr`T)Ut7K1jn1Lan3d3b3G)Dii(?17Ds%Lctjg)3rd3REfyU0Erw zaG74V3K30#V0>JzrUA@d9h=!2V9Z@pDWAJ+#nvL?A$edui$UA!8mtN4!g}h6sH+147SvO+G4Pk ze0a9T(ijK^&$dd%8?_w~je%gtwsNcfZ7a6}O!NVQrMCV!sivLeQ_}z}eSjL0v<*_q zYZ}M1`-tcR1pOKus%%_?!xTX?97`J@7&JR8vwySYM05axuBt`ZP_?TfXm-QW0SE@o9!fdQ0(1Zp z(E$i{Y&BS|^;WKch8{q$)U^*z(zUOA3fd1#51@u5T?Z)T1sz+l1BvJX1YOs`%7(59 zil8|JOAjCzG>0kWG+#B^;Y9QRf_3W}X-7gs4`Z9r0R&53XX7MY=g6m^ zbFuUQYDm&`zEYv<0wQ_WHjwcVT0FyIU_(;XPQI0+mWa?^DVv zJe8(EBANohSl=Gh0On4_X7&&mb01bJ<~~A{+oNE|R_{rvYp}=QE$nd}5p_>sL*0{l zkqV!}atBeRlDcP<@(NkKpC#fBqF|_dUIUo>0yeW3!I=A!Qa<;dioHyf+bdwlR_}jQ zmsamrp)Tw-usly+$4Teu8}cdgO)NJN)sQ6cZKZ1e33}N(MBGFajMe)+4XCZ&@5_hh z11yb!VDNmTRJ>6i6VVt5c5Ex>VryHuPobd+5G;j#j*|-dLOun3iKPipLz1wsmGXj~ zuh=(4Gy#Hs0e+`!T!7yzg60P-J%C`){G^oA9MNb$6VU?**7KoR+sge44LyKhsq1%~ zr0Wm)6!a&S9>5t|71WuaRuEgc$%yCy1YOq@%7(5kilCViOAjCzx~5jjbzMOZAQ3%) zVBNY}Y+7jO0R&53)8iyvGsvf)8L{*LYDoI;HM3G)5V~d|q6ZLkU9%}0x@K1d%^X;I z0KuS{ODU%r&vO8Y=m7-lSqxfj<>rNoK0vV4Ha||%wt#$!S`bSgpoS!E3oGSC-P>%7 z5YYz+y0*oX4Q-1nf@TRUeSl!lbW_S{*vc(ML?0kn&tlNVcA(tKEdwu2fjS}|m&;+} z<1`mGZf(S8O$++*Swc zSq$1%*I;YFTiBX9BI?${hPp<*NQF&U?jWjEQdd#RD`YWfCgKjFV5sY@0nF`#&1`Kj z=B}fZ&%L+V)+NeqJ+Pj|pbd3tF<2k!!ZrZQ^RzEcI!`y0Pmvp8xrwNTB!L?%Wzi@|2{;n^HZV;~qjTPhWA)K)|^27(>i%B}Xdt=u*+(FX{Y+P1?< zHEl1Sns&g_2dE)QTYsgzrk0BBM8waQK)(hDDjV0}AVts&#?l4|2F*~VoQAHlVMMe6 zg1QFVzm*GV4K@N2Ism~^)kvJAYLtA+8I7d_P(zZcu}XP4pEcV!B02y;S7pkEs_}}T z$+2_*fMfDzNkbYDm(xk5XO` zw>{aHh#o-Db?vWg=sG|VGzVho0R)5QV5OXfUbzWG^Zl$f?LPHNASn4_)C+Rvu zJ_Q|#r3X+$lCGnb3SGw#(E|v&uH%#qUB@ee<^(J~fMC#^q?FUpD|a#xJ%C`{x?1d1 
zXy^e1OI@eqBwc67r=T;j^Z;r|(si~{q3awXdH_M!b)K@J>wHDfT!5tq5Dc1&lyVw= z5PLBZJ%FHewSOzu>OAzV+@(;_2MCthF2_mQu8>brS7PY{)R3g@YNfoWb1QZY5q*H5 zYr9U_(008dXl}sL2M7kuO-eZpcT>HYh(172+S)UPESl@2f zi&S_AmZm_ZlF+-9@(TG@?rtKQ0>N0{?$rS1-iOWXelX@fpp?(0x%EM!+$Mq@TfHZx zuE8FHx3GtGMASWk4Rw#|MJjv@%N<0OO6s0a$}42`ev*hgh=QT+X$@fRGuX_Y1!L}W zO8MOLD)u~4ZZCixTfP5LU0S_ggu1Yo!16qO87G~mugIs!SFzkgR6~-$*Ol@jukB@T z5OEVxFjnukG@!P6zbzl0cd#@Dg2D5iQt?K;PefxN*s-mgi>+H1wMFNiJAA4K#3g08F6jATPsXGPFV zhNTA(44NsFa+(|H0VJXa5Ug8Qi%kU$J%C`TYZ{!StE+qpnifkBpoSz}(<>FaW+0*m z5OiHLDI2hR$I9_p`s5EEVa#zleEnvpQ7f) z(g&y^N!$ENc~PBu*#boL0fMe=A!S3`!iu0-1WO+v7&MD1p zh&zaap{}O}Ft>uutQm~Cy_E8~oC3Xxa_a-uvlz6YE-eOYLtWT9V0oUdi<8dN_2g6J z`dDrvsv${WU!`gS``Cs=+(ZXIuW4|_wkM(w5cF$sM`hz0?5_x#ov^e4 zfglB(hIDQ5(h4nPe_szxg1<($)O zqloAL1YOk_Wkc0iMbM1H(g6qtjVa|cE71W+L#v&3+n^1njAlANjXt+lz>1KhOp2t!xO`M-eppVrljRgJyrFoaXyR zJAjC0KTrY|nUsJY-Ae%@#@Im&r>PGt>z{y=>OVw22R;-_Q=f*U`VUvi4?GhO_9LRH z5A^zvQa0*8S`jqIU}@?DgXTD;oaR>^>_rrLL24 zlCD$aQ_!hcn));(={j90FQ^X>_9LRH4|H8;DI2=ZRs_vCSep94pgB*e+S@}@9}!J` zVBNZSupcxu^?{|Xi*b^!OXO3~rC6H!G$iS|T&d7?1rbespzFFy+0b>hB51C`($ohA z&2>sS4G+M(o`|MCux?#E*bf>S`oL1x%{WQdE%GVoRxAyD8j^I~rc~&9taM)F`kBEFUbrEQXX z=}Bs9doTSwygb-XN8}HT7qRgN#!GsU3SY+ZU_Ygj&{vi63SZ#CendRj4~#!B-p~N% zzKP{KIWXqFt(4E@SBUQr@tqv_uYW$ZTOAMfgO>;U>4>QN02}H))QeR35tax0DV5ZH zqLf$2pHH6>@nAnN)P1f2%>4pO{~j1~zf#KQ@=tqT6Vbm1{_CGl?NgWjeEJq@diucf zJpCRgou@y@r^p|%^z>;+68N)HUSwY$>_T1n)<-t=`>UD z@J4keqNxx3*S+*4#MbuGlS4yOA6N?Of|ClGQa%Mug{7%aLz1v*lyYHRd9WW5O?{wW zfYT`(7vS`YpqT+nQy&;KGb!aXFVfUUL{lGF&xdAhFFh+XH1&a{uGw*tt~umW(41JB z`ZOfznp-I^h`sbYL^Sn*u4_JJL)ZL@pjiM*Qy&;K3n}F^oAF>jBAWWZx^?kjKWJ#^ z14~_t<0M^6$fuwsu{88)NYb^GQlV>UA{zQY*R`y&p=&us&~(Sr&<6(13Q9T6#!c3P zh=x9}p2eWmUV25S=-~rPZ7bs>ZL7$qs8zA_@M%cWwz^VY)NMT2kBA;V(6z0lY-np# z1Wgl`ujRm?sVL<%+#RBsh_B_qdKQB=_G9H_T%HE82@&7Pf%Pl~ZL8zKe(>^O zKOGTun`1-W7J88ix5V;bKc$kot(Ec$`Hb6!hzI+Dp>8`3VD9!<`uD(?yQ5M*ms6lW z5&e5$J&Qpb>e6B`0BRcg!16pDgpTpHS z9D${&4-A^4lyaK=XzC-PsSoVfYS5$gR_<76XzBw?UB}}jT_?z=pcAn)^=U}bb+S^m zf_ShW5lwxd>pD%@&~>^ZXwJaW)CUI5SxPz03_RG6h^9WUZe2Xs4;q^Kz*5)wI7!z9 z@+s&-EKPkHl5}0HROq^dh^9W!bzP=x=(=1HG*@70>H~x3Dy5u;mEdY3n)<-Hb@5<7 zXlUpIOI_FFBwaVir=T0LH1ugm(si>^q3aeR8u~!j^&e$J*KLZRxgASG9~d-uD&;g& z@L)e88u~!#YX4TQ)p_V!xqG0ZhYu{Z-G`I3-7lY_9>CJWry)t(M5Vl_W9Z={qK6N3 zZI37$+8$K|&0|=;mIH(438kE7RUYg|#Mg45w6%XLm$bFLm3taq9_*(hvc5fwjrHw0 zy-0=6V|lQjQc38GN_mBM@?bwA9_$Ck`u2(jF!xm~-^qb7_jRRw?z24DkBIN&z>cln zlTycn{ov)nemWxR-ob{tcl9C_zK7+(eo7^EA1LJ&vU-0=#Do37Q1`J0F!vKI{d-`{ z{Y)vJdkGKrBcgu~?AYr4kLuFu{Uy}&^nvAh`ZZ2EPrs2*k>6tJ>C=!T@O!1a$QO99 z9}zu$V65IhX+UlD{#iaezhG(V1B2%`rQ(hHortDBuwz>}7hBuP{Rs_CeKXgDb;d~r zO(vg$Cdbm$ry)sL7p1%)o(3@`5lwxdUw~698yDa-ilFI=rKt}Ln(35sn)#Y+dLo+o zznxwsSk8rb156T=2isF zJXo6gz@V8=DW}RK2l=~_fS1ucrDp-)4SuEmuKT}u$r z&)Sq$3P z4wPHDp70h{(GmH$Y{tgNWiP!*g}t#n*iWe>bZw=)!cTax9}y4s1LNazJq=*)`dGe` z17mJqrF`yNJlKzj@8rOG7K66c@nAoAd9a_3h`LR%p>9*XNQD`e2m2|N)NQVmSIA

jO(|V{uYV%^_Ht`oN$$Oev>Xj0gJ>(bNZaY&9s)%k+%)BcY+G4=i;Z zjgxd8BcFnf#nRNLAxYQqN_j!M^I$(Bn)*Q3b&|57>tsdHoPwpP4-A^qlyVwcOHU`F zsSm7M7Z3J>hNeES)O9va(shn}3OW}{Q=f(;UFRzmx-KB1sSk8r7bzRME>;B1C0LsJ zz@WKIsnB&f5lwwy-MV$*wV z&~>vSXl}vM&<6(1f0S|>eg%3P5er+g61JC5B39t<`JcwrUwu9BjUk+ptQArE0?sj zy_I_$ULNeHBeK3diH-H`DZNOAPh)wopHfNavr2h|Joe%_A|C7q#`^Yx1~B(UED!br zWA4jJ`P@r+upbc*_5(Y%dQVCn5B7tX2m9%WsCxq&>fY3gRQMK_2m2|N)V-sWSIFx9 zE)fs*14G^W8o=BSu=MYNG4~^-d@lW}9~05P2X<`r{zr9b_5KuUdiucfJpCLeou^;O zr^qj{^z>;+68N=JUgX`q>>DC_`oLJdzte!)>ixZZcz(dr)CUI7PfEob^)nGoePGA7 zaxS*EmHQPMn)<*}*zY*0pg-hO(4SbE`etcWP-lW#LGw4;WJEOefqnr_p=?}$T@*nx zC6=Z>Fmz3=l(4z zQd1uhO?{y2noZfzHM=5c=D^a_2L{btN;%C?9_&X%Qy*BjE*|U$4Gn!@scU|mq-z2B z6tp0ghCU5Rx)xR{bS*+eLm%k67E?BKEv^WfC9pK~fkD$vDW~DzE0-dop%1KQF=(}w zTLvn6_`p)zayUs_cli{xJeD3l4N2O1DCI>R!GryX=-~rh+e*rYwv`n@vkI06`+-5T zno>@~R&I479_$C!vlx_D+K@5%h_Pe(j~+XCmr+Byms`0t;pM@8IwBvJjoA3OY|@KV z*b|#sMX4mTSt+k@%HGzChzI+D@p0Kl1DLxumIwQRF?U_1eC|XZ>_^0d{lIz_gSOT2 zU_W?yu%C{Ix(%_RZX>-&h5fKR*iWgXZWE=vLKcHfiFmLd80t3D0OoFvrGF2Mxmzmb zbI<0%enj-|f%Pl~ZKz9&!8TCS&2^5jJl$SCMecy5p-)4S!2U{kktg-Borw6^ z5*UlYKn z#?scOAxYaeG;o{dY*YS#=IRQ&k9~d+zDOG!WXzC-PsSm7M7Z3J>hNeES)O9*e(shP>3OW-@ zQ=f(;U1uv5y3Qe@sSk8r=P4Vy&Q}D@1z4K;z@WKEsnB&X5lwwy-MV$*vCOkF~Q5kgICCb%MJE*C4?y6Nmsa%uIrN;~F3&5JD2DG=nzo?he7-9fAbH zOYq?C?(TY@b*ihrdAXB(ce?MNRaH~_tge-GpTlOddZ)TK^g||mlgPn-YK6MD)$$2> zdcQ-(!G2(>dru3v_I)D$Jut8RP%Xcf(;z;g!oLUB_Vk{px_Ekj0yUmKuy~(-MiTGS z&y_Re7eqXLS`q@kQp<0s!#9ll*98q5mO(SJU^Q?R-XfXAGg|Od9VxZrZGteJIOnq7s!v0dr2kPBoe^X)V1N{Tor5oM+0CrUc z&16JOePGf|p_bE3(_~XpVd?|xxoK9na#KTtsShl4O-mBGrc=&9(-SfEX-VjsQ7s>c zt=vphnEF81HH*5bYgSdz%tpl22PVzzYB^26CYyr_Qy*BjE)MpC216fM=$eNlbj_=r zf#xG(=+lzWwSZcwYe6aueW2^=u5RjDSQRvj5Ha+DNwb(*PQ!nc^q|7f2iEg2Xt$MH z0xCRwV4-a(lF+uaa)w%lh=)&0Lfdj``B0xX+wxR+_(0dTqPnTArz&U~h#c$(Ce2D} zInCJ|>_^4HeqcQhgATR>#a6Blyd3POSLAltkC?a1RrEt9T$RYderko#MzwsxnfhB3 z6$kr)dAn@Z07S`)>q3XvBQ#N2-EmPa?KHFlkIJr{RAUjiSQV2kJA}`K??^<6u8X@cMy;s(na8)xOFZ zXFnodKP?GW2dL%ayn@${3a=mNst!^&RUNDfnnQ?q{lKI-Of9Ebl7s!I@cMzZJq=cB zzm+=@8a#ktq3dXp&~=P*20E6A2T)5w*YRrkK!@5lF)UIat1n=hzC$hLf83frLGI8@Bo6Y>mqei z*JxGHTuj6R2qw*?YB|l+cmS#J0D^Vv;$S~$@Bo5^uB%8w*VW1y=o%s(KrIPf*Qu4d zuBXBS2)eEt)lFSDseL`^X*j*(HYz-TpmcS9E7$IQ=v%ovpuz_T7TWG2 z32k>PXQ+FK_yDyew2e{AhuWQk{iyH(g0AfWbyM4es-PK5#0Ll_&BJOr&3qi}M}-d% zl(x=qTe<(hiz!gA$n$MHF`sXb>4!}CI1y8zS|RjFwS2;pFa=U!3Iy}{_KX&A z?F1qR`+<4wb87ju7jdv36$kr)wLQHjrH+IB;N@UHy&~#fCZ@Vq^g||mmB_(dcQ%%!G2(>drJ$r_H7~u`+<4wyK4Eh96J0S6$kr)wLQHjsxF@1A3)8)eqixF z{fH#qrynb4$WMqI?58Cm@H4f1$kDC#ITZ){f%)|QQVXh2@2`}@^EDA;AecPgs+E6G z-%()<1Z&&Mx!CGf?gwZv0fL3FpGab$pOrJvFGNg$S`xy3Q_BZhl7s!IFad)80sK?l z`~d!?3Yx!(cmTnq=`wrL{LI0ARCoZvdTyH4t=#0$-~j{+T~m^TuBntW(9}dcfLao| zrd7*zv6Y*S3J)OYx@J%}bnwf}r0Kuf0MJ=bYA4d4v<0-BK(J6X ziX>E(l{3!XL>z!x5~}u5%g4F5#rCDb0SLOP{nbrX2dIMPKq3x6Fli1_%W0Oz0Z4@d z5UlNKuyXsY+@a9m0R#(Ohm(Y^Ba}1HkwiRzS`xaBR?7$CYa5TD!UG7puH)2AUB|0} z<^&=hKrm@eQp;)B%AHJw2N0}V7YF-6g9i{Sbe&ETy3SC}KxY#10BT9-I$N#Obq*CC zK+tuar*7&xUllYL5b*$lNpq1}PQ!ndjHbc^2-dBOgZ-ev0|*wnE+YwDmn&zWD~Nai zwIp<1rB>>?nhFmf=(?^|H+5a73YzPQcmTnqxlt{r`Gxg62LVK0q*O9#G3^ zdf)@3D%)64+B(0L3vC^5NwaBUJmxtE28dsVyb&VKV-re zi5%>wR;YVfEuWC5_bXH!><6a0*R+6ZUng>~ADGv^sg_^MR_-k-4)z0UdwNe)T|B+t zftrK;z~X)S9!b1U-&f9%9}qd%PfJ4JM{4USIT2$Z zm^@#qm48rQQDF=OYun1X*y>j98)z^Af`zc}NMfMxl{3%}L`;BM62g8`%LkgZ#eSy3 z1PJ;E@K<&71NfUNXnrT+0R)rgPqmze?_K_j3J)My&rP$smHP*piglTzs;etW=$cG9 z15HlE1E?jTYf80zAhvQ-QQ-juUDq_~rmksKK{Fi@4+u;GE!UG7_t&4;G zpuqzO7Jpx}l7z0=lrvB_A|60330-rjmAdAn!UG7puDR7sUGu1dW?mv5Krm_MSIcQ; zzynBy2N101VbE?Xw-8hn>kby$7A6U8izsKPMTz(TwIsClP|JtfjD!8C@BxCZZAo=g 
z+fu5aS(=Cs5KNk7)pDA{IoOX1A0SxI!=QuhK(Uou0bWdjdPQ!RJ&Ad{Y|syxuon?i zpjshxWwm_5CHq@%DolZ3-Y)xU0oV2;a<2Fg z`{@-?*Gx=xE&3r7wh}qmPpwclKrNq;hrt?D9P9_Cy0x@`Yu6@nupgM$uB(<`%dbz? zqvBvcu%3rO2kPQsumRK@><1R_(~U^teY&x7hTMe6!G2m20ykC5hrFMI{irzD56p+b z7Ftk!7;LE=o~?)&1Ht6kMy>pV+Lj7qAXwX0ZsmV%<+g_jA0Sw08%Po}?WmlYb|T^f z)RNFPNG+e~1P=D2!UqWYXK;wR`5D|*6*RjMu>pcfv%6YO!|$(#QDFlF^%?B^RxYJ+ zupcBi0Kr1lNRm*sr*g*Gi--eIOG4EswS1hLIM|O02O#LG+SE-|`>29uUm^}bFlqKz z%V`$D0Z4@d5UlNK(7XLst^y4nK(NqtFiGe-L^%T;O2h-GC86tZwS1sEIM|O040JS7^ zovv2uI)e%iAn3ZzQa5#-tqPiRhkE{iyH&f_3ZSU_WT^0D^_C(Ilbk zV&x2U2@wyVmV~a$)Jk2KQ{e#wUDuWBrmm}0L31?`4YoFj8P6+S@FwcV+1YP(AnG{*iWyBx@U-~ zZi0TugwGQB22r&_-ScYsggm`ppyC@u!BqE>7I5v$L=N@?^V(O{@@tRgU_UAj_5*8s zdQVhcJiXt5nuGno;(huSNxV^D_|-Aec12s^v8IaIha09zd|3n`U(@_Xjk10Kr1n zUnHUHZ{-a153yuj=4>}mSBmODY~?1S!UG7pt|`<_T~n%pW-1~cKrnSpqn7L9#L#J} z@Bo5!>*8QPXz&1ng{~P%Lf1^n8E9rA9zZRLzpq)<@`2Db8xvNwaBUJmxtE23^SVybJ@51Fuu$Tx_p73!MR@(Fnuv{3O4qF}1)uLWE?fXKmq zU|zeXT7K=f9PCHM!G2&p4}%WW#lv77s5#gVEZ(Qsq1i6&>TU;0|+L~QEEAj4)&wM0|?fwi-Y~3!2<{ux{fCaT_-4K zpc9FB0JS7^ovc>sI)w@kAn3YIQ#W;;t_qqnh<0}V zK(NqtK1t}hKsf_lNW=rEC82AyTB++|Dm;Lo>$+6k)ODFEXf7w>0R)rgO0}GZZ?L?I z3J)MCU7g>`wR<1>R_+?8@BxB_w(Cej+x5yB>INb{KrIPvH>u@A9mm0bRQLcv*LJJA zsqHpZ(A-YM2M8w3ooYD^2QA!1g%1#vw$5+mLR-gMxqINn6sT9^`8I}_&$s*ZLngeR zh$&F55c;56J|VwZ8%u>L5X|S>!&<<#FXmuBD!xP% z%%}IeT2Os@zo#6Y_lXz-!Q}Z+t^9-fhzesMSld?4#a6d+pFo2N5G;g!MiK*kuAG6s zAYua4k`VTlT0YRL9PCGh2@v!T;J51L2k<*p(0otC0|+L~k7_y1SPu51!UG7_bJMJD z<$i$%4*8QPXz&1ng|2ByLf3T48EASU9zZP#T{Ehcx@Mxn0|>gVS=3Ek zv#Nq-HX@-cK(Np@4@qd7S2;t?N5luHC82Er zwS1_@TWmope1M>9>#lBUTUZq|ixBYvf=RQOT26ClqxGP|2ME^lFz8@AP;BLvfEQDs zUXk17QpCJnF0CIj;W9)_fog@&<<#;C@yabvg((os+vSQ{z_mSz9P9_?wY}8xYuU=J zM8&~=U_B3mj@5CnAG{pwr&mN>Ut+53rynxmDn!0PRIO0Anp!>~4}(T3zCjdBb*pOu z*ESP5*bmHWTh;Pwr{!QjDh~Do>v2 zA0Sw0+ma+^+DbVyZB4`ns3oCoTeW;94j|r+3LhZopTQl}&ClRKRnY86#0Cf^&CY5$ z&7vIaM}-X#)Mv2sTe+0R!G4h700awFyOM;e-IO!VP$CXMEeTb_)bep2;b1>19Dty! z8li5g8mS7JJ&8B~!K5*@oQCgd9z}%%5UlNK(6{|oZf|Jt0D^_CeMmyrzRDSBKO!DL zEeTx*sO1CkWqJov;Q<6)*Foy0u7g!Ua|jU+Aec0VspT~M8;!%M@Bo5!>l$W9LW2ho zEOZ@B61t92&OpZ!@c?Q`=sI4l)O7+C9zf7_ouqE+I$0Gorx5V~f=P3lT26z5|8y!m zfMDIa+U!hd@Bo5^uCqx(*Ez}==v*QmKrIPf=c|>vE}+5#2)eF|)J}H%a_!!SzLmQIDtv%oq3tS?&~~+QhPsA`4^T@&+jVNCw(F_z z0fMgWMs-u$O{$=|nTQV%OqyHOavHX&w^88(1f{L>Te;BI@mB5*crgX)6?wkhMa<{h z-TEOD-b2I`s8$Faqn1y28KyugOo3oN-yYBcu6>YLvaw)Z`;c0G?Ul{;Fjd*cfweuo zC#9~z9)Y)F|IsU=?onc@8?PTS;bTO;K~$|!_k>zLAy4losrUv_Fx5S+1zh_Kv1Aj# zy!KhO{90~D&ry}_d9b#p_e9mj)B6ReEA}E-yiZ>uiTCNt${F$%B3~k^B_Z%NwS34o zTkUl!zC;wvr}vv$P{B%y6ib(7gL~Kk=tbtV%{zn*AJO+2_mLIwL<7pYWak7^|z&|Fa?5nyIfWaxOO>W$(9H6 z+7;CDYx$p_D^iuMCs@zJpks9n)&OtCdg&EWw-Pbct*jq1VQ(VeAgWfV>#LSe$itu? z72hBVrn*(NfNNJHmaGxXYn#;aYrku;)v3zX4A%27=s;aO3|gSBSSwh(Py3U^`*eVE zhFpWlmxyXf2wY1oA98~=Y;7vOL=?=2!Ma*deHg5#9G>-w7z4rN*-)+ggW8A+V<1@D zR<7^AwsM=mgbxrbv~5ZfGi|1vnKmcl1JshxwxwD=6Go7&sPF-T{u$gx-TVx0s|uR! 
zh}ZzZq}f3&r&+ei22x=I1oav0+v&r!Q(A-V1PKm6uuwIKBvkF9oN)#daR6#bsM=L6 zA7@~z?M8(I5Oh_$tDCBZse)z?A`U<>X-25!Gz;JWq{0CR*7h{$*M2Lv7c_VP!9v$4 zlF(IF&Omz;@c?Q`=-NjuABcaiwJ#MOK+tvVuWsr(Kov9x67c|nNpp}|PQ!2I4yG#G zAz`$q3dk5Qr9_DcmP4yb)LGZ z>wHzvTtLJF2qw)%YB>#C)X`LByBL(N&Tr+~y$^jWcL`MZ0Kr1rWh9~Pa^(zl1rZ;h zmV~ye)bgSD4e`}f_y9rIcCEUp?K)M^Tu;OY2qw*qYB|l{_yDQ!0fN%j`K?@N>v$`7 z3%r;D^@=>-ZX@RN?RNc;3GX0c3REkE-ldjLcmt+DDolZ3KHu)u0s9yCVYg*H;Aeg>K;|gC*=F|ICEvP=dUsDdx>qLx!VDh}FR{lY~MTIdCtZggj zVyj!Zcc7`*yI>*gJ(3vcedP@F0TC0RmV~g6)bfE&Y_^Z7Fad)80sK_m`~ZHY3YyP} zcmTnq`BE*XxvtT^qQV0R)^pRWZsop#1`i-u==zQ%bbYU!fqo$30o0Pv^^;mY5L>yQ zsqg@TuIpEIQ`c{*p!uDM2M|n}Kh<)YpBn8iDm;K--MZTBA87CZ&QsOZl_YdcrksH$ zC*lFrlF&7!TB&O)Dm;Lo>zYR0)HSUtXr?3L0R)p~2DO~#?naxD3J)My&%>bIR&Hjf z@BxCw-`T7rp=~zh4AqT@4^T@&+Z<~7P@MiWClx+G(6!C2ZfcuH6*Th_@d1KKGrw9+ zGpNxPpuz_T*7GpvU^`H3q`GjA$+Tv80 z0>QjpE~y1vyA-ixOM`jsGHUs?n3W+c_E^IPQ1KTR{rLA z4-oXv;1=rUXK+hZ&}>D-1_&n2HflKyM}%)ng$)qYXRvRlw{j`1!M2A42OwCe8b}hV zc2v$dI}vdJYDuUXq?V8Kd5i5rg#!?DRYTNGRlBN!W;Y@ZKrm@`SIcQQS8f;;4nR<< z7M--JJ#FOf1Nkpy8_s&neqbSBBuNO^Q#r4^7ZJ0cmV|&&YWXX3NJp6pvmfXJ+SE+} z`>29uUm|8dFlqKz%V{obv;(Lx`+*X$!2cz{29M&%`B9uXKWOBLww_jDC4N4z82w<9 z82u3Cyyl@q{CrvxqaUu8zb2=J9zlhl5A@NGQa49GS`{?M5b^VYNpqZ9PBX{qc03h+ zK2W3gn8fJip~D9c96WsIf%+S;6S)HG9#}{?nIt5fqMX-0m56muOG3iwYWZvb%<+9x zSoc7eaF)6$;cQjVoI}LA2PVyVYB|l4tK0chSogsHmp}amyO5Q*^T1;C(IheY#mafj zONhAhv?NBqOf7%SFPiOgD%^RXkA9`PIr>$qpt+ieI}c2nYt?d^vzqKWD%^SC|MsV^ zH_pIOyN$H+Al~~oa0Mnju#j*QNl3U^Ij?;S5fh%4goNAF^4ET>)o!Q4ga^8WJJn4I zcd3HrZXzZ;Flp{p%W00mghz!551jO$e$VQA|9)0ts{@PCA0&y<$13MFA0lF_(~=l{ zoLc^xoIUUe6}CFiM}Jh^9DTehXdWZ-n=&wIo>0qa{^G1XDt=Q2PWn&3=kovk-hY}a zI7JUEBupR)3C}9$wVxw$ik_B)gcsEE*XF(dA{D3TfiB@?byLDCs-St5$QNURN%Oi| zPO}VOj7h~8V}k!LfBFsf7Aq_EHdu`Q4oQsuu5w=UJz~k;*OD0h1GW4$F^_#nRkn{n zAN^x>bM#MCLGvlGWS@aa^SN41^Hh_4K~=UdL5<#>JI~&G4%~T$#1ow!W5tM3Lq^*0!S-MJ;w03+AgS2jV4>mM(rdEZjX^1787EGGy)N&eJ>(f(}Z3eJT zRl{sXNGdiHSg4wrBvj3!oN;C)mTWdHiNC9EYNe{#smeA7=&I&aH&xB03YxizC7TCK znt9c78f+=^QI&0euufHNwg4m*TM#T%EkqKkx+`a#g^49wL`y=|qH3k8#i+{G19Vl3 ztDCBpPzBAB#F8xqCe6}nISt>pybM*@mIbA1wc6i-{=XfUgQ#N5gN3pcNJ80)${DFA zk$>8#C84aBT0YXt{L@A%{%IrV%6hAt%KE5+rZ17*3YavjsO2;pG})?D>{dW2Tc<{4 zc?bEgL+=`4Wuv0kV<)ycF?V9k`XTeR5ZS1x6*Bv)TT=_Tb}eGb z)&}$1b=2}}XKt}|smit*S^whTT_*78&KNT|Nr&(7JJNXA+FeVVDS#! 
zo+RF(J1A$wfy9#Ss3jq9C$)UUbNbuPRAn0k<{onwEvW7>2P=nX2(e_lg2}U+TKUH` zl&WmIgW6-(cKfgX@5W(}RBR8hkTskn#u=fUaYhnLwx^bati9CAKY61n+bGaKcx83- zgSWRTXxfM++XqaVebsWBagDYgRoV6jYrj!e|7YU?kW}nIuuxSY2~`IvXPkqHB|Ah* zLe-&a`8fQ~#>1$}b~xy&j!-vM9jOYMqlhIt8cdpF)N+~?n(SDrvK%fwkZM+x?Hm^C7C(1z@4?$y6u2!q6tkJHaD%-VS?YI99{#RA;zZtKCwPM%n^|(deK+Id@jrt+; z-9#+e&1!|rTh#LTu5GnjsmgX6n77E=wSa5yAoBYvFt5E!Ex+~{eqTk!@2kMtZ~q-@ zYp{D^t=Jg79@_3BrndX_L*{#cSh5Gz3T0`uBO z)$(gOnsPi9zpVmmzx{WhEpGphL(GA9VDS!pk|f@tPbp`_r->YRrzIh8f?7V}Is@!k zDh|8@^Y;I|7F2KlFDQrSMIv8f2`10WYULl(D^z@mC8*o~s-1q)4H>dL|3j#{v$5CU zso3jaq3jKknCMOAO!OA9WN&LpD0@dOpJ>@;dzY$g?}7f=dtcrB?0ujLnh%L3`v^>$ zkJWOTh9>)js%)Qv`t0?tRak?421Ug_2MbMKkc6f$l{3s&#FBljC86mXwR{+Uaq%rx z*}el^)A#D8rXN&6^CPijKY>Z}vszB`c$57?RkmNjTJHZn+y6U}-yo^j?_izbD^+!;D%--K zt6D_eRJEuoXci-utOuAhi>u`{3pLpiRApNdl&a2d)!MxSeXF(Hf4WG;KV1Y}S%bQ%td}ZiRw9;cWiV-atK~EtMc9X`Y<)o~ z>+E(Wly$sS>j!JaR?+M6JX@8R&$HF^L*{EFa+-l!A#-)Le7;FIcGBk0ey>ubgoXAeQVv zEeTZ>wR{}5Y6nr3?O@PV9inckI#d-jhY?G5IG8j?sO2~t_`&QQx~aOa&# zRkpLh+He2uwrXcXRIziwLfN?_q3k^6jC4M+WEW^jD7#QCABiI^FQO{jXwa2itZpj1 zL=`lb5=(X&m^7EG-D%rUPH`Vz~66PUNio3(&zZz1ygF)**aO)bBce_D7u6~7+?Yrp+>tgXTB zgtcOK>GjZdH!-!{qaQNgy~L7@Q7g3Fr;mjq!O%UtJd>h{}$vWcq;ZXSSWjiBqn-Q zITO7`EZOT?63X6C%O~2b+1{io+gqT2_TE-EKYQ<}g63Uf$=(B#=6$uCX6+{XfU0aC zg8J-rcB_`c8tfw|D)up0X!?XCG<~X^VLl_4>~k#%O<$cYs^%gIRdXw6oOy^Pn^#Lh)qHBDs`;tPwgBj=7F0J?Eu;#X z?!=NU3?|JYYB|lEO|~di*%kw(YPC-O50G~6K;NqMfT&`NgN3psNJ80?${A@XBL6f| zOG4Q)YWYYX^G_40_@{}WD_dUORJMXDXjUYatS6W>4Qe^f?oHNc| zwy5RT{@!e@RAuWA*7B^Ll(q&N0Bgn8(CeXXO=4a>P;c8*k)ky4&9t2-l1D4 zXT&XuCEH3%Lf+PD`H0>7+cs2X+ZN1c^>$iNeO7O;9G)GBB^wAP&yH&4AJa}$W!o97 zWvk{=t6Q}}kW_3Ju#h#FB*qz{oN;y~mTWgI30Xtc@^KbxvE8Z4HVpI+-X7}a2XDA4 zXhslAHWEylJ=Jm=zJh8msEyQ zuvOcis%!^OuC0NT=t>Ir=wddfe*z;ha>;;mT z=tbpB^b)aTFKbCCdqpjuXn3=|N>#SkK>zH$u5Nzz-cSY2o5Yg61t!hgYB|lCP4*5| z+1>^9+3W09Erm7Mdr(yDeX!8<0ZC~3P&vbVL@e3IS`wN*QOk$nn;kx-D%)qEYx-Q> z)bxcaXuc$t>?<&7zE;a=_HMFosLJ*&Sj+vtSNq?peFsU!z6T3cKahl~AC)uCPsEb_ ztRYggEU%W+?A&B4P?c>(P|7;HRSRVu zZ`FFjTCoPb9?!F0#C)Euq#rWh%0x~hP%C8iQOoE1hSLbBIE?_z=h-S+z_qIqOST%A z*EXu<*LH2OCaSWn4%YIlo|Lu*YlgLAEqXn)wGvZXfBlg81`s)mK&{ZWrdmE9&+4_P zIEw&GZR=?&Q%4?dBl>P z4<^k8YB|j*jdmeb*)9TWzx{XcpZvvd)kecwv5WP3+#)X_<}LD4{gC-CBbMxPwL<0< zYWaMBx7w9dWxEQ@TjbSRz_r&9`I1R6uf0w!zZUD(^;G!Rz}j#B9cyc_8)2>3O?o}F z-Aqhvx9Eq=cPp`Ex2YA{Zdc3a07%HrA zVC}d64z$JX|9*&Z&Vj`{^g)t%hmKXwhz}8Q&S^==8>g0!xW@o{gbL>zn798&wV=9H z8?PLm$B0OlYOy{vA2_Fhp1&8tKVbzsuGu9nmA576GA!cYh5v)9?JS_;Ey2L(<$ zu+a1lNoaakIm5h1#A&A`q3Hv)e3++j+EL-O16|X{>ZYboR6+A85vLuPG@q;GH0$ED zqrzzi)^h(}sr^>%D@btQfrYAXNJ7=O${FW7A`Uz)2~|I+<>T;+^&hEl;DN5{XLVE6 zFRGyVm52imOq$=-avHv&`VT4`cwn8XhS^_`;J^b5RsWELsxAxqI9-W2@W32rar~ZdGeUv`4=hy8OcJVQQO-EC z5^>;ZN&H=PQ!7=?PK5&xbX9Yzo2uqg1T%&V5u%!dPy3I`r2Rh`|cwR;Ep zR&4=@aN&W4vV}-OS$E}(v@j7Do|c5NMb+|=@Tx6Fg$oaKWs9qu%9c|n9sA`TEMk^h$ZU_=C%FQ@@x4=f~!!KZB?+AXZ57CHP~vfR;*F4hqfkSYFk}D zWWHu1XAP(o+FI4}`FK|Mr{b&uFtx3r1zfu(v1Dt3dF|S2`L+D@uR~R~b-`Mm)f3eg z&+7FcuGsou@ebX9B;KJLDrdxvh$Y)tOG4fzYWaxs^tTdK*)|39S-qJSRG-zGD~D$b zBDOp*dA3q3|CqL>!j=ctvQ=}b)vemLkYK?B3t8Kf#5g-BXPkjVEO=THvUXC-$7yS^ zovEf}+#DS+Jp=vL+d>poFMuh_pbX8?_Q`O$8plKuGzyp(JU$vZOxhC6>3I`ror>ZtP z01_N{V4bT&Nv4Xao}l5s5(@wRCO2?4m{9R9ieWjI#LxhM-g%0fk|_WT28|W zddE`XzyoW){kPky9S;#MJg`u9B1tGaNjW2(OvHt!C86w8wS1(*TkJF{TzH@>J44-6 zcBU$5&LZN%1C!<)wVdXRMmv`Z7amyq?Z1QndyfrYXsNn)a>lrz!O zL|k}U63Qm1g985@SZI2KBs9ILoMGM~;=j|9(DaU4KFl($_AV9vJJ2<~uWoAkKov9}67k=G zN%OHaLdeozI?k3<}JVAA}omeb6R1CI&^9$2TUVfGs&IPkzi)gL6G>QCj2^A`~Z zo|c5Ff7D7Ixcwn8X+H6`#aNvQ3 zs_98W)eOoRXGS6pJS_=TGpm)VW}(7?2fC`+)Xl%EZmOV}ornVuOqw~>avDxgor?+w 
z9w=3v-Kw>F2l`fR9*A(^frYa9NJ82C${A?^A}%~F31thZZK`o!}MJ#z#Sn|Mpo;7Fz*Y+ZoY$Y(SU0E%^mhDV$sbw zB_V5jwS1f-TWkj^EO?-Q@OD%;KX^N-f@WtT4m>bvc2Ub|zHYR^R566s=ZV}V?-QyVA7P;a+;NJ z;8Eeg1M5`PX8S;b0}m`z?MD)-_E*k02M}@KX-TN6sFkV?qQZd(x~fCeO;v}gg61$H z4m>bvj!?^KCdYwCg#!<){r2B(t9CR*xbVP2*|8*{>^S9&bUYCko|c5N6V>vO)@iYm zsBqzduIv`AqJBEHh+DJooepnvwBQ8zz(6I4O-ED;kPm^9C;@!x?-^S)Y6!*A6-pu&F#)^h*v-F~a~5hOV9z(Un0B%$h4<&5(g5eJ@@ zgsLyp@^L#ldmWTrnOq%c2avIDEKTzSo1M5^Z%zlCd2Oe0c z`h_G^{i>XCek0<*(~?m2hgzxXPbwUEpsV^@-Bk6DDrmZN7fn|%X(pqn((q*|lT+cq z1M5`PW>Z3f0}m`zO-&N2rcur~(-LvuX-TM>UaeF$0~HQD&{fT(ZmODD6*RLDao~YT zGn-mY!|w{aQQ^P?rK+=AwRZ17->S_45iUHiP&OAyD4Sb3Bh5p^g{LKsOP*ej=h+g( ze4Z_-A2Q!kL@aq~h0JBt^7-Dxl1GIl56tJ;@>;;PD-cVzBAD0qRLiertJXkOwq9T@ z&+18OYp|7It=P(XJ+$>ErnWx%A@lVm^6i^yg|=1H^7(jHuS&(YZ-S|m(`hy_neLe|!5`8db5*fvyH@Ie3IZKrO2@U~Y4%??Bycwo}(sFu@w)o43W z;lKlHzfo4VYJ(ubfd>|<29t!UA<7wNS0WBPEeTaa)$(!Js_jmN0}pgnd#IbLhO2^R z1Q7=wm^6E;|<_9h8cZOR#EA0iGsEeTcosgTd>fd?kdA!<3zpN)1X6%IVG_S=8Et=i!b;lcw8Wk-^PvZItU($Pd* zcv=$5j#bM?+N;Hmqr!y;y0R10O=Tylg61S5E<7-4PEpHgDvfq36)rrm_S=64|H)r$ z)lP>MOP*ejTjZItgXQ=g|%Xrfzoz4F|}QxA2Q#SM817ftpe=6yw?NE+abWQdy^SQ^p|>k% z#5;(50jHLPyt~x$5q}wAcT@2NoM7Jm@703p?SG7Nc2-Be(;KRwd6S6$4osT2)pD90@ZVA4zXNNz|MzLXReKK-9C%=% z>I0Hc^`Uac`G|-EPfJ48Cu;dPli|Rl!hr|6s?XI;RbQxr=1U?DJTPg#R?BJlTKjLP zaNvP;sv2hBL4pGhEL8nK5~_Yw&Nx32ao}l5sQN{%RP`$r4m{9R{jP4R`a=~oe-d%v zfl2eXT28|$-~UkIz+1R(Rc+Q45*&D7p=xrHP&I{e#+j0c15Zmr)zoUGs%fZj;DN4c zI(1Xk^s1nlfrtYSOq!Y0avFZCHZv6tJWzjEt99wzr>fmM(6?%{LWBzsER=O431zb@ zXQVlZxbU=M34Ynez73-a%(sef<5-fONA!`$o7^kG1aW*Al!PAnEwYge8&WSCy1r-)N&_8%vshc0X ztyMv@4G{+(m^9m|poFL#S}zfv##dbyL+)RnY8C#DNDU%^qqw4gaTZI28^&uufHNHWCsXcwnJwFOpDY z${A-A5eJ@@gsQ#ON>y!CIPgGMwXeFVYCl!b>`%mj2PVydYB|mCjaH$;fd|%p`){{Z zI~XEdcwnLIP?At~m~uusoQMlgOG4R^YWYZOwb)TqxbQ$%c8t2I>{wOM97n{32PVx4 zYB|kTjdmgxEMPKFgro?eez!IypVrsiY zKV-g3iG2H}TA}T7wR}Eq|5s4)?VDg~yGjeV_G%(0&w+XEwQBjbKegC(RGd5q)_(i% zKwI4YZ-AHs>cHY1dJ{>!LvL2jh_?{=0!}RndAF(MBTg7#w^Q*2oM7Jm@6>|o?f)+2 z@Z3$rmIo%!y=vtj(-`2G*&qiJw(KXrzN3moLWB7 zXwI*r!i5L=XYWyU^RqWz6*P|#G2wwp^MqPXW1L?{g$WPTXRotcwG`H1PeXzK4lFcH zAPG&+DrcDIi1_ccBs9ICmJc&?tG!5t{|>FVA8yYqz@*tqEvLD;(YB_-fd|%pqpWV#wuJ--9$2W_o+MQ5pqz0A z5^>;ZNvPUMEgy%i+Rjus@IY6!i@K?5uqtSV5OLsvNwb?;PV*ZMJSrS`V4bSkY#1as z@W4XVaFS3pLOJ7%B;vr+l2EmmTB*vYaNvQis;q9R+FKPgZA2V+VAAZXmeV}U8F*AU z@W9${|LwMF2S9`i4=j{bNJ7~`${FckA}%~F31x?>$l77g1Clj&csTDF$ zRmdppfyftdYDvhuNi84o%r)$0D!zad%-jF1 zT2Q_H-=-X%+lkonz~s48t^8xUiwavFSj$$e@4vQc_rQY-4=j|8A&H6ZQ_e*96LH~b zNho_zEuZN0W*bX|3lH?q-oxtVXK$P;XdWSA!UL1$QMH`rtR@>zg$WPTXRotcwG`H1 zk3)g~4lFc1NfMf#QqC|>6Y<|^Nobm&mJf4yt36AF{|U+G~*Dzyk|aZ;*tlHQOn2a)?)8c;lKl3 z)%)tEst;5_^C1xj9+)&AtK~F{;lQK9fd|&9YM6Zn2@X84Q1t~#sQOYl<9tQLfu|** z>KnCE)wfhQ@IY7fy}GID2UXDgNW_5$Ce6=kIn6vc@ThR$fpw~Cv)>@Wfd>|<{vZie ze=293zlb>Sv?NshqgJZwvZ%)C3c9Mv$o@ywVf`|hTOq!|Gs?y-Vqr!m)N>yjK zYVF>EzEztRB3yW2p=^4RP&R{dMw*d`3r|Zz+01JBNMAJDEL6DgKvy=Ky7~9jO%*h= z6LH~zNi(NfPIEpkJStpxpp!>lLswDGh^rF$0!}Rnd5voMh%@%LCMv#w6U=9IvldjJ z)h){5X(eLI1CwWfTKUJc1{JnEu$HZwORa9z)`A2J9$3g)ha|>XS2^RXN5q1sB_V4A zwS1gwT5LlqEO?-Q@HSRAKX{v{f~G{ofd?kdW@ zwKYko+D19!Y)iy}rzN3kd$oKVwrV?2;lKl3)sE_>s-09pvojF~9+)({sO2;t;lQK9 zfd|&9s?BzV1P2~is2WNVs&-e-IKzlI@U$dU4Oc5wjiADT2fC^~)lF4rW ze#m^s6S3r}6*5m$%jdhZ)lQZ>kmAE>O$oqz_k|>OLhsE z*IufYU&~+rWmILm9IXBJ-+{Kc{a*ob#jXU4cj#3l@eaLOIU`;}OAXfv)KpbyL#> zRnR<3#D51S>wG%@6qRsPNx`(!?)Uy6nB@z@7IPIcV4@4k6F~OjNO_jof`8|2=Fk zu^Ou#Sd9M)NsRxha$ff}B33&siSgf1%U^f4{`MvnRy)wge_P!g{~cA(yi3Gt2PVz? 
zYB|ljjrIW*Ry$DR^A7|kHhxbZf9MGNh{agyz+(DONMia=mGiQn5wX;1NlgERTK=-n zVX33SQV06F#`%qirA|vi)gNm4IA?IE9Tk>3&{h4dZmRl66*OHI6HQkz zX(pqn((v`mlT%@-1M5`9p>~j9sRIjDQkj5h@DPLLe+d~ zrK2PVxTYB|l~*y*US(}7YoiG6a< zg|a0`LfMka8EGjZ4mvFfWy`4LBaOvDM}>n9bY;t{o61&D1A8^qOX3)eEoRt~k(>!EE;VrpAU zKV-hOi5zODR%lySEuWA7P+E_QL+!xSwt*IK?S@3$b6{S(v08rZqa12Sg?kSCU;aa> zbK2rRls1JJM;%zaLpLXhcjy+%8F5P@jyf#~d0VUHBTmDic2qd(!2A!T?X;l!A4=OR zhi3;OmO3zbc2q0>n0BJVQV0Go`{YSTt?rWtL4u_YEMyHPiE)M~XPjM$Sn9MSWDQlz z$N8z*cBjHp2l@wZ4|Vf{H(V7oBZye)z@*tzEvFgRXnRp%sRL`jQC9cKqaeXj2NtUK zCJ9w-${A-LB9=NW303>4<>RnV-k%Cf9q6hKR5w*sR6%nP5lbDIG>53=G@R6WC>54E zuufGRY6l5+I>k(40iXP6sB< zDQY>*G);CY6?QtX_S=8Eee&rL;h+NxWoMFvva^&k(%D2DbXpS1&Q;4tI;q*tqryQ4 zy0Qz@O=TCVg61M3zg7d2=3=#+X8tC-go6(zai|@bx5(?YfNO6c@;fy!uf0hvzxHSjwWH#9YGCcR z|BkhBs2!{vYNyvj+wH{Ec87k*e0LH#)K0C?cDGtSAGiN|s5sOPOl@PdfNSp~;+_NZ z+6UC~Yx(-k2dQw+fwkZMJJ1%l|A!#PP6rn6&~YU34t+#9BmReoolZ+a-gvcq#2Gl$ zjtV;+n798Yw4i$Xe^NO-PZ9CcfywiXTKUH`feJqz_`mFvCvy8AGGzG?gUi*8nLP&& zraG`t_5w*v^rCVmdWndsPD?`BD{A>fhjXYM6{b4SKYOpMo1eWmR6+A55mOzQG;gcr zG#pa(4i%<4P@la??2{)gj6?09z)}Ymnm!;2O&=;}n2(59>a-*@eWI2R^Ej3|DlB!N zYx-Q>)bxcaXuc$3sRNVdYqgx_NGx?!Sn9x9?*Bd8|4Q>aNU+p_g{mJ&Le-DT8RsV= zmO3p7Rllf}e^b9wVW|UM)$i)2sy|dg^CuBY9hfwKtK~HO^6wuiEOkBVR>h%qkYK3; z3ssYogsLf&GtQJmEOlBEs-{*eRZT;Mr4Dpe)2W-PrdI{c3`8t-VA9N_meXK=oS6zs z9a!hz6^GhEf}IX5RCOZ>RkJH+oH>Zt>9izN&81eVnwttc9q6j&RX0`5rwW?+iP-7D zq*+icrHgxSUJ>AugCMO z7crk_E9r;Kw=$7K?bHgHebn;#?%_~7Dh{;+^Le(47I5vVM1H3R=CzG#`Lz!SBhuXo)p>}#bw6zjbTYvqK`34X<)K0C?wx(J>AJ6Kws5sOPOl|9E0oSfe z#61V*wdcHY1x(P|VLrcmTaZ@6WIxPu#o2%s` zzTawFP~oTp^I5%>7F3_rTPufW8zPoEFnP99EB~0br@~SP*0NP|snxC8KuECEfrYG{ zNMf9wl{3yDB9=NW30Z^H@^QXuwjoql>OlYC?WS&i@P?{_W_Kc%IxuPWP|ImJC}%ho zmO8NZ8)bE?HWCsnbzq@tFOpDY${A-A5lfwxgsQ#O@^RRzwNYWI16|d=>ZYpwR6(;p z5lbDIGzY5XH0yDw9Tk>3uufGRY6l5+ITm^P6sB<32HeF_Qw;cu+xFH-~QWe)lP;82OU@_JC!7qou-_TPAB4^ z(~?kjrdmGIz8q>tg@X=sW#_1y%Fa~<&3QzAtp+B|1!_6Xi5zN2#jn-C+He0I{3n00 zRT~W}huZ1&xJ6z<%v?huVR8i@aJ3xb_+%zf%MA z+UwNvYj5IEJ1Tys2G)N2?^qj$+QG`9c6vRu-Aqhvx9Eq=cPo)Y?bHfwx2xszar?i6 zibL(d)OME^aP8eh+;d=Fd#_r4E&pXbh6?u_So`h218s5pzaL`kbYSrgeUK#Hp<|UZ z;zLC2bXpSf#;N5acHvMvD(rM%-u@reg6i#mymELRBjTq6ljjMw@{j3BD*SX{EnBso z|N52Y)9_%b0}EvnNMfRAl{3+EL`-#B63Sjs%O~Q?f?lNJH*uhU_Fh&uKYOpJg635s zraCZbURTR$&cIYhg{cnIXRotcwG_soc2Hoc0}D;>kc6gpl{3tHL@ae$5}H0x%ZC}- zY9CTzsRLcp$Lgl0PgFtkDG^H@m^7cOa-+O{h*eQb2f+CQDLbAUDeO(rmA05LGvpSOC6XrzpLdmoGI`J6_z@% zPE{Og2MLxsuu%06NvP_wxR2A7h@}q9aVA$QRZT&Kr4DpeQ>mM(rdEZjX^2?rz@(W@ zEvMms(@jr>r4Fo96^GhEf}IX5RLx8hs%BBnII|M5(`iZkU3F6{Rn1O?oep$WbE=!F z=28XC+(hhjVA9O1mecT^F7r`grvs&`vs<-x??B(GEdUV?Iv*fS z9IPB_r`O|owgNGqXDjN5%-561p>}G8%wB5wd{=O&9TkV#f%!b^tp!}$hsdFJU|!o# zEx-0!4z;7=P&=@eXZ57Cai|@v9BQZ6Lt7IuwXLonGG8;1L+#WGZLMngd_1fBQ*o#r znA+CR03uufGRY6l5+Is*X@MRUN4cnxlx=>A<8pMlGjVg+uMAu+xFH-~QWe)sBY<2OU@_JCP)m zour(RPA1}@(~?kjs#-qMa1OPj!a)bRvNP08WoN2_<}4zI+JQ-Pj#^H``QztOai|?w z`|ZE@gbf~19y)TwfBmD1^I_#sJG~ya$P0;ii@ZobWWLcv4z*J&WL~0{&-W*X+EH<+ z9hkSs%e8=OuOM=$9hlc%rIufdpX+KW4z&Yozx{WtjYI8Vg|?g3^7**^-$KQqc3^6|O$)g8b|UUMFt5E+Ex&dghuTr$o&#&Y{db@(ZvXc{ zjGYcF-l1bi;vIUQaz?zLh@DPLLf(UF`G_qXYDa~g4$RyC!&*?i{f|=)&m%lS`x}8sO1yw!=ZLmnCd|P z>^-k;e)e8a1a-+O{i9Z@>av8!=?c24$;kdk z)#R$6nSzL=4osS<)N-1+u+&jusRQd&#i4ePV5b8MRnwD%su`3s&WuFtbXpRsW>zaz z%|eBp4s=zsshfXS-Bdv{I}tk_m^5>$gM-|wr^U^ocB(qmF3*SC`fTI2K_1lKTUdwMB~`hNORDteUcI5np?0uw zn~(vnoybzp!FKJ&NtJ7FbEut&dJYb9uYN^t^P_{|?=pQ{SOm#Mk26Sca8}ybQ{-7}n<77#2laP0>rlI-D$R3~D*Y|IQFormp?0uMkr!luYcFIS zY6si37bR7$^}ksciyUeP2cP~&=H^g4RUK-V=fmwXwsHGn9@O8LSclprRk>Y}RO!$3 zf2GKwcCc}~Dg#`5HA_7Q+qKsuRjxhFp>`tbIXL+AKLWQp{eM+qayr<*L$Bx5cjyiA 
zwfJ?GoGwEuy&ID%Eq>Xdb|P{**rxw)XTb3Ee^Y#QZf5D}V5{@pq}t1Li-?{M4zg5R z<7G>=@2Nvn2YY6>acW1m$JfyhSfaWNsm$(3s&wQpYwr{h)xq-FyDQoD*}FTDGCyXC z>R>DL)1*q7lZfg>M0GHqz3E-5HDV66Q-abC_MGnHR8IHD*UT?iO1liHoE}K3G;@zb z?L?GzuyFcyvW?S2iIjPmrL=>s%x{w_We#$voruy74l@6*IrUQQcZyKp!JgG)oXYC) z_}cjcOM#ammDLkTm3Gdhz!Op6!NTg#$u?F`B~s>TmI4p9GJj2~l<^0U&xk1S;3!!+ z)J_o!JlL~(j#F7ZA74BFU@7o2q_TP;smAI>5d|JBtp1&BWA#!ZWoB4DWoCq}%uIse zGX4N^W)TG*93?A<+9^VT2YXhtaVo3X<7?;DECpVMR915&)mY6bqQHZN)m+IoR&ys( zW*(LT54JM%CRNJhP&*L?9t^ALU8+s>4lGNx`4ytVgFUkaIF;Fg@wK!NONEyqmDwUm zm6on`sGW!k4;E&NCEJ)So=BM`SSmc&$}E*sDYG9Ho`?z$hS~Hk)he@*mukzXN|Kl7 zb3a>-ZTGX~^Pv7#U`g_lsx((hs`PgyNuG!#54QW+DjDF~RauAH!FKKHNtJ6Y)y9Y% zY6l0oS5J$ZL+w;`s9l~9x3$^EZJj)*zjaxM+9g%Ft)Eos&%JsBkwfiZ<2E(}Tsw|+ zs2yzAj!&vwd!0k=L=LrsgWRiMkz2i2Z=|q8?O^{7-Go!$p_|6n;#*mV+GR+kw|P>f z#UD)Uwh%eg4z_#s+cRMJUcF^}b+%&3@?fj8byDqR+D1f{2M1ZI6{^EawRb8)f(LtA z+j43<+r`(;yIB&v45_rXPpY)Du0!oaBzUlV@ZO(n`{3=6NSPg33Ov}#d?2Y(X2x;d z&LRptIQT?4yj1(3A{2PAXEm8qS?wBMJ0E5#@G_*b+AXQlj-}e}A__cMSnZi?W3^Wz zW%gz%@L(&mZ&IbqrVh0eQQ*N*vT~@MA{2PAXLSIlvN|xnc0S5d;AKc+qKl9yDac~(-Tzdt$DPDGLi+Z1_D2DtWI)}eN=U3-2~ z!`vKdCvvDA9DMp8nVUoHRCTCbo)5Q+*~aaXJgC1funx6Ls&cz5snVb6|BE7r+QG){ z@(gh86|6(;V7vCqNtJ8QcBq}mp>}Za>3;-nb^8B`!Va~A{X6tpPJM@77hj8CWgTjl zA(h_sNtG7g?@&9DL+xOj{=bm{!_)tb@zwbzOO^*)oo^@AUZ$HwWO;CqrP`V=TdI9W z9V$H7GrNUTJGwQ#j=slI;bll=c3V=VBm27EE~3JN<+Jz0WZP%&jzr4b$r9ngR_3mx zN||>%)J{Z%2lLsR-lbY2=1@B&=mk{EVf)%aF?HzNAVs3pvzIM1Kbh zr(Y)9I6aU^nFm?=JJ`znI;m2|AHY8(qQ8TK%>Qdmy;S>+A{2PAXY~lDvU)VWc7DfF z;AKc<^;lA+9bb|k7g6BB!s?I7Hdaq0QszmP0uQz_e@?2Dp?`WxM1coK$;zR2icsLe zp4Bs)%Iewp+W8wxftMkb)pJQTR?mwl@L*x}&tw~`7ZNG+B1?e>TbX|+Rm%8P?IjTf z-U_2;rND!& z%v?#8GXAV{ZV?3@%)iz2F4d-b2bQJU>lC8GgFUnPIF;G!<7;VtmI^OJDzgQWDlHK! zEhwVGgN50`$u?$-BvNKkmI@EHGK(cu${b3CC!)fGVYc~nZrM|rjl5J_QdN>XnETn% zY`dSmDG%yz8I~k3sY-LXq)LA`kmQL-@?g84t(XC>U5Rz59cw^QuyI>216;d4>rgw` zu6;{V zZc~v%?O?lCZ%l&l{;!{sjRyA+S#9_z{`-z>VTvgs{=(8c(Abg zSh9`PL5Y+(n5Dpjt;{EqDrGFyridu;;Na8$R7aBk@_e{mz&36d=0W{^o^_~QQkC1qNtOOg|CfjyY6ly)OEbW= zm$44DgYDWcB~`BdrbF#S4z+`WPyZuutJD9L3Om#e_V3WEIQ1QRb$l&;g>|T1hE#gj zCRJMWtJ-xUhuXn5{eLY3hNu7Q4`a4)S{XE&m z>ApnD+|SbA!B*y%NtH62(BFyZ@8BTw|JqY8)qbT21s?2KJ;bT39*(b_->?*T8B$q2 zl2mEu-tpa|A__cMSp7cP#_F*|$~?|e;K5erk4cp>b`^X=M1coK$;zR2icsLep4C&F z%IfL(+W8AhftMkb)iX&oR?mtk@L*x}_hcKZ=MpLNJWGKGTbX|*Rmv<(fhVHCgQH~S zP&-8^@LK#~?YOhs@3J>sTth45|NK^CeYU`nf~xL{xaN zFndF?joAW;lv$9a!h@~M!bz1fU!lSiQQ^Tb+k85gYL(f@OSLzuN|Kl7b3a?0ZTGV! z@}T~fWJ&Upsx+5Qs`U45k~|Sf9&GosWi!CF%drl%gYDWCk}B8Q0dhr=L+#)o_v&eJ zbEuuF4zJ1cjs2%Lzp<_Aq9Xc+)7B^&Vz?mVH-h`w|i#Jc~CW>sp z3ERDT;|v(SS8ozuolRM?JlN`NmQ;J0HW!iQ!9kX4h3fE9?QM#X;K82OmYmwoR`Ip- z4weKjLn^Ipk}B=2?@&7t2_7sTymuwrK6u+EQf51r0uQz_?@6kZai;b5A__b>_(VCp zRC}Ky6nL;_wF9TJ+A+R%c48^;GNiKFIjPc)rP?kc3Ora?eJI(+YH}iFc4aB>U@P;H zq)M4rQ{agx@Zcy}In+)O3Ov}e+KW?J?Hyk``>+&v8B$s8msDfbi74=3VKtO&V|73x zWe#L1@L((Rv7}0wc`5Kj6nJp(>3^!F+Q${5!h=1tDV)mekoa0Ul%>MUkjm`vq)JQg za;Tk%3J(@$Md8`Am@~ zv2BVxIS=aZQ!GhdQkCYZNtOQI>QFloNgixd2GmeLmHtfs7mDn@2^+VIGQhPLvktX` z?b}Za>3;-nb^8CJ!Va~A{X6t>PJM@75nqc}vNqt%kV@~Wq)Lm= zY}{QfvH>S-)BiOYFg*QV8(*F4Sh76W>U=G!_A*^BBFlq=EY;S2*;4K6>QLdqp4p9@ z+R-=T>*!l76<&r^W;Z2OI{Lgr?L<^~uzdEun{4~+-I7R|TUjDJ*vfoAsZwTRhuVpV z@L)cB)4Nn_#2jj;1pOWCIo-jjobHUTnIEzAcNtPS-JMivW_O3$iRkZO;q=pF8>f2` zDRVDNe+OHcpC?tytVw?-qQ8TK%>V05y;S>!A{2PAXY~N5vU)JSc7DZD;AKc<^-xl! 
z9h=fUETX`Jh1GA9ZLA(iq|Bo%1s-f=exFn+%l&l{&g*sjQxi zubn@!6nGg@Sv{3hWA(I%0uL5ee@(WrdM1%F&$1MFu$B3HQl*Rq=yM_pJUB{L4z*K+ z0uT1AUf@(#FUHr-zgP;q45_SMN~*D%VWqS)BP^_D;{Au!%!!nlg{8oQt<0=Rl`;!b z;E5>kU|3D>Qf;btU|FiYS|KVt*fX1hQ<=>fUrVoLsqiwSGMhW8($b$DYA2$?gN50= z$+my5`4TDfdX@?gwlZ%>s+2j73Qt6Z2g7W7mui*S$V;__R3*vF^SPfb!nXU_qIpn% zZ)8dGlBzTpPpbU`E+HbxgYAB{R0g}YPd-b%qIn+*7 zhuY=&a9fdW+*Zni`g=2L_syg#w^foV{kd1KDzf_~Y}{7Q0N0LT9cl;LwQD9-uJzBq zmdK%YaFBcTD{`y%>U9)$s2%Lzq3dz#J9Pc{THJuO0cVC(dSjC+Ej~W68z-^>Cv5lX z@fk3DubvQJorx@29&B|sPO803n~2Er;2=x2LUnkl_Etqm@L*4Cb53n%i}>1k8%u(h zA(hsaNtJe%cc`6+1P_)E-qy*s58gJ3l$pd*;K5erT}hQPca80~6;a^9!6(Y$rP{j{ zp}>PZtL-_J)qCS>=Y1>%UWQawJ0w-wu~gepM1cnjs}CgGSnZrhnO#^4JlM*7D5+A$ zU-eHGQQ*N*vT~@MA{2PAXSExrvf4eqcJ^Q?@G_*b+AFEXYHtw*9xSZ(O}4SxFOf1G zOMwSlnW3agnVBi@L=<>%@acc5rP@aoqQZkcvx7L5*}?I(^l_F7FGDJ`DM^)<4t1!V zhzbuDW``x)m>r%-nIl*#JlM({l~gISzeDXrRCsXk>3@W;{QXkxSXD{#@_eSqwVeGo;eHJgL&+Tn@Dp z*?<$a>Ho_aFg*QV6Rgjldzr2kk>$ZbmTK#~Y^nBDb*S)Q&+K|m?dXR1 zI{G?Gg_j|f*^Nn+j!t)|ornq#me1a|lWm{9n-VE=GfRXATbb`BRm$wi+oJ`2|aXmm!ta14)&3elxy% zP(*%l&tpXey0cp9_(2?#;L3xkFT9S zuoQS1QdvEbRAcp|hyo85R)0>mv3e?zGEcJQs8AsW%WW*jn#`H3Ora?{X5yl>ZL@=%<$%vnGv=!GYS5~YGx4y z9t^ALU8+s>4lGNxS1Ck=2YY6-aVoRf<7?^FEEQgcRAzG|RhZp3zME4-g$E0>xsq+n z=1!!{JS-I+Y-Q$6s+92u)ANa_@L-rt?^3NY8+oZVzp5m8c|P~E1=w~!TQCpmZy}Z> zFR4m%k)%q0wiQ`aM3M*F{cN!eaP8vkq;3h=u3a*za_x=dyQM@!-O_N7d-b%qjp^Q` z>XdGoJRfe$vW?qvc~F1Lvv%K1s&ZQ~snVZ&^-3bUZ^FiHfILN*F6}i=W^%@FK>DGk(J9I5heTS|cUyJLoHsH*VN^iZShX3g%cI%65 zzzN&E`YjnSe6JoGU!8F*SsrY4#wXQYrU@dlJUGZwtxz3as%@kQ2_EceZNjPTY#LuX zZ)HjFGNjVlJgL&onPGz-SeC@oOrNGOO%4++hN;{Tn?-fzt!NThO$u?FyBvNKamI4p9 zG9O5)l<{YsJBujr;3!!g*nLnD3Ov}en#`%Jc8#x{53>|_8B$s8mQ-W4yNCi07FK&E z+gRk;M4z9OSSzKqQZkcvjaGl*@5x3^ih@yFGDJ` zgOVyO9XX*pSVV;f3$sro+n7yBq|6~K6&`G54oj+(d17pLxQGf54nF;l@Rh$`svW5+ zNnW1M6nQk;rpRORp#F|!N%E4aG>=QF^!MDv?sySN9&A(Oi5cM9lh{e!$*^7fsiexa zhmG$}5e;>x!ojEik-3fOPE&PC_vt(zZl7Trx6|{W{yxjveKV=b?aZWx``fTPOJw&= z*tngY0j@oVoz$HR+qLH0FXGg9=*981 zcnNC*&J3yaE={VmxX32mWg;7J!Z!VXDFcS5|I6d6a|KJ52V0#lC)Hl2t3+gZaFC_i zx-VO*eMKEAJlHe4mQy>rF20Vw%2MHFNM&|?Ql+EMjqh#{QQ^Vz+51Ma?X!1dB4xhG z65+vC=G#e?GP{rKZW0mU!F=|ncd6Ehjp@Fl1pOWCIo-mkoNkS;neVamcNtPS-Ii2o zX8(!Z?IQX+SUCMK*~aOPM9SRB(%->W=B}hlnUm@7MD%xXkokYTsh4U$QG@~y_N?yV zR95%K*Urya3cL)dtnN#ywDY7B@I(}Nu(0}NvW?XPiIjPerND!&%&(IwWo%XPkca{g zj*`_L-ES13z=J)jM>v($qw%%#JC*`3Ln^Dsl4`6T7g6BB!s?I7Hdaq0QszmP0uQz_ ze@?2Dv8~8cA__b>N>&GUe^G=25B98{;Z#=7#@Ei@SPHxhsjQw$sBJSm7lR1>VYq)$}garg{gKrP_=NQQ^U!+02~EY?k<1dKF8B zmm!teY)O@t{x!avT||Wk3$xcG+nCLfNSQfVDm>WA%#~Cra~Kt#hzbwp-)nl8YL(f@ zOSRXjN|Kl7b3dDpZTGX+=Ry6=&ywUNRcS7eRO!zzV+)E%@?g84Et~#ved#DWbrGgHM#hOSN|>LV*W+R@-nYt4Z;-^G=omFGDJ;ZIdeP zSgLI&qQHZN)q9d{thP_2%zIf1JlM*-KdDm2uWCDpDDdDYSsmEzqzDBb>{;#1sjPO1 zubmIF6nGg@Sxru=vD#Hcfd>n#k0jez?UqQH-B}7e*vjmgR4H@s*lsTo1s)8m!KVMI zmTLPbM1==?X8Um}vo5}t_GhW^GNdv)AgR)lBd`w?QQ^VD>|@C`W(Orw=3tfz54JL& zNUD@MXKXh`M1=54po&TFVAO+Je+M)Ts!;3?rhOecMcqU z`X7N?o&L{NcuIF3?BAj1bLu+mo3$ucNQ9RCpOunO&Py z>F9*<-E|@=JXk(^UrV-q_O4H)%nd9N9&Ba4kyI%&eq48>hzJknvp2mHEnxPPZje=606;4z@BsOsbUm zF8!T|{tkxIlGD=D?Az_L`%b8dpfC| zh|~_2_J5viYyZAP%G}SA+QC-lmr0c}&y4LJ5Rux!w7lFU{S8Y}mm#(LN0KT}`v6Iuh@=jd?th;ADs$~?}J)WKHfk4cp>hm+Ka zNa|p^Uw2yF4|O~5z5lNJbbC$icG_jneLlFy{?3T^ExsLUrwB{&gpMqkwfiZ`!cb12Do+|)^BRCUAtaV<=Ph= zYA5oW8vGw$l%|`TL+w;`s9l~9w{dLawqYLB-+0!cc1cxk6O$_a`J%Lu$f0(yaoZ#V zT)QbtJqO#hnT+||M;Rbecb97rMD?eQ3v~X=$4%N4&5rg7T>{A)MZGe zw@p%|#q~DoCW$EOVEdxR_w$o}}8#w7rO=4*rjI@-(PQ?X%N< z-TM?Fse?VO9XPd}9ph_fCzhlxLn^JElPc{j;7~geNgXU7ybmSYK6sN8DYGj}QU_a^ zk0e#f{Ag^qn~0kP=uro_N?~eR91V(*UmmHNnM6iR{JGY+ObaVL?m^v 
zuo_CXu{t1;G6%9Gb+DEBSW>0TRU~yHk~%m_Rt~jOgq#lctfp`(t3%>z=TMfME<-A- z!;@;Pju4U4!NTgOWE-oa6De~HOHKz{nNKEF${g)bI}te@9DMrk|EF*B?wG%ni4z?-s z;tX)@C9L1nV7vCxq{_89)K26#H8}Y6KQcFm+NtVLyF4FmSFnxSm3dHqUuGR@msI6; zbyB52)BjgQ4z+`g+qD_s+Ur>AIoPiKT2kfOe>l`mL_G%wpZ-VSR;T~3D@;xY`*-M# zoca#^W_&GvizTPakV@~Sq)Ll>JJe1@P6ylc|J@82p8ju%ugYUdp}LKefI82q|Ci6Q5|e$ex6h*<1aPu6A{(HeD)k{e=Rx_-cc4mZy z)l9tqu$noMGPAHGb+DD0HK|f&HIh0JNgW&|D~H-CLQV&JR&#JFt2yIq=d~<3U4~Rv zb0^hU%_AbGgN4<+$+my1`4TDfdX}6HwlZ%>s+5_DoK8ee2g7Q5mugeJ1IxX7A%!UD zV9#t3PGz=ed@a3^rJ&1@%53qZN=p|x)J{Y}2Me>Ml5NbEPNdA6SU;=5R%Y3xN*Q}V zEhqA`8Vs}PU8+@PBQMofP}QM!c|P~EmDqMadvhMt-^#2*?UJfAS52z)_pn3lL=Lrs z?S3{U16;cX>o+ynu3amsa;?A6TwCNfH8{wbxtd z_A+fNBB_IeEY%9t{)_T{-MbYbse?VO?K!ob_r}-G`&g2?45_qsNUF56phN9MBz3TS z@IH`i`{3=INSR$&k~-MRd?=|>#zAS5MI?1_@QJc#wMX}1MM&yk&uTYLWwm>J?d-vl z)MZFzwO3N59ZR*nMI?2wu-Z4-#%jMr%5*GA9c*QWk}73hyJ2^Lh@=jVl9fa46d|XB zJ*$H_mDR!VwexY7oGwEut0_q}R)>hl>0n`XSh9`P;fa(vf+eSet;|tLl`?PIusd2r zP6r2{{`-HcGW{Q`5Ct9VnH|Te%#M$*r4v{Rx(unzPD-k@w3$QgL=<$eFgqpL#_ZHY z%ACgfSq-)_pGm5eIm4lLB0sCa!KeQbzVi1=wKG(8s9m1V6nPfgrpV9bLH(W0I@B(y zO7q;LN`Dtl?9LN8)DE^O@`4O-?S-u0)L^^zqNK{TKXj;_$Zu+J@acbKZVt6m)uDEI zKHM&28@Dg!LH&J+b*NoZmD?3bmHtfsSBe~J2OGDmGQhQ0v($62U3*PZ<=W*ZcGrri z=iuPe{|Mab^#4_b$?0JK4!xdJ-=R0e*W%Y%a=Hwu^lnV5v^d$Jb|P{**rxw)XTb3E ze^Y#QZf5D}V5{@pq}t1Li-?{M4zg5R{bfICeoq~uI@mM2jZ-_iJ-&{9z!KGENM&|M zQl+DJIMhz$mpE8Hdv_(s+2jH zq)tRq2M3w|`*&phRqc0*kkrAR)nlB>>hbv6`2$N*mm!ta6G@eJ96$J^h@=h{R)0>m zv3e?zGEcK4b+DEBYf`05By}Q^Iyg#J4z*K+qz?A1p5s(j&&Su!KUk8w45_SMNUE`V zQAAP)3#)%8+gQDnNSPT{OPLvAD>IW|xXjWHwG)xl!BMhusGTC@bg*YN8>h0GJ-&8c z&63k)NM$ugQjOJ|B62!dSk0AeV>NdoW#(bY>0m1}Z&Ibq?BsMJayl4R)4Nog>K$0_ z)$=PvK?i$g3vep41>gYAB{N(Q)gRo0<)uwA=)QsvseIMhz$P&+uty?R>Q9BQYkL+$c>xUJ1LZtLVh z{jJM7)Gn#YZT+N5f9};Ah#YDM8@I6;;M#F4^&D*1j!&vwd#*$6MAUO|kbCtja;x|1 zjTEM+gZ(>n6Ha}HZW>>UZ)GX!GNjVmJgL&+q8oKvh$!k{yH~$G1BUO_TgF#sE0&}V zwmMrU)n2A;L?m@^kfmCoI=obSry?YEu&1>xr?#_QeC@oOC8^7hN^AS1N;`g4d#{M3 z4wetz`;%=Syd4rLvm;AV2V0pBBvs1zpSqnzBz17`iE?R``mGN-cIHNJK} z%#ze)NM*HKQl%YBwcSM|b+EA7Gug&!uSCl1&63o?R%YL%N|_lp?Di9p)WK1*a;Tjm zvt#0G=~$M6E<-A_ZZS7Dy5d|GA%uY|1CZkHuh`ZN81QRGlN*tlJu0j|A* zrJjTB+Ak+nuHAHEca?~G4h}y3kHD=?|6fscpNZ;Y?bH(7c**y?;csrEA6B%-H-gDlm?ylkoV9d(H6V9)Fp zPVMN{_&WL?OH`L3mDz1cm5z3FsGW$Y4wlc}50h=5y*m;qb0r@ z>R>*5)4Nn_#2jj;1W6t2Io-plobHXUnV+#Fbs17Q-Ir8p=9mfH{UVY&SUCMM*~aOC zM9MtKlGMRg=GRG;GJd6hNJLTx2burZnEF?>-zY*-2YXhJa4M@u<7?-4EJ`sYI#^i!G1R>DL@1#nZbxG<(Bz3Egnw3NC z6d|XBJ*$~HmDMcqweu>LoGwEutJ#ujtY#OH)4{^(HOV$sb0kt`PL`YwwlZ@iRm#jl zPA4L#gZa0b-lf`9@4&KDd!0fQbg*YOAEz>VeS9s=&r;B3NM*J_Ql+J{DCk5Kbg(d6 zIN8Q*kwnTY$~x2zwla$)RmxoBP&<)B?O>Qq?^3NY8+oa=q^b_J%k#OPEzP$3*_-m9 z{+3}KYL`@{xm;4EKYv5LyvU(;u-(sA%mCM}#5&Xtwrf{Ts$Bc1L+wNkwS$A)tEa`y zp?0b|)Gp75+ZeWSTO$wZZ%x*rc1cxkYbRCubFW@U4&{I)PK)p%de4aU+(ZE<-B4O_C}t&bLvwsfeNu zwtMwv88CdW-aNiKTd*W`u+@2cQtf5hQbbY*2U)5Ws>4gQcPK(q2YXuEaB4e~;%n!f zEJ`sYI#@nICy7=1JpCzZukjm;_j%T#c1cxk7bjKvGyPv8a;P0_+%C-k*Ivd_ z&%t)>my#;ip6O6K5%nA#eEJ`OTb=%|RG6F&_V3WEIQ1QRb$l&;g(auUkV@~`q)Lk) zcc`6+oDR0>|7#gAJpErEU!5CRdOFzZd?Ts$GTkVmr-Oqm)z)~~Qteyn5Y@q+*-f0< z(arI7^c|L{E<-A_Taqdr`LnuPMMQP5eD=PdZ2RopmPnb~S)w}F%KR{?QifLO4iQlu z%x7In+)>fd@y)%At0OP~gFy)l;0x>go8} z`3p;dmm!taGf6d8&x$DUU}5$5WE-pJ5-IaMOMwSlnSUl#%B(_xC!)ZEqh#e!J4Gn) zV9)9$PGvR2n9|OSECn8H?aZ81V>OG20uL5evnJbE&6WtO*;xua*vh;nsZz#&fzKhL zz=L5my-T&J-hpMQ_F9Fg@L0PQ-W+N}v-l!@`UY^hWY;m^T&z8u8`dgAE z$xEuzTso=JpTpbUBqGU!?S8gw2Do-P)}eN=UAsb3dywZxUafOhMzSZHkcK!JgKZoZ8M-@wM{~mINPw54JMzNvf3jl|$`B6nJp(iE?O;vkR+AGcvnxx12V0qsBvs0+ z?oc}s1s)tFD~H-CLV*W+R(o+OtG(lEXCIaVFGDJ;{gP^|IuQjPEUbo-ZLAJRq|AXV 
z1s-f=K9*D|^A-v`5d|I`eEOeisrGS&sPJIVYzn6`J0!lA4rQtEGNdv)JgL&sIu5lH zQQ^VD?5JcLv!fF!a|}y`2V0p>CRNH@gM&~1 zBXe`8ovIGC%k$xO4%@h$n+NrG9_vuMq$;-yk}Ca~{x1|c)DAXo7iEBJFJ>KT2ivt@ zNUB_WyhH6o4z+`WPyZuutJD7%6?Ui{?BAi6bLulornq#me1aIlWm{9TM{XAD@%k2Tbb`CRm!~Gp>`r7Jebek z^e)vJF^AeIL4OB(PIquBr#s_o=0`03U4~RncPCYvvG)J5i2e>1PCreyak?jwGWW9d zcd(WDc~Yg!DfD+D`a3wt{J+-JOSNApLV*W+Ru6D0s|Vw2=T|HRUWQaw4<%LF@x%JV zA__cMSp7EH#_Ewo$~?+a;K5er_eqs9epP!+M1coK$;zR2icsLep4Aha%IeAZ+W8Yp zftMkb)l*3|R!@s4@L*x}*JK;3XA&v%EK7k0TbaKnRm#|?@;MO&9vmerhuSGZfd_k5 zFK{ZW7vpQ^Un~V)hE!HBCDmBXutwUM5f)Z6@&3bV=0wWO!cySDR%X_uN|{$t;E5>k zU|3D>Qf;btU|FiYS|KVt*fX1hQ<=>fUrVoLsqiwSGMhW8($X~!wG&a{!NP3bWZS>j ze2J8KJxhfLTbVZ`RmxmRg(srIgJCwkOSQ^ud8`P|PIVcY#|(LAWXH?kyo zNmZJQCsq1$+QAYcl04Y%XG>*(YnNsnY6si3%Oq8<{h34UL=LrsgWRj9#m%91syfsz z&xhNJY~!|49@O8PS%=ysRk^K_RO!#XdR37_?O@}!dIq?54C_!k*sfhOsdBA<{I}sHg9DMp8;VXZ?R6ABxlDs^hDe^eBO_9gv zLH(V;lH?^-X`Yl+>F>OW-N_=7JlLklQ!>D{r?L*UgYDW+CsnT9-=TIQhuXoxr~i?; zIn+*7huY=&a66N2+|J5_`uiN~P`jimw{wyz{h9vH6*<%nHg4xgM&~1BXFzJ|0N1L)DHIV&`UY>9eP=OEq;-8s9lCsdY30vTHJc$?h27Z z?O>b!znlTX)BjcR)w!A_%Y&`XHA%IX=~@w49voz;w${s*YF|}{3J>4M0l{3`EF9B%mxm%6A|ITeDFz|z{FtS` zgRRU@lPYC$sGW%Z4h}N^uRZlr?PrQm;K82NeVoeb{`lJY1xtaKA(hnwNtJf4bf}$( z0uL5ezfQKXdMJ@H53>|_u$B33Qte;r5fKF*93?A<+9^VT2YXhJaVo3F<7?*+ECpVM zR8~(U)mS|#qQHZN)t{4Xte#4w%+o9d9&Ba)np7#{KOmnGQQ*N*vT~@MA{2PAXZ0MX zvU)zgcK*Rq;AKc<^+HmO)r%quJXl!$JK4tSr9{fiux8542wRz%1jA)sOMxe%z=L5m zy-T&J-hpMQ_9}&_@Ld8`P|PIVB7s{!91wHg;JRfe$ zvW?qvc~F1Lvv%K1s&ZQ~snVZ&^-3bUZ^FiH*uO*9;?#HO+VQox4r>F>45{?iORBUu^G4nJA{%hRcCUU*1`OY; z$HrG@97~o5Tb=PqwU=pvh%65dvQ#TnhnH#_DMEq=ds>@tYCD_8*Unp661)tlv^Gzw zv|~euEkq=Euzc{|o^1QzZJ9`!tyl^?*vf33R4MZXhuVoK@ZjJRHpDPybYokbLQ zaFnbZYNrSV9_(37=2TX@#@Eh=Sqi)isjPNOso9&Bay zO{$c+)S-4F3OqRY^gq>7ZGVNR@LMUkjm_!q)JQgcBq|*3J(@$ zpGdYbo03SGLs%+2*vcH1R4Fsgp>`rFJUICDKf+i3eyMh(sw8=NK2zk;Y?~sF$%Fbk zmL`*({ze6wL)OYB`@wIpfYXi;IU<#cO&&3uogzsr!y>9(XwGuKV%ZWqzt!NTc>$u>@RBvR&1mi`X5 zGIu3a%IrdaC!)WDgUtWyOubb5i6RtuuxE7-r?R>?zIJ}bQs8AsWp!UtrJY+GYA2$= zgN4;ElWnXXNTkezECn8HWqzGhDf4CuJP`#R93?A<+9^VT2YXhJa4M@u<7?-4ECpVM zR925A)mS|)qQHZN)gO~>te!}u%#$ny9&Ba)oKz`eS^boV0uPRol|$_mp}>PZt7kZs z)wA)n^EZ|PFGDJ;=aOoyo)=Nz!NTgF$u?FmBvR%@mI4p9GXGAhlp!p6Ni@{WuvTF; zy-T&J-hpMQHlspRc(7+SGp90}CBBwk#ZuvANM$x#Ql+IU9BL<`!h?m`Ym#lu=18Q> zoGcX{Y-Q$3s+9Qx6`qI+59Z%%dY5XI*~m+^*QrX9m*;aon~!bxv)AWA{msvkhDdg-8Yk}+?GwM^ygl^oXGB*uyI=<16;cz>rgw`u6=V-UA?< z_+GtUe0A1m$?{;U^OmIA%QRL*mInt}suilkOSKIZA;E(^tqGjk&cyiI*@z{<%aBTI zlcY*Jz9erdBEf^@gST0-?Sr>@B4xHLV*W+ zR@-nYt4Z;-^G=omFGDJ;ZIdePSgLI&qQHZN)q9d{thP_2%zIf1JlM*-KdDk?HVQlu z1s)tFD~H-CGNt{;#1sjPO1ubmIF6nGg@Sxru=vD#Hcfd>n#k0jez?UqQH-B}7e z*vjmgR4MZnhuVoK@L*UCHvLbvRNF@(Dm>UT+mBP3b@8>dKTCy|A(hzyNtKq~JfS;K zM1=rZznS)s>JlM*7BB|jr4z&|e;laVD{}I0O_e-@yRVB&G^O+(KXWJBc zL>|=Nkt|7GQkCY>NtOOS>QFloNgixd%0wa2jzwS(>26Ot;|ZtYMzkwfj^;M4!e z+#G7BszdGae7K#$Hg2cpLH(V^+I=&r%Iz~rmHtfsr;F^q2^+UFGQhQGvJSO_?b^>J zRjws1I$PvWJ2?3CKLWQp{hzC_L+xPy4n3b!-=P=8*W!h&4LCET(z__B(&9A^wG-KZ z6SnF93mGsx{a+eioy%CVJlN`dDXI1{T`nTagM%#9)_K`d?Mij1@Li9bP z3QL8TA(h#+NtKQcaHyS#3J;dg-q(_CpS|l7DRTo$ga=!hZzNU9tmIHT5fL8DXK#9! zYK@pf?UbOugFUC4IF-}Q@ip@umi{h7DyLhLD$P7Qp}SQ?e+LVv?rvo|DDdDYS?$sNN)ZY?*t2?wQ&~M6Upv2H zDey9+vU((`#_CZK1s*J{exGb(^;jZh9%m`=U@P;-q)M6XDDXrScyN@g4($G<2n8PO zSv|$6te%dqoxiXYco|YzJ(E;p^{j{j4;EH`PqwjoE|D_NvlMu+mHB5ObIoNtOQoK$0gS$%E~F_J#~_?E>tiZb8_tT_~w??Mvgkg+)W%B5;s<^|ZK+=@wOW zO83S*A8w1WjoadRP=8CXcHc~@a$72?(w}?v(jvQW!p3cx3~=qT?4)iv*sfhZsd6o? 
z+6tnfZbdlAz4{fo)qC|y3Qy_Y4EuNJ%AEQRT_wI2S7mL$nIVPeLr$8FS&5!rwf zwtMxO88CdWUMs#jYqMl|u+>>NsrE9hCnC#(gDlkw)#0Vu28xj2!JgJwPHksgeC=$= zlHg@Xr8Oa`($1k1x``qZJXk(>8zb~|54JL!B~{8?IkwweM1cnfpD2fy zYHw470uT1Aw&YY+TgBJTJ6H<545_TPNvgDCsWwSOfd>n#cO~0cZJS7$?N|yt*vjNP z#&8+?FKsWPz=NY?bzt{CMJVuK&uRxwWwm2`?d-%-;AKc|K*=`|MqtNSW(cB0Sj2 zd@ZR`#!~Hi5fL8DXOEfTfPMe#FzB+Y>i(;kI{xory^YwI?(43lzk@xe8#$HJH{)yO zTP*!uhEz^BB~_Zab3%8si2e>1PTx(oak?duGPknycd(WDep02(S@d@z`a3wt{J-AR zOSK;;LV*W+R(EhJt2^Us=SM6BUWQawcPCZa@h$1cA__cMSp78F#_FC#%G}FR;K5er z=Sh_^%TwTqDDdDYS?$sNLJZzm} ztEWX2c(AbgYqE{iGl`UWmZiXht<2w(DrM%Pz!Op6!LXX%rP@^Qz_L{PheA|%uxIuH zr!spnzLx&QQsHGtW%g21r6oVB%&<;cnh_ReGx7ezZ01DD%)(OP!B%G0q)M47RCppP zJQ!xvyHu;pMqaADT2+#~JfHj79BjLv&6x-F_ga=DFR4m%?xae8Pmtt^Nb+F2pUs;A zuAPsa)V&_IYv)g@Tx<8pH;9J11>hj}>S=Ks(=Dj#ly0FsA8re?joTu5P=AZEcHc~@ za$78^(w}?v;-aB$3D~$TnE|d{ik;Lg4coPEN~&CY^Z0HV(NMQ69OPd8irnhGdO3xs zbj!p39l8RizC%}xuf>&E8*pYwrMGfYrNvn`>Q)iifD^WR^=cU~e6LX5*wvnRUi>n}{gz;NTPG@KWuqicsLep4H}@%4&=F+IbsG zftMkb)s{(>b}ZGl5>ep6!fNYe8>?*+DKm+sz=N&KyOJtpXw|kAQQ*N*vO2JPw;~jH zuxGVBr?Pr)eC@oCrNGOO%4&zC8mk>e6nL<(`arUc)y|2O*@dORgRRVmk}74+9NSG6 zQQ*PBr~j#zY9CgJ3J>lLyT{kk9xN4JhE!&IB~@BFb$qwChzbuDX8R`FnC+KH znU1BxgRRU^Ql-rKRCppPJUICDKf+i3eyR3RRY~&le5S~Q*fvEToCo#yah4=6sY-K7 zQl-BOCU%F2Nb+EtA`i;|*B;JJ>W+Zz+9Q)H*E&?`DA7=NG#q^TADP>j?if|4bjRlT zaQh_NxE+@V^>;jL_syg#w-b{p{h9tx64`wdHg2EF0N0+vPU=pD?b_3lD%Y+%vHP@W zsQU~YeEJ`OTb=$-S9nVIS=hfr&*0Q|=$Y}gcou5|&J3ya&Q7YdxY{P&IU*Zy!Z!V% zmjT1m|M~INxqv0hgRRczlWH&1MIy31ILK0My_YT3E>VXH5BAJ1<0saZeVp3({qa5HFIZ~245^JjkW_ib`>E+f)O4^k{_A90;}0cL z=3$na4z@DCO{$c6$a!`mYC4$4mwv^@hj!a*@=lZY+GVHyJow)kPdEpA0*`Sjfyd)} z=0C86a~V))jy4YVjX%e!jXxjXGya36jmwbQ_zOvuXIyeZ_o9e44wlCMoos9Tr9{fiux`rC z2wRz%1jA*%M;j-ijf2yE<@1)=Y5$Mx(+z#ld;V3%lfc2Az-*jKVD|W)`PD25T!vHv zb0k#=Tr#nnQ$zv>3xT8EZNrh;)#@5g7r%mY-N^8s+4)P zU%Eto>4MXGs)>|YjkT>LY-PqIRmyB+TS<{^CE@>m<;Qeu88@X{8}^N_!>Nt08{ado$4=_j z&yd>q21%7?oM%G!7ST{Q7M8}xCEFU`Fp)Cj*-705*vd>ys+5^=!)_zdP`5Fh_ACG2 z@A*xPpVDm#djfCeR05mD_spBKle#T3q!M^rQstSyII(-XXsFu~76Mx(+X%cPkuqDe zle%qSD>EsnQpT3P?-UJn?}F2M<;M)a=eIR(O1B;C8-F*aHvXRYo^gA2Qup2rsg1uc zsq&1U8{fTOG}P??OXE8x+Zx{~kuo1(Cv`i+R%Vx^N}1g@>^>+O>OKUg^~zgo{MS4` z+4w2luCOQYVNNCRk@%i@H+E9Ddxlg3dn8q!*?WFZ(NMP+EClvWwh`DTkuv+Tle+z2 zE7K)a%Di*KZhz5GHw6FpD?g??z_=;hfv|7Vesq&mV zPVCMRSp&e*`gzH=*3VC*%mu6^0BmJGpHwOHqjB9uB1-_6)|dFN)>qGe@7EtSR$Hb2 zR=L~W-6aP5o)7!(FXhzkFN^PKzsUNYpCPsT%abZkd#UgFBH#02>Hf>fw(hS=q|DW< z@A#T43 z8B$r@m{e)!Ry%EoeA9=8)wh#vtZquA%+0KC`mmMxZc?SpyX>?f@=YI(lGPsF_Y|=J zfIX|*IF;4y@wM{<)&d|yDyut^YOL-QSpdMo>aJuPtGg2^^JCTm0JbteO{$dH!d@F9 z3jjDuRtI)JQ^e{3_N?yXR95%L*Um3ktAh-wtR6_Jv3gKsbpQ*iUnkpGJ(Nh9hgquw z*vkAisZwSWs{@hM0Sv4E{`q3M-ht)ki{B|^*#LWHk8vuq$Kz}153FTFhE!%xBvo4a zvt@(GvH=!me@?bBdn%DKPqUT{u$B31Ql*Rotez2BHo!3Z@1HOJ|CzNJ)~dp+#mq4RU< zJM<0lwYUK55B)Nv(pxC0(&82ybqkC9p&xAjf3;`^4FCVt8{@097(1z39JV@3B-LJ~ zB}GHsQt|XGo>BLQWOZP- zks|imfIX{CIF;3=@wM|-)?OPKQdw=DRAaS;$X*+;uzGv4jn$Tkl-Y{4*9L55woa;) z@t0=Xi0rii2cQ0@`uXCW3fXT1_RO~BRA$@7*V4OL`)y=MWww1%rKMw?crCKu1}x0p zpKN2cLn38%WbL;BTbU0eRm#}!cW06PHsIjX{|NsL-v4~@K~-(Qk>@i-PG;K_xoaNO z--lTna3ob}?v_;P@A`?|?jjp-z&1thnE|fdi=EW%4coQ*BsKgJjqmmq4R!m$!KeR` zxsB;MRi||O=lO6OVjH&u@}T|>Wc`(2QkC1sk}Ca~{tpuQD?iw{eLMqP`w4bZHwCt9 z4@qkHb(zo|DjMnzgM&~1BXFzJ|KSQx>5hQ?JM>6SeTN|d*{3hW9fTR(B*)=1$gL8?cqRE2&b(VPkiT?6m<$$?CxFCyLl>1NN-$;Z#=l#@EizSbJ?` zNM&_jQjOL9B71GX!s?gFHdYTLQszO{UK_BL`E^pI%x3o55Dj$?!?2p(rP@^Qz_L{P zjY9U@fIYKEIF;F>@wN0j)_xlqQkgxLRB7osCti!}w*d>YKPKClJ&{P6Ct3S#z*gqZ zNtH4_TThAXw*kX!dY5XI*~m+^zo=>hjy#|H*)wdrpFNug_4hZ{1{_IMn$IOw`kQk? 
z_q@mk9I)Nb{+R);eSw|Sy$IX2|4OP{yV`{A-=d-JB{;~vdRpAZbTh0UZZpDgn~81Q zX3m59n}zjPeo0ksvnEygbFZFFgIxj z+^b)aTfJA$t?-m?9@xJ_U&pEM(0Su)aX!```ejI^H-A#4#p^ce-XQXaez4uE7tDa+ zd-X!`)mfOG)GY#Aokf#sFVh=EL)~Im^cVeb!zZu$6gB zQl-qn+m~(J_RE9%>sWu~msI68lvL@@^nZZJU-`kt z?V}ms+K;i5x`SZ5_TZ$-wf^}(E*k1S0SBM{N8nbc|0xPj=?;PYJM>UaeTNM#3Cvy;@B(wz)@W}o8Jj!ucMqf=Si?Po}3_UWWbM}M>3zQ}g_uzdDDn{4~+osmeH zGg*6Zz*gpSNtH62kL%7B*@FY-vp2mdSXDRUWXvklnFd?~3?#-iqOkIQ{oun)P?$m;EM~ zzcjnjXj^K)zWu8>wf(E(d)}|Gw$#Xw+WxgkmFK-@LU*0WmKw0M|FvXW``0H@<_6Z5 z8nBi5MpC8BkS#Stw$y-We;Pk&uHJsq{FcEs&wzdRH*sqBH^=w1-(hW@ks-DFTaqeI zOW}E|$Y0#S(*5_7ZQb9NNSWJNn`gjQ=7&j@GLvkcA+mV}O!t1sc%`2-Er$QE%Iv>> z()^JkHqU@PtGhXs)sN$A=O?VqGcu&Ix+kg9&V4q|5ZOEf7FIt`wz0Y|kuvwQHqU^q z%rBEFW%jgrhREg_aFnd}=zgV$%`;%n>LE^L^>BRc{D!r8Mut>Yk0jMtJu0$!1}v<8 zpKN3GSR!Q}XKkJVTbVy5Rm$vc^9+&AGvFv$**rrLn`gkD)l;0x>go8}`3r0Fj0~x) zo=K{)dRAof3|LtGJ=w_&B)q31GbiCPO7wYgUvHU{<04iX0s;Sn9Y_5v)NfctHD<0HA$5+ zm)Sf+*Uv}YV(XdpDz=0v+c{oJb6%muVZbVkyQN$oG+=;-)kmx zuNT=o1GX;{Z^!`GF2MRt4Yq3+N~&Bt$AoTSk>Awd|M;Rb-P~-Rp{mU@@_e{0#x`z? z=Ry4~!TOWJq$;HWb+L8KfWkU zAGi8NX(feio&o!J=*pb>4qYX_7FT6$o{=Gy-s(w}7WcGyhREg_uzgWlGXsXdD6JJ= zowZq;XTVlx-K5&fw4TW38SsCslczy-c%8g~A~w%}J*}~v+RnK6+S!n`c}9j*S`(5g z?W|z)43W(9-fz zJOhrBmCZ90v3UmUS?$27tagmAot;>lXJklawR2L9)h;5NXTZYhL&-K)lM^YkD{J!% z*vfn)sZu7JXNYW`0SBM{r&=fPp^(ipV9#tXPGz=td@b$6+B_peDzp8PDlJ_(q3c98 z&wz#5P_m8L0g043koB_~Y-K){R4FrqBdkS!R)d32|0Aro%H|oW+B_rAXNsJ{wkh(E zJgC1z*-71DNmZJMCsq2p-{u)2n`gi_MIMy_u05Ldn;LA_9-CCT_B5Mki2SAo2cP~& z=4SH@Rc)S;=fmwpwsAWt59;q^*5(;WRc@ywRr)jipDMC>25j6uodK@>3~TcY*slF- zQsr9z{AY-4o&g7+{zu?er~k7Qws{8Z-=Sx7>O1tD_*y)dwRuK{RC?zpRa*SS#@z)X zn`gi_{eM0KhNu6F;;VBpYx4})>U<%o_A*^6vUvvlAM4~-nEwC!C(SRaWAhByGrOEq zJGvsij;>^Fo{=Gy*;PrEj;^?p$Df29A z^9;+C`_F{Z3{fo7EMut>oFC|r4 znsq`qgDz~I0SmL4c>iHGb0TGCVg0NITbWstDrGLUd4|Z(YB0>Ecd1sHjl5KQwW>DH z$n&|M&B3<&*_?S$f3Ib2o{?0gId@W}zxgM0^N4Jo0o(m--VAWc1wL~`0fP*a63f1AI+B%BZ zJOlQ$*5lN6){n294Op9JWJskoHmTB%zlRfg++JqD8aDQOsFURLrQT7(a6k zsF*PaRLlY6XU_TkPuJdhi*H$eGyC56d#9?VZlA6*%-rtlY~9YVY@PuI&2~yT&E9OD zLB!@6U>jAjc?KkGo&grBcE(Anc9BmxyJFcqLqn3P-IWSedl0dC2I#5=C>yHwR0Pdl zST@f9gJy4~oaP@k&mdy+46yp?zu8i4Fhp#g0T#;k!%52amrqGUuxy^8AxYU#rMx8m zTI@g~HqQWE*+I&NvV#>tV_1Gx1A``4%4yE4vk^r6tOiy;{kL$-@0(}9%H|n5A5-LE z*q9;@*NfD51eVP+lu9zoN_l;A_pzgh*gOM_DRQI+aO^Qyep3VE*yEJ)W5=+01`)rh zfz?m{Eo)=*3|QGbL+3-=$=J|#ie99?Q?YEGp;Xd#x>8;r)BhPnY@PvzwzD*VV@F}> zgahN)bCmL9@2NF%NyFn?hi2q7{BN4yEf&Sh5kFxRIyIB!5w_s_i1B2!^rJUv{ zn(ByXssr`it8}Rr!q_|m3R>#GLet$iNz*;@Ddt`*Ep-}_G~KV17jtKWJwQZD9q5`K zQZ_VAPz24xSX%19pm|g&r#Y0CIwD%?Kxt~_C(Wkc1oe~V}^y zvnGDhe2u~M$bm)muj8cZ-;mE~-^9`*ry;5Ox0GtCo*guZ=#c}x`gfI$>fciY&HGq- z2 zHZvq_!~qtnX2nUWW|L1jvt!waLqn3PIh6`ka}lu-2k5HiQ8rY~s|cF;ux!Ku2F(IW zIn63Oi<*dyI6$f5y_nvQ>NENT`Z{@Gh}fwEER-#ZlawtcpOO~GvQvkKBxOq~ubRa0WL@6>F=puCSJ^o6d)}?%2@QLoZTaPi$trluFvxQ_Ab(MrnPb+%^D1Tdf9g zY#o+%I53VaDdopbE?GSh?Qq}>Zj>t1mTr{#Kun7qSbRb^#7UpfjpS3}##mb9G$hH} zR4Fg9Umx3yh!#08Zj`prfTkOzE#&E=|O)S5&dzXfAMxxHokbfD}rVZEd6m{&jzW-k*qeIM7uc zplqlbstB3`u{@6&44Q+Ka+>4n>|i3EM-8@7Rih1uglAEMg{l!aNmZkK$~gqfv#2#B zsX9!lP<1#F&!Prh)sf1Es7URYA|RJboH#bs)JQ~)rovZUPMV^O^De`>1NPT0lnO&e%l6j$0Uf=3{>>{Gv zE(T+YyhH;y_EIdrsey6qSf%{f(IvZ_h~Lz}>Zkvfwbj^_u$FC{&WEQeGd^|8+#UT@Qw~8#I7pZ^Y6K2gb4gQOb|ypZ{hey5T^yjaYT3EvmXD zX+d4O0Ne^SJ#t|28ND4ReMaw)Pmy5$uHJfu{-Q4@%0k^^V3Ql8EQVA>xwAAyHHIj~SR5hqpjn0zXF97~^^h9qTA zD&-ZuP_n0p=#vBe+xLvJ@$H+W2%2ZH^vQui^Sn|{Gl@PqBKqV&efwHjDYq`H#$JMg zE;+E!^a@VW^s0P{c@0aKoQ5P#uPf!nOs%&!i0G07UDFh0L(^M|pm`fhmmC;0?<(aq zt8<$MPv>DwYm84N0m#Rm#iZHu*CmI^;lC z^@Xyb>Ptn?{1;1y92hiTE9EroKJX0@9dckBRSmK4AfX`+EL8n~lT`gEpK^Y}(h#R1 
zN!2e(g{ohPXov${)$huNsy`G#^Cy-kQiDPBw^B}X5>KQi;)&FCZC2H2?IGc5)L@}% zCY+?IqkPJl8OzhCH6*E;RjE)l8xc>V23^%0%7&^r6+tr>mZwpJK{Jn1PO~mgqbA~M z)S&*YDqXBK`vm&cdVYv_05w=BTM#EHTSz`7EsW&>)EbhMEvl54^hn7TBjN$npetKK z*-*BmB50Pv^1~V!G|MREG;h_}vPAr_21;3_i?yVz<;7YjSj)D&&c_vP1#Dc=R@95s zw-Pq9m6b{|S5eCAd%0w*66LlU7+18_HGpH+!1Aja7{{)qlpp(P$vPA9s~T9%)w(rp zHMS0{W$U8zp{)iR+Sb*J)Ylc8SvRGUw(d%KeO#@35arer3~jwMfMeIg(hvv6u^TAm z$9AZBy`Aug{*CHQaPD? z%ISxtLrz1ItnHQZa#p58j))F9(7$*)DH~tBofSc|3ziN!FlcsD%4ue%Lym|JIk5Wa zzfd*A`a?oP99XE@6DO(KOFrcc#L^I_AxYIBrMw&#Yx@w<5C^)deU%MW`zeBEe=JX= z27~4RrJUycIvYyF6RE*As%o@hknl8Wuuyd{PEuv^DQ7sAr%`K2QZ+)UP}NAp)2KmL zb*QqT>M%vn9FFB_)L_sYsg%>KP-kT#o<{tbsvux>8QVf2%u#h#%I#>Zkt} zZuyJF+F7ucZIsT(6nQo_rpR;jBK3{NW_GSpN#=PdPe!CJPlIv?6D$A-2m^dj|LiOp=BQc2rYN_l-u z|5p>`b`2QX#%ln_UW=t44vb^3SIUp&pZ^9T`r*Lpr~ekTrRo1Bh-sYzi%;myIO!95 zi+oDF6-(=!h9r5nE9E7g+1KtMqIC|8>HjVbXqx`-mJiQ8SbFEc;JHt!c$@AgqIV9g zW~mmx{Wbll`5-*B)q#bw2{@^uhviezBUsw%G$bjTsFYW9Ic;@B{1ylLckc;h^UfCssjs6FW@9iFUqHwm#{R|X-Lxa zic((8LJjsR5lwZVYnrTVXnI`{G;d&Ossn>&ic(H<8BKLWG}VF9)XI;VO}`27HCj$IP zM1vgYh5xK<6#k1MXnw`gAO{A`?@Bq%F}>{%A{yjC6+Z0~0lIYl-@>QeDf%zQ(+39@ z0{+2C0^0TVGq=am2M5NPXHv?~%udlAiRgm^UBE2LhJaZWAz(HveQ;pV%%POiEK468 z5q)srjNkbho11ZTwt+?A^Wvn!=abJF=f~37rXlH_Ur;GO#<>YhbPT0(r*N`M& z1*QDV^Y*b7iE>*BbO9?X8v<5Q1kI}0%vJ+~W_6{UhW`$+22pNnf~~#t>4^YqF|KT# z!J_cBaZ=&y$mfh*u$k3pNGg0?rTmQ7m8>gKZrwmHyt}edcn?L;^u+R87Z^0_DdjZB z@>>@XzjcAFz4QF>{xrLz*D}6rbzmW&H%<~zlFymzv6(e!ND|OTDL*rx`MyNCZ3w!6 zjg$=m8!Lil6KrOiflW8YdOLjeO3yEjF`E zLsH@Wl=3t3nct2mx9veMd4&l-q7#>+k%upZVPx zU$#BKLO_3sI>D}rWUY-am`L9@S7 zPIF|P4I#?y0I;=pK0Og&DC5d@AXpSW3?~(SkbKT~Fg7#OkW~0^rTmPv;^#!UjR3vy zMrEV$Lli-CD3<*>z@Rx?DW_Sfw;e&m{v2Ry?>uXbX+HC1#8CkYrSpEDnW zYXGn){brn0`YrM~ z=dD=Q02-1?zg;Om=Z*Dt2N7!k&`ZBd*(m*PMbO-XWeETV&3#Ha&4z5BLBtXO{J$TX znr3%5dhff~gN)-AA1n%=fRhS;SUzWb1j{YHhNQwLD&=RKt==9Z;uatD!kl=3rf&Ye6Fck-YY{)w_t_@|1X`3%dQJQy@zDCIQ!_O>sH zxRVE4f9I$Dx_-rY?&!fnz&AKaz_;=_^LJS8=rtq>_(3T@^T`eNBN2D>pbPj}*%0uH zB4~caaz_sa&F@M%%^hrcLBt(B*xEavZp;2+9Cz|yQTRVNsql6sKVy3=ck*DIaVDkw zjBD3hMwsAE?Dm9H6*?B>ni1E ze!t$j5^+Zlx`6J=hJYT5py`R_jvfq}^^|fN{?c=OBJSwH*53JaTUN_B?&QIu@ZLD7 z@REGaSdZmS9*i^gQOeJ_Ouh9b;!Ym)!Z%Vj3g1`}G@D?#lLv!lGo_qn)84i@5qI)n zYw!HOZ_Bo1Ja_b9Az*8qBw!o)oOxR;ck~*P-uZq?`I-65Z%4!(J?H{fePQ%kFcO~ME9-Q$zUt_y7jyrj z=gbFVxue&R^v(}gs_D%2mJ@MD54wOxWkbLril8|Z%N;!!G>0qYG$ZTm2qNz2K?zu< zUAuNe_uISwUIz>tIHcug9hDi%%{*Atel$+1eWZL&dJLAEc`#0ToKk*Lo`!fl5jXRo z*M6e1QTs`Xpg9@K%{&-1rz+(%{CaR25jXRoYVY1|y0y>fZ|Ki}g8O{1&~z3~(lkmw z#hi`hK3_x9zsG2$yqHFI&m-bKA9PLUD;t`|D1zn!Ecf|f&|IXH)6BP5XsbuVFARw$MJ@m^8NtNW2<1rf^%Fn+U|tN|SRIySR6z&Q3zrTo}- z^)`hlx3@t3X7&H4tj6Akvuy9^Y$$sd8_M3(i&XbMmX08$lClq#^6L0C?js^Pg1}HV zRRcKo6KrOmf^qC;O8K#`mh5w)+`a&%Y_oR%FaKugx418%E!%&=;^Ut zZe~T$%!18qRxmWprj%>Cu-0ZL%54s?+R3rdG{okFqHJ@4g{HZ2lBRj&Q_Q^B%;wXO z^ba+^QeF&yhOq!qZVQ60X(44p)540NSp=KeqF~T0rj*n0Wb4I=a$5pylcq*n5{j}d z1s0l?#z~r%kxwzpVl!J#Lz1RWN`*3bWJNN8=6*91kK7=cH9GlW>uw}W=gHC zM#PSLV6_u|@$cjl{u!X>MWmv*2dBcq#;RH7p1(QOXvk6q8A8sU0sz8 zUELHx(;dt9dSKA>RLW^ewbqM>?e)NFC;S$E$S?i?V|_T;ZBJ)oTCBy!v{aJ<91|oLb17lk3s{tIlAvUv(z&LherTo~lOSTD7ZkvMDPWUY=tFg`C zEZgQf8_Kr8hO#a7BGqk$r3Xl_kl;-(vn z34bpQXqxZ`%7-NQgozzN-1MAJ4!>6qNA1aQbyI=NTS@10bS9t%7&uj6hU)5 zHnS7JpgB<~r=gwnB%<6-1}mTO(_e!Ye<6Pg6lFUVEHs^llQf+!pJL9yW_G5ABu!^2 z<;7f9vQb32oejFCbCeBDqZL7OE|yL9z@Rx_DW_Sk&c+b2$sX7yO+)NLDA-O9EHqt= zlQdl-pJFb>()yzzNz+)RLeu3$wElpu=}Kio(>O)YT!m#fJuqmlQOaqSuCwt(?4}2{ zNmHX;2L&7HfrX|UaFV7Q|r7{(*s@CqsoS^iHe|k49jMEV9-3Fl+!HEW_mTOzZ#On zeWjEact{`nnkct#z_>Dhs{u_{=I`Xg^F21RAHd-GQK@*7ej>{4XRz|6m)Kb zN2Q!*T&>Mal-n#|wUcAhQfyWz$~GHVXqp`-X_`Ym#mtG#Y%UE+n&wu@i(x4?4^eLO 
zg05*kWkb{ailA8lo7sY3&@7~s)9|zV!bG_(0=7w0qb&+W*%ku}O^f3sO-sn9m?g29 zEu|qz)6zhdXQ4J@5L8j^IarIZ&mw%$4u(dh%au62|RU0oDGQ-fvuJTPdwD&;h5 z)mk?qw$B5ro$y<@tuK~hJ>X%EWdMsacm!@{8;|{s4o$}bAi=P_$@1|v5nv?+r~N@$~M76}v*oW$^A5m`Gfz?jN!+eVd4Y%YwcUtv+Z~Jve-904n(+I}hi3pbvpvD! z*-NQ-lLivywl`S$QY`*hyy>st2fI(vYNUh*Dn9COig< zh{s@o{+&Be+4#;4Qv}UH*vt+FgT|C{nicD8I8kmnsP9~5OR%{u zbO{lSJ)mp4Oxe&hRuMFpW7#ba44NyIa++>+Hjaqh^1wD}YP74NV52;+&@>(=X}VTE z#axGFqdW~snr=`kG~GzVMtPuX`j4`q>1IXH+=6AJJTPc(Q_5+UVxv4FHp&B~sj{V5 zv(KI{#qNZL&GNuP*WEZt*FEwn=w2+F{U3+_L|Oy zvdP#`_PSoAx;L=={hv}v*%YO`I1330wY-aC)aqRm_`LUms>;t0Q zJ_IYjGEY}ox-x$RZP`8si_hj%ob=iJL_P(6ip}gZ4N2lYSIP@yCzvmYa{CgDEAxLf zpy|r|m3(-<#%A^n7(Cx96>rjaM7ezrR=yN-p-oG%AD}4Pk6pQlV)HA|8$fx~8R+ z4NXfcf@T?PX3K&>vz$^+^G2<8BFb%fu-XZ~*-~r;Xv(%CSm;^_C+S*QJ_W6U&1_W- zNxD{3$_pA(Z>tmKwg%|B)>Jlht)&Q>&e+V>27_iDrJSZ~t#u*Ftp==i!Y{sA{#eNV zLx=4>;($S&i>27QaF(sB&c?LZ4I9&9cfCk;J+PVeR4R$=rIc4Us=?MH%58lxro|03 zfMaX1nbm=DY;UFf*dt3;BFe2Etaie0Sy_!Wz*)9FIvdLRVnf-6dXefj!tz)wrINBu zl=AAB@HZvmu~=Xz+gt-Ub_;A~TY_=yR!aG?Pt@DiM7eDPRy*Ohpe#-J+d^Bm3@kpI z{czG}b36GIxIH$r9W*3~+fgYmuuosxi72<7!IeN?1s&3cQAPNP%7S} z{zSPA04raL)%@$hv3tT&w!OeY*Fc<9&))K>XAm~CeKaKL8myGpGoWPq66LlZ=-;{h zm5uM*5Jk`&fX!?u7&Hee z8Z{&-Iz%Zig`b@dCCcqE&=no7Y$!TH5j00)Gb@8ZbCgm}!ympKO_bY6u<{vyUH$oG zT8bS5McIx83r)x2Bu&T5rDbK9 z0E6aCrJQE&Iy;Lfw^3l5G!3z{p(xuqV4-O=PSSL)e2O^_o7wpqk~ED`Dl}a{l-q@% zYr06;&~&jPXfDBKb}1M%mnr2mOV-&~qTDVA+oY+{u7IL!SAvD6aX3lSRq`q3YHViL zXh_mDUa8P@Em3aQfv)L#Wkb^qilDg>%YJ%b(ELX!r(wx*GZFjgfznjjQmol$&zEAi zLc`v9V4>@FoTTdx`4n^~mc8>dBB779vCza zDdjY?*VzOjw$1~ktFon7($(@(>=8KGMo(ws`ZW<7*RRL)BGo;PWg9)ElE^2O^6I{2 z8$BYn(F5c9^^68^>?CYv&w_F6b4vNK{2AZ#M7g~HR(@q}O<9e-2xr+|(%De}R6fegXXp_p7q;h5JnrG{0jr`vVM`Kb3NtV`}X$qTK!ltDPL1mSX=vQMPt{ zn>4k@Nt!yyrJrE9jbLQ#Lfst_Yeru$j#X2F+YbIn6t@ zHaAgj^MGyA)M)cUQMUQO;@@e0oTO<1`4qDtmWN+yNYb>hQlV)PA|8GPx~9dH4NZ$H zf@TS9W=n!Wvy@U!!>^Z16XmuHSnY)0Y$>)ZG-X>3EOd3kNxGJoPeChSGh0zZlCG7M z@`CQKx0Q)e$TI0E1>trJQEIT3d@Kx6WX-6MhS~^~F+bZ8*!e zj?Tuk*aaKYVvSy;x^=OcbyX^f?531gcW8rkC(5k{7}H`;4dB>b*v!@gSd_$@1|u{t=*)>~&oSqU4;>h&VkHDGz{l~PGrU!}Y{Cj1SFcZwW1ZYhdx&+!`l+Hn)*af!ku~Thovv zuAfp~;K_Y$J0kklz?krN(14~1e@FT7?1ZIp4Gf-Ll!`ZLS0WnMz{;0m>;7vgwmU4e zwSk4M{y3?g0rIJ5Pb_V18j^GkRLbi)xMX`1(bfj~cWxhL<2yH45j6W^X=?+6W`Cue zW*Ii9Afl}e)OW73rC7+Sv7r#q=LQyvhT$Yd2g#?DgR%6vX-HBuTq!T5Lxbf+^tpkq zs8QKabciBo4#m>v1_sUHN;%Cu^tlny=LS|j<9BVo6e~kP{~K6nIvOWw8Y!P*j=|FZ zrXfkwaY}hHcd?lT5&dtVYdTTc&~%a_Ximn`{{{xlsY*G`{Pe#O(fn>$O*WHSsxd%%R92hkBDdjY4(*s9D4;(06l`X}Ru9lZ#55h@9oX*De zYXUZ|Uk~d=s(S=WL!44cy5~QhqG|^luX7HU+Hw$~;|V>B{^Tv}Jo6EIym>;H1yyyYeaUJ#1$0Ye*9J zfl^-JvVH7BqTD_LAMnwM`SncH4v=sXT3i{u`LepP3Nz>o*Ddrz6{cjsKE2cd`Q!y;XIuOzS2D+w> z%7&(y6+tr#mi{*|G|i@zYkHjiHzNAqz&2@Wv^k-m{|ziO&5e^Z%_E;;=Ec(grXlIy zX?~@=7&I+FME@J;nif(vG%c(Mnnkemzkxxsm{Lx2CjD%m#J^>sFsZGa7BwR(~2>aaW-OR1!+ zq?A|3gkMj@qp`qH)<*+4wl6lb4Z%2eBc=SMQEr=n)lT>=C`%LmrqGscGqCt< zZjO^an_I}Iz%8+vZKWYe+}28Yf&2EgZHRK)7K{l$(}1Q4zn^?~w!_jC2L{g$O2wPB zBN0t;VC74(uK!w!?FFlZW;a+*cxej}p$4Xk{|@78=Nb{G`& zzk!9OBXE+YBjr;}8B710h9pf#E9J$USF({r^uK|w=~!h$({YNRIUY;@8yGYvD&;gC z=zk-k{|#)DrXh9;6!gD=g{ISRlBUz;Q_LAy`rkApX*x@(&@_sO{x{Gyouh1M8m$PL zbFuWlfkAV=QcklN{cl9{zkzMi)Myt%LH`?AXu23DX}Ux{#axP||4l=Zrm;$erpt-w ze*<09mCA;uaf+b13QPYR7&O-?K??>5T{fUIYB9}?mZgfh-iodDQ$|mVWs(Tj8^RARi%AQxs ztK-W20uj%<0z=tL8o;qHV>5dNjALI_%8z9?+}DV5n+#TdWuC6GbY*@W+OoX?7N5;G zanfgVihK%u3!B;78j{4lqm&o8Kp%UTD7W{(xH7-50Zmut59Gu1A(o~%FnB&zD&C~2 zL^Q>Ll`q9yXwy>cQz&SF0}D}~jaepJe7_O7*`i0FRO(ejfnm?uuYm8ZB{7ge*+6m zv*RR9bI7NdIkEJ=X-LvEw^E^L9wPeRK-V;%vY}~yMbIpOrT+~KnuU~dnv-g6VIun9 zz-lM_W=pX}p`iy3EOaf7lXNX1pMsXe(gUXqV+t14~1kQc2`m 
zN_lm44c3{6hBz>$#dS1*W4mD4%n^)Z*Hy}o-H7ekiP+2$taie0Sy_#BhqG)wbT*Xr z#D=n7dXehZ!}8!OrINA@l=AAB@N0>9@D&)!dTRj3mav)CgK=zwQhqG|^nHkO>kC#p z;kTeHP52u^TegkB;KV_*-c}(}cgZ ze0a9O(i8^BQ`yjTmLh0IVd;MZgXSEioQ6khk0zr34Q!L9MmrA*`rp7p(-@qj=>qu_ zb0L=gHw{UeE>RRqoDSo+_(?~D0>1M z%AVAVRQD8?ohy|}%AQfmtK-T%iHMym!BF;`25{{2*vwu4jazE#R;M%3DOMD)Lb)lQC0OR*oIp#Kdl zH2s8=H2o}}Vt&EW|E3{H({D<7G2BA^PDKA3=$igiHZ=XE2%5jK^uK{Y(~g;d$SnH5X_n}#G!vnv&v<{+Z~4RlR&DI1#RRs_vF zSo+_4DRbq-$}dyr4hp zZ3!ZJ;6T^4l(L~~X+_X1gQW)!44UPXa+=>auuequz=73H_$}Pl7fZ1f;G`i=XJcAi z2^-Vm%6gINR>9H`r&JQTno?ffAq}=V5e;!*Op9x30LQL{&8#yR$F8lEAKSlV>k#GE z1*~?$Z&_K5)xcS{b#*qBb;X9VZhDdGx?|b7QmLe@r&3-W6MioucCG|N+4>s5u^V7B zs|DlOI;H$r{^@%Y}*! zqOWa2#O9S?O!%8=K+}Z3xqNuGz|s^42G3SX#hbJ>5lwMmd|5}P|3kyAPV4={t*v{x`5q zni}nFDCmC!3r(YOlBRRzQ_Oi-`rkApX&R$cXu5!i{x{GyU8HPix>yl3mtg6C1B2!= zrJRO8Bppjc{~IVxl`X}ZefE4Qb_F!_z=4IXaX3lWRq`q5YAii)8j^I4SIP@wM~iET z=z#-W*Y(PVt{W6Vb0d}>I524bqmwzlCme1^6I!UKS{*Sm0&1)S_3%t z8Ej^gz&Q3EPTzAT>tU%|3@rG_MN zuPNmP{@7rXiP*dnj4SgS8qjoQep5a?Q?N9}fx+{(Qt>9eLqt;?Sou=Sg*Gk4-h+Y$ zIIs})0ZuCBL-`c*5tarx4N0P=D&@tTQ?gHpXn+I#3-_6_@rC3;)*=D$ig zP5)Z^iirLFzirZ_sXb28)ImPQ%!H-?O+%8V znUxAnvk=k$2D+x%lnqU@D}rVYEd6g_(9ETj(`-fm8xj3)V6_u|v!&R)(9i=17XMcB z<0M@R$fuwMvGl-cNYb^iQeMy}^|lBRJ#e7wT1?r{wYVZ^mcY^j2L{bjN;%C;^uQ6( z0|!<+;kR&GUo6Fzg_DLjosDU+6E>#B<@F-ft$?K=PN^hvC8fN&K@GMt5e;!*OpB{( z0LQL|&1`irj$K13KelJd)+EYpEwI`Nzhz}L))~&St*x`6Y#nSU>!KH_t_I7_l}aUL zU6u0cnDDz1v2!ID%6e!3$M(eX)HN`UT~8@L_WOEUpNOZffz?jU=z#+~oTO=_e2O^+OaGgOBu&RD6`GDGqW=waO(!ZFnod## z&B<8$-@u?bRVk<8SFzKG=zjy-q^Z%)fP(%vu+VfCPSP|=KE<4krT1f@TpcJ#b*qET)vx{IG#7PDBqJSnY)0!fk!A6k8Hb8sc;|rp2YPF)c2m7pZPp zEDdo=C6S$!^6K&iTb_u9I54Kg6*YikSHfntG8o6MqLd%oyJV{p<+d7F?S$X5vKm_* z&a$ncv!QHFY$#hxFH&7+EIU^!m6WZclvl@u--U>sE5T5Es%H!N)Uzd) z9ykq2y0%uz>p7!j+Yr$M2l{s|Q#QVH{S-m79hMe2FlcsA%4xdR*^WfCz=8VCRkjoh zSv9sZ1a!ZFg`!ilzTeLz1RpN_jCX#SS8( z{|$6arfg^$t_YeOOaB`fG>u9*4ZlhpLPY-?*d|Rw>@XGCP&3@rU`8j>`frBrAdMMVD_=$g(^HZ+Y^1kJfv`rp8yIbSKK;TPF4MD)Lb z(p1?}tl4MJmtq$}Lk}ES=(-pu>AFNd1zn1z2TnthuCYpaK{MCeK|;v#v9s3O z<3zbV0akuxp02WVWquOcvONVBpUtOn(r5D-`4l(_%jT6DlEgiylo$AAgFR2g=9OSv znP1d^rYrMH^5J`mSSHM(f3`FZr0FlELet+w^uK|wsU6Qi z|6fh*6+zPhOaB`fG#!<4n$GmU5z+q!Ry*N0TZ+vJ4LxvRp=)-Wq-zfO6f`H69ykq2 zy5?5O3%a)6<{_d74s>1fDI5P*^DBa80W3XmV9+e2l+#S!z!oN=2M(-u!f)ZWzF3Ma z3MUP5IvdmC;@FrLm(Yt;wQ=|HbEQ&A*_ujubxinc5wUY6 z7|Pbx0FGS;n^_kyj;&G3kL9O~b%}E83RXMex1cOd_}!o_TX(SdZ1%uOpUs}~DX3u~A0nFKz{;0mz5cZn z+YlCd;J`xH#yF{-P2^M0rdWF5G$iTTTq&<-RLQm=q6ZH2@7z|(#&>ROMbK=6r3DTQ znoKFDp|h$V5iM|_zH^l=#X?q%Z4Uw6Z(yNlN1UW+C;61JGnVc*4M~c2Rmw}*tig68 zqWcYWMSCb4iux;pW&oD%H!x`SQp#!Awqqa>-EW{2Ej#`Hf`5<=+rK||y*7xkG`fLB z@q=+v@%zf>tovbUbkmSj{1B!5tg|%O0Yo&qfnNN9%0}_S6hU(kmPR))XiO=m;a+|? z5shx3ieG%1;%(6I0|({9JFnAu;IIQ5yVwXu(aQ!FWgmi*%05&+=Q|8bFPny>vX4;8 z&vy&GY((_3fnN4e%0}5oD}rVumR>e6XpU9NXLkIOAG<2{2 zdgtu~M$@(i7UiFWlgd9?KIc6JOWT@;r1DQw%Fp{Vk8LBOZ4LDD&r~+bKT8obqp-BC zfkAVQQcg2_ZyQZS+Zs5dcfH2WV-%ffU{UrMoK*G&@;TpySUS@*B$a)!Qhq)bNS6@N znFe~@H0oma7>>;~7|#Jsu~OeXV@X zcO90-GYv^)-=LJA?1kJ-( zeyIY3=24}bW<`FfBI1`Sa7OQXjXlmNo|*<0Wj~3N%6>{d=X)B^Gc82b`oB)?I@k8-^?8Sm?ezb%`-=sRY%jB?S^sr9 ZA9&DV+iX7bEPA0m|LcKWai)&o{{VZ}X@39! 
literal 0 HcmV?d00001 diff --git a/examples/stable-diffusion/quantization/stable-diffusion-xl/measure_config.json b/examples/stable-diffusion/quantization/stable-diffusion-xl/measure_config.json new file mode 100644 index 0000000000..5a250cad7c --- /dev/null +++ b/examples/stable-diffusion/quantization/stable-diffusion-xl/measure_config.json @@ -0,0 +1,6 @@ +{ + "method": "HOOKS", + "mode": "MEASURE", + "observer": "maxabs", + "dump_stats_path": "quantization/stable-diffusion-xl/measure/fp8" +} diff --git a/examples/stable-diffusion/quantization/stable-diffusion-xl/quantize_config.json b/examples/stable-diffusion/quantization/stable-diffusion-xl/quantize_config.json new file mode 100644 index 0000000000..5d686e659d --- /dev/null +++ b/examples/stable-diffusion/quantization/stable-diffusion-xl/quantize_config.json @@ -0,0 +1,7 @@ +{ + "method": "HOOKS", + "mode": "QUANTIZE", + "observer": "maxabs", + "scale_method": "maxabs_hw", + "dump_stats_path": "quantization/stable-diffusion-xl/measure/fp8" +} diff --git a/examples/stable-diffusion/text_to_video_generation.py b/examples/stable-diffusion/text_to_video_generation.py new file mode 100755 index 0000000000..8813e321cf --- /dev/null +++ b/examples/stable-diffusion/text_to_video_generation.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2021 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +# Adapted from ../stable-diffusion/text_to_image_generation.py + +import argparse +import logging +import sys +from pathlib import Path + +import torch +from diffusers.utils.export_utils import export_to_video + +from optimum.habana.diffusers import GaudiTextToVideoSDPipeline +from optimum.habana.transformers.gaudi_configuration import GaudiConfig +from optimum.habana.utils import set_seed + + +try: + from optimum.habana.utils import check_optimum_habana_min_version +except ImportError: + + def check_optimum_habana_min_version(*a, **b): + return () + + +# Will error if the minimal version of Optimum Habana is not installed. Remove at your own risks. +check_optimum_habana_min_version("1.16.0.dev0") + + +logger = logging.getLogger(__name__) + + +def main(): + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + + parser.add_argument( + "--model_name_or_path", + default="ali-vilab/text-to-video-ms-1.7b", + type=str, + help="Path to pre-trained model", + ) + # Pipeline arguments + parser.add_argument( + "--prompts", + type=str, + nargs="*", + default="Spiderman is surfing", + help="The prompt or prompts to guide the video generation.", + ) + parser.add_argument( + "--num_videos_per_prompt", type=int, default=1, help="The number of videos to generate per prompt." 
+ ) + parser.add_argument("--batch_size", type=int, default=1, help="The number of videos in a batch.") + parser.add_argument( + "--height", + type=int, + default=0, + help="The height in pixels of the generated videos (0=default from model config).", + ) + parser.add_argument( + "--width", + type=int, + default=0, + help="The width in pixels of the generated videos (0=default from model config).", + ) + parser.add_argument("--num_frames", type=int, default=20, help="The number of frames in the generated videos.") + parser.add_argument( + "--num_inference_steps", + type=int, + default=50, + help=( + "The number of denoising steps. More denoising steps usually lead to a higher quality videos at the expense" + " of slower inference." + ), + ) + parser.add_argument( + "--guidance_scale", + type=float, + default=7.5, + help=( + "Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598)." + " Higher guidance scale encourages to generate videos that are closely linked to the text `prompt`," + " usually at the expense of lower video quality." + ), + ) + parser.add_argument( + "--negative_prompts", + type=str, + nargs="*", + default=None, + help="The prompt or prompts not to guide the video generation.", + ) + parser.add_argument( + "--eta", + type=float, + default=0.0, + help="Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502.", + ) + parser.add_argument( + "--output_type", + type=str, + choices=["mp4", "np"], + default="mp4", + help="Whether to return mp4 or Numpy arrays.", + ) + + parser.add_argument( + "--pipeline_save_dir", + type=str, + default=None, + help="The directory where the generation pipeline will be saved.", + ) + parser.add_argument( + "--video_save_dir", + type=str, + default="./generated-videos", + help="The directory where videos will be saved.", + ) + + parser.add_argument("--seed", type=int, default=42, help="Random seed for initialization.") + + # HPU-specific arguments + parser.add_argument("--use_habana", action="store_true", help="Use HPU.") + parser.add_argument( + "--use_hpu_graphs", action="store_true", help="Use HPU graphs on HPU. This should lead to faster generations." 
+ ) + parser.add_argument( + "--dtype", + default="bf16", + choices=["bf16", "fp32", "autocast_bf16"], + help="Which runtime dtype to perform generation in.", + ) + args = parser.parse_args() + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO) + logger.info(f"Arguments: {args}") + + # Set video resolution + kwargs_call = {} + if args.width > 0 and args.height > 0: + kwargs_call["width"] = args.width + kwargs_call["height"] = args.height + kwargs_call["num_frames"] = args.num_frames + + gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} + if args.dtype == "autocast_bf16": + gaudi_config_kwargs["use_torch_autocast"] = True + + gaudi_config = GaudiConfig(**gaudi_config_kwargs) + logger.info(f"Gaudi Config: {gaudi_config}") + + kwargs = { + "use_habana": args.use_habana, + "use_hpu_graphs": args.use_hpu_graphs, + "gaudi_config": gaudi_config, + } + if args.dtype == "bf16": + kwargs["torch_dtype"] = torch.bfloat16 + elif args.dtype == "fp32": + kwargs["torch_dtype"] = torch.float32 + + # Generate images + pipeline: GaudiTextToVideoSDPipeline = GaudiTextToVideoSDPipeline.from_pretrained( + args.model_name_or_path, **kwargs + ) + set_seed(args.seed) + outputs = pipeline( + prompt=args.prompts, + num_videos_per_prompt=args.num_videos_per_prompt, + batch_size=args.batch_size, + num_inference_steps=args.num_inference_steps, + guidance_scale=args.guidance_scale, + negative_prompt=args.negative_prompts, + eta=args.eta, + output_type="pil" if args.output_type == "mp4" else args.output_type, # Naming inconsistency in base class + **kwargs_call, + ) + + # Save the pipeline in the specified directory if not None + if args.pipeline_save_dir is not None: + pipeline.save_pretrained(args.pipeline_save_dir) + + # Save images in the specified directory if not None and if they are in PIL format + if args.video_save_dir is not None: + if args.output_type == "mp4": + video_save_dir = Path(args.video_save_dir) + video_save_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Saving images in {video_save_dir.resolve()}...") + + for i, video in enumerate(outputs.videos): + filename = video_save_dir / f"video_{i + 1}.mp4" + export_to_video(video, str(filename.resolve())) + else: + logger.warning("--output_type should be equal to 'mp4' to save images in --video_save_dir.") + + +if __name__ == "__main__": + main() diff --git a/examples/stable-diffusion/training/download_train_datasets.py b/examples/stable-diffusion/training/download_train_datasets.py new file mode 100755 index 0000000000..6ff500c9ef --- /dev/null +++ b/examples/stable-diffusion/training/download_train_datasets.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +import shutil +from pathlib import Path + +from huggingface_hub import hf_hub_download, snapshot_download + + +# Download Cat-Toy example dataset +local_dir = "./cat" +snapshot_download( + repo_id="diffusers/cat_toy_example", + local_dir=local_dir, + repo_type="dataset", + ignore_patterns=".gitattributes", +) +cache_dir = Path(local_dir, ".cache") +if cache_dir.is_dir(): + shutil.rmtree(cache_dir) + +# Download Dog example dataset +local_dir = "./dog" +snapshot_download( + repo_id="diffusers/dog-example", + local_dir=local_dir, + repo_type="dataset", + ignore_patterns=".gitattributes", +) +cache_dir = Path(local_dir, ".cache") +if cache_dir.is_dir(): + shutil.rmtree(cache_dir) + +# Download ControlNet example 
images +local_dir = "./cnet" +file_path1 = hf_hub_download( + repo_id="huggingface/documentation-images", + subfolder="diffusers/controlnet_training", + filename="conditioning_image_1.png", + repo_type="dataset", + local_dir=local_dir, +) +file_path2 = hf_hub_download( + repo_id="huggingface/documentation-images", + subfolder="diffusers/controlnet_training", + filename="conditioning_image_2.png", + repo_type="dataset", + local_dir=local_dir, +) +shutil.move(file_path1, local_dir) +shutil.move(file_path2, local_dir) +cache_dir = Path(local_dir, ".cache") +if cache_dir.is_dir(): + shutil.rmtree(cache_dir) +sub_dir = Path(local_dir, "diffusers") +if sub_dir.is_dir(): + shutil.rmtree(sub_dir) diff --git a/examples/text-generation/quantization_config/pow2_quant.json b/examples/text-generation/quantization_config/pow2_quant.json new file mode 100644 index 0000000000..e1f2eb1c6e --- /dev/null +++ b/examples/text-generation/quantization_config/pow2_quant.json @@ -0,0 +1,7 @@ +{ + "method": "HOOKS", + "mode": "QUANTIZE", + "observer": "maxabs", + "scale_method": "maxabs_pow2", + "dump_stats_path": "./hqt_output/measure" +} diff --git a/examples/text-generation/quantization_config/weight_opt_quant.json b/examples/text-generation/quantization_config/weight_opt_quant.json new file mode 100644 index 0000000000..1ec2dc6b6a --- /dev/null +++ b/examples/text-generation/quantization_config/weight_opt_quant.json @@ -0,0 +1,7 @@ +{ + "method": "HOOKS", + "mode": "QUANTIZE", + "observer": "maxabs", + "scale_method": "maxabs_hw_opt_weight", + "dump_stats_path": "./hqt_output/measure" +} diff --git a/examples/text-generation/requirements_awq.txt b/examples/text-generation/requirements_awq.txt new file mode 100644 index 0000000000..5632195c99 --- /dev/null +++ b/examples/text-generation/requirements_awq.txt @@ -0,0 +1,2 @@ +triton==3.1.0 +autoawq diff --git a/examples/video-comprehension/README.md b/examples/video-comprehension/README.md new file mode 100644 index 0000000000..e26d4c228c --- /dev/null +++ b/examples/video-comprehension/README.md @@ -0,0 +1,43 @@ + + +# Examples + +This directory contains example scripts that demonstrate how to perform video comprehension on Gaudi with graph mode. + +## Single-HPU inference + +### Video-LLaVA Model + +```bash +python3 run_example.py \ + --model_name_or_path "LanguageBind/Video-LLaVA-7B-hf" \ + --warmup 3 \ + --n_iterations 5 \ + --batch_size 1 \ + --use_hpu_graphs \ + --bf16 \ + --output_dir ./ +``` +Models that have been validated: + - [LanguageBind/Video-LLaVA-7B-hf ](https://huggingface.co/LanguageBind/Video-LLaVA-7B-hf) + +CogvideoX test: +```bash +python3 run_example.py \ + --model_name_or_path THUDM/CogVideoX-2b \ + --pipeline_type 'cogvideox' \ + --output_dir 'cogvideo_out' \ +``` + + diff --git a/examples/video-comprehension/requirements.txt b/examples/video-comprehension/requirements.txt new file mode 100644 index 0000000000..7ed65352d9 --- /dev/null +++ b/examples/video-comprehension/requirements.txt @@ -0,0 +1,2 @@ +av == 12.1.0 +sentencepiece == 0.2.0 diff --git a/examples/video-comprehension/run_example.py b/examples/video-comprehension/run_example.py new file mode 100644 index 0000000000..544f4c280b --- /dev/null +++ b/examples/video-comprehension/run_example.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2021 The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and + +import argparse +import json +import logging +import os +import time +from pathlib import Path + +import av +import numpy as np +import torch +from huggingface_hub import hf_hub_download +from transformers import VideoLlavaProcessor + +from optimum.habana.transformers.modeling_utils import ( + GaudiVideoLlavaForConditionalGeneration, + adapt_transformers_to_gaudi, +) +from optimum.habana.diffusers import GaudiCogVideoXPipeline +from optimum.habana.transformers.gaudi_configuration import GaudiConfig +from diffusers.utils.export_utils import export_to_video + + +logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO, +) +logger = logging.getLogger(__name__) + + +def read_video_pyav(container, indices): + frames = [] + container.seek(0) + start_index = indices[0] + end_index = indices[-1] + for i, frame in enumerate(container.decode(video=0)): + if i > end_index: + break + if i >= start_index and i in indices: + frames.append(frame) + return np.stack([x.to_ndarray(format="rgb24") for x in frames]) + + +def cogvideoX_generate(args): + gaudi_config_kwargs = {"use_fused_adam": True, "use_fused_clip_norm": True} + gaudi_config_kwargs["use_torch_autocast"] = True + + gaudi_config = GaudiConfig(**gaudi_config_kwargs) + logger.info(f"Gaudi Config: {gaudi_config}") + + kwargs = { + "use_habana": True, + "use_hpu_graphs": True, + "gaudi_config": gaudi_config, + } + kwargs["torch_dtype"] = torch.bfloat16 + pipeline: GaudiCogVideoXPipeline = GaudiCogVideoXPipeline.from_pretrained(args.model_name_or_path, **kwargs) + pipeline.vae.enable_tiling() + pipeline.vae.enable_slicing() + video = pipeline( + prompt=args.prompt, + num_videos_per_prompt=1, + num_inference_steps=50, + num_frames=49, + guidance_scale=6, + generator=torch.Generator(device="cpu").manual_seed(42), + ).frames[0] + video_save_dir = Path(args.output_dir) + video_save_dir.mkdir(parents=True, exist_ok=True) + filename = video_save_dir / "cogvideoX_out.mp4" + export_to_video(video, str(filename.resolve()), fps=8) + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + "--model_name_or_path", + default=None, + type=str, + help="Path to pre-trained model", + ) + parser.add_argument( + "--pipeline_type", + type=str, + nargs="*", + default="sdp", + help="pipeline type:sdp or cogvideoX", + ) + parser.add_argument( + "--video_path", + default=None, + type=str, + nargs="*", + help='Path to video as input. Can be a single string (eg: --image_path "URL1"), or a list of space-separated strings (eg: --video_path "URL1" "URL2")', + ) + parser.add_argument( + "--prompt", + default=None, + type=str, + help='Optional argument to give a prompt of your choice as input. is a single string (eg: --prompt "Hello world")', + ) + parser.add_argument( + "--use_hpu_graphs", + action="store_true", + help="Whether to use HPU graphs or not. 
Using HPU graphs should give better latencies.", + ) + parser.add_argument("--max_new_tokens", type=int, default=100, help="Number of tokens to generate.") + parser.add_argument( + "--bf16", + action="store_true", + help="Whether to perform generation in bf16 precision.", + ) + parser.add_argument( + "--output_dir", + default=None, + type=str, + help="Output directory to store results in.", + ) + parser.add_argument( + "--token", + default=None, + type=str, + help="The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " + "generated when running `huggingface-cli login` (stored in `~/.huggingface`).", + ) + parser.add_argument("--batch_size", type=int, default=1, help="Input batch size.") + parser.add_argument("--warmup", type=int, default=3, help="Number of warmup iterations for benchmarking.") + parser.add_argument("--n_iterations", type=int, default=5, help="Number of inference iterations for benchmarking.") + parser.add_argument( + "--ignore_eos", + action="store_true", + help="Whether to disable stopping with eos token when calling `generate`.", + ) + parser.add_argument( + "--use_flash_attention", + action="store_true", + help="Whether to enable Habana Flash Attention, provided that the model supports it.", + ) + parser.add_argument( + "--flash_attention_recompute", + action="store_true", + help="Whether to enable Habana Flash Attention in recompute mode on first token generation. This gives an opportunity of splitting graph internally which helps reduce memory consumption.", + ) + + args = parser.parse_args() + if args.pipeline_type[0] == "cogvideox": + cogvideoX_generate(args) + return None + + os.environ.setdefault("EXPERIMENTAL_WEIGHT_SHARING", "FALSE") + + if args.video_path is None: + args.video_path = [ + hf_hub_download( + repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset" + ) + ] + + if args.prompt is None: + args.prompt = ["USER:

1. Introduction

\n\nThe DeepSpeed model parallel training is a simple and effective way to train large models. It is a framework that enables you to train models with trillions of parameters and beyond.\n\nDeepSpeed is a distributed deep learning optimization toolkit that makes it easy and efficient", + "throughput": 36.578709544111 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-2-9b-1-False-True]": { + "gaudi2": { + "output": "DeepSpeed is a machine learning framework that enables training of large-scale deep learning models on a single GPU or across multiple GPUs. It is designed to be easy to use and highly scalable, making it a powerful tool for researchers and practitioners working with large-scale deep learning models.\n\nDeepSpeed is built on top of PyTorch, a popular deep learning framework, and provides a set of tools and libraries that make it easy to train large-scale models. It includes features such as zero-shot inference, which allows models to be", + "throughput": 92.302359446567 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-7b-1-False-False]": { + "gaudi1": { + "throughput": 28.84284625836978 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[google/gemma-7b-1-False-True]": { + "gaudi2": { + "output": "DeepSpeed is a machine learning framework that enables training of large-scale models on commodity hardware. It is designed to be a drop-in replacement for PyTorch, and it is compatible with the existing PyTorch ecosystem. DeepSpeed is designed to be easy to use, and it provides a number of features that make it easy to train large-scale models. DeepSpeed is designed to be scalable, and it can be used to train models on a single machine or on a cluster of machines. DeepSpeed is designed to be efficient,", + "throughput": 109.70751574382221 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[gpt2-xl-1-False-False]": { + "gaudi1": { + "throughput": 142.11481820425706 + }, + "gaudi2": { + "throughput": 281.8734689674413 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-1-True-False]": { + "gaudi1": { + "throughput": 44.39616259946937 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-1-True-True]": { + "gaudi2": { + "output": "DeepSpeed is a machine learning framework for deep learning. It is designed to be fast and efficient, while also being easy to use. DeepSpeed is based on the TensorFlow framework, and it uses the TensorFlow library to perform computations.\nDeepSpeed is a deep learning framework that is designed to be fast and efficient. It is based on the TensorFlow library and uses the TensorFlow library to perform computations. 
DeepSpeed is designed to be easy to use and to provide a high level of flex", + "throughput": 141.25776956002076 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-False-False]": { + "gaudi2": { + "throughput": 8711 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Llama-2-7b-hf-512-True-False]": { + "gaudi2": { + "throughput": 12808 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[meta-llama/Meta-Llama-3-8B-1-True-False]": { + "gaudi2": { + "throughput": 129 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[microsoft/phi-2-1-False-False]": { + "gaudi1": { + "throughput": 92.53083167241344 + }, + "gaudi2": { + "throughput": 224.72307766211117 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mistral-7B-v0.1-1-True-False]": { + "gaudi1": { + "throughput": 41.21906841459711 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mistral-7B-v0.1-1-True-True]": { + "gaudi2": { + "output": "DeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system.\n\nDeepSpeed is a machine learning framework that accelerates training of large models on a single machine or distributed systems. It is designed to be compatible with PyTorch and TensorFlow, and can be used to train models on a single machine or on a distributed system", + "throughput": 130.2172236767782 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mistralai/Mixtral-8x7B-v0.1-1-False-True]": { + "gaudi2": { + "output": "DeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## Introduction\n\nDeepSpeed is a machine learning framework that enables training of large models on a single machine with a single GPU. 
It is designed to be easy to use and efficient, and it can be used to train models on a variety of tasks.\n\n## What is DeepSpeed", + "throughput": 23.7931001677926 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mosaicml/mpt-30b-1-False-False]": { + "gaudi2": { + "throughput": 36.06464336116623 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[mosaicml/mpt-7b-1-False-False]": { + "gaudi1": { + "throughput": 45.45168927038262 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[openbmb/MiniCPM3-4B-1-False-False]": { + "gaudi2": { + "throughput": 65.116 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[stabilityai/stablelm-2-12b-1-False-False]": { + "gaudi1": { + "throughput": 26.80858949645992 + }, + "gaudi2": { + "throughput": 74.8904496532218 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[state-spaces/mamba-130m-hf-1536-False-False]": { + "gaudi2": { + "throughput": 5385.511100161605 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[state-spaces/mamba-130m-hf-224-False-False]": { + "gaudi1": { + "throughput": 794.542 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-40b-1-True-False]": { + "gaudi2": { + "throughput": 25.202450111088346 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-7b-1-True-False]": { + "gaudi1": { + "throughput": 44.82870145718665 + } + }, + "tests/test_text_generation_example.py::test_text_generation_bf16_1x[tiiuae/falcon-mamba-7b-1-False-False]": { + "gaudi2": { + "throughput": 47.1464839567739 + } + }, + "tests/test_text_generation_example.py::test_text_generation_contrastive_search[gpt2-xl-1-False]": { + "gaudi1": { + "throughput": 34.48141280163397 + }, + "gaudi2": { + "throughput": 51.61471298016438 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[Qwen/Qwen2.5-72B-2-1]": { + "gaudi2": { + "throughput": 26 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[bigscience/bloomz-7b1-8-1]": { + "gaudi1": { + "throughput": 31.994268212011505 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[bigscience/bloomz-8-1]": { + "gaudi2": { + "throughput": 36.77314954096159 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[facebook/opt-66b-2-1]": { + "gaudi2": { + "throughput": 28.48069266504111 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[google/gemma-2-27b-8-1]": { + "gaudi2": { + "throughput": 87.578709544111 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[google/gemma-2-9b-8-1]": { + "gaudi2": { + "throughput": 110.12610917383735 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[meta-llama/Llama-2-70b-hf-8-1]": { + "gaudi2": { + "throughput": 64.10514998902435 + } + }, + "tests/test_text_generation_example.py::test_text_generation_deepspeed[meta-llama/Meta-Llama-3-70B-Instruct-8-1]": { + "gaudi2": { + "throughput": 64 + } + }, + "tests/test_text_generation_example.py::test_text_generation_distributed_tp[meta-llama/Llama-2-7b-hf]": { + "gaudi2": { + "throughput": 1345.2369318328463 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-207-False-2048-128]": { + "gaudi2": { + "throughput": 568.5 + } + }, + 
"tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-3042-False-128-128]": { + "gaudi2": { + "throughput": 5374.6 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-4-750-False-128-2048]": { + "gaudi2": { + "throughput": 7422.4 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-70b-hf-8-172-False-2048-2048]": { + "gaudi2": { + "throughput": 4656.2 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-1230-False-128-128]": { + "gaudi2": { + "throughput": 13152.7 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-163-False-128-2048]": { + "gaudi2": { + "throughput": 4774.7 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-81-False-2048-2048]": { + "gaudi2": { + "throughput": 1942.9 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[meta-llama/Llama-2-7b-hf-1-94-False-2048-128]": { + "gaudi2": { + "throughput": 1293.3 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[microsoft/phi-2-1-1-True-128-128]": { + "gaudi2": { + "throughput": 254.08932787178165 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-120-True-128-2048]": { + "gaudi2": { + "throughput": 6979.225194247115 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-120-True-2048-128]": { + "gaudi2": { + "throughput": 1681.4401450088983 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-44-True-2048-2048]": { + "gaudi2": { + "throughput": 3393.149396451692 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mistral-7B-Instruct-v0.2-1-896-True-128-128]": { + "gaudi2": { + "throughput": 17068.965283763682 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-1-1-True-128-128]": { + "gaudi2": { + "throughput": 40.94 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-48-True-2048-2048]": { + "gaudi2": { + "throughput": 1147.5 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-768-True-128-128]": { + "gaudi2": { + "throughput": 3428.65 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-96-True-128-2048]": { + "gaudi2": { + "throughput": 2570.34 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[mistralai/Mixtral-8x7B-v0.1-2-96-True-2048-128]": { + "gaudi2": { + "throughput": 379.03 + } + }, + "tests/test_text_generation_example.py::test_text_generation_fp8[tiiuae/falcon-180B-4-950-True-128-128]": { + "gaudi2": { + "throughput": 2506.68 + } + }, + "tests/test_text_generation_example.py::test_text_generation_gptq[TheBloke/Llama-2-7b-Chat-GPTQ-1-10-False-128-2048]": { + "gaudi2": { + "throughput": 456.7 + } + }, + "tests/test_text_generation_example.py::test_text_generation_torch_compile[meta-llama/Llama-2-7b-hf]": { + "gaudi2": { + "throughput": 102.27823420713148 + } + }, + "tests/test_text_generation_example.py::test_text_generation_torch_compile_distributed[meta-llama/Llama-2-7b-hf]": { + "gaudi2": { + "throughput": 39.72973199515235 + } + } +} \ No 
newline at end of file diff --git a/tests/baselines/llava_1_5_7b_hf.json b/tests/baselines/llava_1_5_7b_hf.json new file mode 100644 index 0000000000..83480ce8f2 --- /dev/null +++ b/tests/baselines/llava_1_5_7b_hf.json @@ -0,0 +1,38 @@ +{ + "gaudi2": { + "image2text_lora_finetune": { + "num_train_epochs": 1, + "eval_batch_size": 4, + "distribution": { + "multi_card": { + "learning_rate": 5e-5, + "train_batch_size": 2, + "train_runtime": 118.5782, + "train_samples_per_second": 25.146, + "eval_accuracy": 0.2122, + "extra_arguments": [ + "--bf16", + "--gradient_accumulation_steps 8", + "--eval_strategy no", + "--save_strategy no", + "--warmup_steps 50", + "--lr_scheduler_type constant", + "--max_grad_norm 0.3", + "--logging_steps 1", + "--use_hpu_graphs_for_inference", + "--lora_rank 8", + "--lora_alpha 8", + "--lora_dropout 0.1", + "--lora_target_modules '.*(language_model).*(down_proj|gate_proj|up_proj|k_proj|q_proj|v_proj|o_proj).*$'", + "--low_cpu_mem_usage True", + "--adam_epsilon 1e-08", + "--input_column_name image query", + "--output_column_name answers", + "--remove_unused_columns False", + "--max_seq_length 512" + ] + } + } + } + } +} \ No newline at end of file diff --git a/tests/test_bnb_inference.py b/tests/test_bnb_inference.py new file mode 100644 index 0000000000..9218869669 --- /dev/null +++ b/tests/test_bnb_inference.py @@ -0,0 +1,66 @@ +# coding=utf-8 +# Copyright 2022 the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import os + +import torch +from habana_frameworks.torch.hpu import wrap_in_hpu_graph +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + +from optimum.habana.transformers import modeling_utils + + +modeling_utils.adapt_transformers_to_gaudi() + +assert os.environ.get("GAUDI2_CI", "0") == "1", "Execution does not support on Gaudi1" + +MODEL_ID = "meta-llama/Llama-3.2-1B" + + +def get_model(token: str): + nf4_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + ) + + model = AutoModelForCausalLM.from_pretrained( + MODEL_ID, quantization_config=nf4_config, device_map={"": "hpu"}, torch_dtype=torch.bfloat16, token=token.value + ) + + return model + + +def test_nf4_quantization_inference(token: str): + tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value) + + model = get_model(token) + + generation_config = copy.deepcopy(model.generation_config) + generation_config.max_new_tokens = 20 + generation_config.use_cache = True + generation_config.use_flash_attention = True + + model = wrap_in_hpu_graph(model) + + input_text = "Hello my name is" + inputs = tokenizer(input_text, return_tensors="pt").to(device="hpu") + + torch.manual_seed(42) + outputs = model.generate(**inputs, generation_config=generation_config, hpu_graphs=True, lazy_mode=True) + decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True) + + assert decoded_output == "Hello my name is Marlene and I am 36 years old. 
I am a very happy person, I love to" diff --git a/tests/test_bnb_qlora.py b/tests/test_bnb_qlora.py new file mode 100644 index 0000000000..ac33a74ee1 --- /dev/null +++ b/tests/test_bnb_qlora.py @@ -0,0 +1,152 @@ +# coding=utf-8 +# Copyright 2022 the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import subprocess + +import pytest +import torch +from datasets import load_dataset +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, DataCollatorForLanguageModeling + +from optimum.habana import GaudiConfig, GaudiTrainer, GaudiTrainingArguments +from optimum.habana.transformers import modeling_utils + + +modeling_utils.adapt_transformers_to_gaudi() + +assert os.environ.get("GAUDI2_CI", "0") == "1", "Execution does not support on Gaudi1" +try: + import sys + + subprocess.check_call([sys.executable, "-m", "pip", "install", "peft==0.12.0"]) + from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training +except subprocess.CalledProcessError: + pytest.fail("Failed to install peft==0.12.0") + +MODEL_ID = "meta-llama/Llama-3.2-1B" + + +def print_model_size(model): + """ + Prints the model size in GB. + """ + model_size = sum(p.numel() * p.element_size() for p in model.parameters()) + model_size_GB = model_size / (1024**3) + print(f" Model size : {model_size_GB} GB") + + +def print_trainable_parameters(model): + """ + Prints the number of trainable parameters in the model. 
+ """ + trainable_params = 0 + all_param = 0 + for _, param in model.named_parameters(): + all_param += param.numel() + if param.requires_grad: + trainable_params += param.numel() + print( + f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}" + ) + + +def get_data(tokenizer, dataset_name): + dataset = load_dataset(dataset_name) + dataset = dataset.shuffle(seed=42) + data = dataset.map(lambda example: tokenizer(example["text"]), batched=True) + split_data = data["train"].train_test_split(test_size=0.1, seed=42) + + return split_data + + +def get_model(token: str): + nf4_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + ) + + model = AutoModelForCausalLM.from_pretrained( + MODEL_ID, quantization_config=nf4_config, device_map={"": "hpu"}, torch_dtype=torch.bfloat16, token=token.value + ) + + return model + + +def test_nf4_quantization_inference(token: str): + tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token.value) + # needed for llama tokenizer + tokenizer.pad_token = tokenizer.eos_token + + model = get_model(token) + model.gradient_checkpointing_enable() + print_model_size(model) + + model = prepare_model_for_kbit_training(model) + + config = LoraConfig( + r=4, + lora_alpha=64, + target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], + lora_dropout=0.05, + bias="none", + task_type="CAUSAL_LM", + ) + + model = get_peft_model(model, config) + print_trainable_parameters(model) + + data = get_data(tokenizer, dataset_name="tatsu-lab/alpaca") + + gaudi_config = GaudiConfig( + use_fused_adam=True, + use_fused_clip_norm=True, + use_torch_autocast=True, + ) + + training_args = GaudiTrainingArguments( + evaluation_strategy="steps", + per_device_train_batch_size=8, + per_device_eval_batch_size=8, + gradient_accumulation_steps=2, + max_steps=5, + eval_steps=3, + warmup_steps=3, + learning_rate=2e-4, + logging_steps=1, + output_dir="results", + lr_scheduler_type="linear", + use_habana=True, + use_lazy_mode=True, + pipelining_fwd_bwd=True, + ) + + trainer = GaudiTrainer( + model=model, + train_dataset=data["train"], + eval_dataset=data["test"], + gaudi_config=gaudi_config, + args=training_args, + data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False), + ) + model.config.use_cache = False # silence the warnings. Please re-enable for inference! 
+ + trainer.train() + eval_loss = trainer.evaluate()["eval_loss"] + + expected_eval_loss = 1.638 + + assert abs(eval_loss - expected_eval_loss) < 5e-2 diff --git a/tests/test_video_llava.py b/tests/test_video_llava.py new file mode 100644 index 0000000000..30c42b0cd8 --- /dev/null +++ b/tests/test_video_llava.py @@ -0,0 +1,77 @@ +import json +import os +import re +import subprocess +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest + +from .test_examples import TIME_PERF_FACTOR + + +if os.environ.get("GAUDI2_CI", "0") == "1": + # Gaudi2 CI baselines + MODELS_TO_TEST = { + "bf16": [ + ("LanguageBind/Video-LLaVA-7B-hf", 27.72902536827787), + ], + } +else: + # Gaudi1 CI baselines + MODELS_TO_TEST = { + "bf16": [ + ("LanguageBind/Video-LLaVA-7B-hf", 9.22975629675865), + ], + } + + +def _install_requirements(): + PATH_TO_EXAMPLE_DIR = Path(__file__).resolve().parent.parent / "examples" + cmd_line = f"pip install -r {PATH_TO_EXAMPLE_DIR / 'video-comprehension' / 'requirements.txt'}".split() + p = subprocess.Popen(cmd_line) + return_code = p.wait() + assert return_code == 0 + + +def _test_video_llava(model_name: str, baseline: float): + _install_requirements() + command = ["python3"] + path_to_example_dir = Path(__file__).resolve().parent.parent / "examples" + env_variables = os.environ.copy() + + command += [ + f"{path_to_example_dir / 'video-comprehension' / 'run_example.py'}", + f"--model_name_or_path {model_name}", + "--bf16", + "--use_hpu_graphs", + ] + + with TemporaryDirectory() as tmp_dir: + command.append(f"--output_dir {tmp_dir}") + print(f"\n\nCommand to test: {' '.join(command)}\n") + + pattern = re.compile(r"([\"\'].+?[\"\'])|\s") + command = [x for y in command for x in re.split(pattern, y) if x] + + proc = subprocess.run(command, env=env_variables) + + # Ensure the run finished without any issue + # Use try-except to avoid logging the token if used + try: + assert proc.returncode == 0 + except AssertionError as e: + if "'--token', 'hf_" in e.args[0]: + e.args = (f"The following command failed:\n{' '.join(command[:-2])}",) + raise + + with open(Path(tmp_dir) / "results.json") as fp: + results = json.load(fp) + + # Ensure performance requirements (throughput) are met + assert results["throughput"] >= (2 - TIME_PERF_FACTOR) * baseline + + +@pytest.mark.parametrize("model_name, baseline", MODELS_TO_TEST["bf16"]) +def test_video_llava_bf16(model_name: str, baseline: float): + _test_video_llava(model_name, baseline) From aaa8b3f3ac52b3fc9444a2d6b804411798a9cfe0 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 24 Feb 2025 15:03:44 +0800 Subject: [PATCH 32/32] refine T5Config typo. --- tests/test_diffusers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_diffusers.py b/tests/test_diffusers.py index 109665c0a0..5af4f296f8 100644 --- a/tests/test_diffusers.py +++ b/tests/test_diffusers.py @@ -90,6 +90,7 @@ DPTConfig, DPTFeatureExtractor, DPTForDepthEstimation, + T5Config, T5EncoderModel, T5Tokenizer, )