From 721159d4aee2011afc7992fabe77c707f8565d52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?=
Date: Sat, 2 Mar 2024 16:54:39 -0300
Subject: [PATCH 1/4] initial

---
 .../pipelines/animatediff/pipeline_animatediff.py | 12 ++++++++----
 .../animatediff/pipeline_animatediff_video2video.py | 8 ++++++--
 .../pipelines/controlnet/pipeline_controlnet.py | 8 ++++++--
 .../controlnet/pipeline_controlnet_img2img.py | 8 ++++++--
 .../controlnet/pipeline_controlnet_inpaint.py | 8 ++++++--
 .../controlnet/pipeline_controlnet_inpaint_sd_xl.py | 8 ++++++--
 .../controlnet/pipeline_controlnet_sd_xl.py | 8 ++++++--
 .../controlnet/pipeline_controlnet_sd_xl_img2img.py | 8 ++++++--
 .../pipeline_latent_consistency_img2img.py | 8 ++++++--
 .../pipeline_latent_consistency_text2img.py | 8 ++++++--
 src/diffusers/pipelines/pia/pipeline_pia.py | 8 ++++++--
 .../stable_diffusion/pipeline_stable_diffusion.py | 12 ++++++++----
 .../pipeline_stable_diffusion_img2img.py | 8 ++++++--
 .../pipeline_stable_diffusion_inpaint.py | 8 ++++++--
 .../pipeline_stable_diffusion_ldm3d.py | 12 ++++++++----
 .../pipeline_stable_diffusion_panorama.py | 12 ++++++++----
 .../pipeline_stable_diffusion_xl.py | 8 ++++++--
 .../pipeline_stable_diffusion_xl_img2img.py | 8 ++++++--
 .../pipeline_stable_diffusion_xl_inpaint.py | 8 ++++++--
 .../pipeline_stable_diffusion_xl_adapter.py | 8 ++++++--
 20 files changed, 128 insertions(+), 48 deletions(-)

diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
index e98d4ad4e37b..18441ae5d416 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
@@ -404,8 +404,12 @@ def prepare_ip_adapter_image_embeds(
             for single_image_embeds in ip_adapter_image_embeds:
                 if do_classifier_free_guidance:
                     single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
-                    single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
+                    single_image_embeds = single_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:]))
+                    )
+                    single_negative_image_embeds = single_negative_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:]))
+                    )
                     single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
                 else:
                     single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
@@ -509,9 +513,9 @@ def check_inputs(
                 raise ValueError(
                     f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                 )
-            elif ip_adapter_image_embeds[0].ndim != 3:
+            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                 raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
+                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )

     # Copied from diffusers.pipelines.text_to_video_synthesis.pipeline_text_to_video_synth.TextToVideoSDPipeline.prepare_latents

diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
index ced64889044f..a10c128dd570 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
@@ -482,8 +482,12 @@
def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index eab2f7aa22d0..4d9a04ec08e1 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -514,8 +514,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index 4fc9791d3d8e..492e79595281 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -507,8 +507,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index ce7537d84215..63daf5f938a0 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -632,8 +632,12 @@ def prepare_ip_adapter_image_embeds( for 
single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 0f1d5ea48e71..686c1ddf775a 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -541,8 +541,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 9883b4f64790..50ec70409309 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -519,8 +519,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index cf32ae81c562..53780a56caa8 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -571,8 +571,12 @@ def 
prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index 6d1b1a0db444..6f1f3580359e 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -457,8 +457,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index fa27c0fbd5bc..fed49d186315 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -441,8 +441,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 485ccb22e5e9..6541715a97ce 100644 --- 
a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -623,8 +623,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index 5126e6f4c378..be10b2c01d04 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -524,8 +524,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) @@ -639,9 +643,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 9c6fbb2310ac..eb34fe930963 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -568,8 +568,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = 
single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 3392fd6ddecc..7c82cd24e73b 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -640,8 +640,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index 502cd340bcd8..f3f73ca55897 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -446,8 +446,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) @@ -553,9 +557,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None): diff --git 
a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index 49cc68926b7e..ebc68ab5d9dd 100644 --- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ -418,8 +418,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) @@ -550,9 +554,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 4a34ae89d245..df089042e28e 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -553,8 +553,12 @@ def prepare_ip_adapter_image_embeds( for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) - single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1) - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) + single_negative_image_embeds = single_negative_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index ef25ca94d16c..47320ceb4175 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -786,8 +786,12 @@ def prepare_ip_adapter_image_embeds( for 
single_image_embeds in ip_adapter_image_embeds:
                 if do_classifier_free_guidance:
                     single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
-                    single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
+                    single_image_embeds = single_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:]))
+                    )
+                    single_negative_image_embeds = single_negative_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:]))
+                    )
                     single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
                 else:
                     single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
index 2cb946eb56ad..1c58f8677871 100644
--- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
+++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
@@ -490,8 +490,12 @@ def prepare_ip_adapter_image_embeds(
             for single_image_embeds in ip_adapter_image_embeds:
                 if do_classifier_free_guidance:
                     single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
-                    single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
+                    single_image_embeds = single_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:]))
+                    )
+                    single_negative_image_embeds = single_negative_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:]))
+                    )
                     single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
                 else:
                     single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
index abf743f4f305..ff65fa264342 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -567,8 +567,12 @@ def prepare_ip_adapter_image_embeds(
             for single_image_embeds in ip_adapter_image_embeds:
                 if do_classifier_free_guidance:
                     single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
-                    single_negative_image_embeds = single_negative_image_embeds.repeat(num_images_per_prompt, 1, 1)
-                    single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
+                    single_image_embeds = single_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:]))
+                    )
+                    single_negative_image_embeds = single_negative_image_embeds.repeat(
+                        num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:]))
+                    )
                     single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
                 else:
                     single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)

From 7df53ca0e1cf306e692faeca1cd4d40585c85ef9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?=
Date: Sat, 2 Mar 2024 18:37:20 -0300
Subject: [PATCH 2/4] check_inputs fix to the rest of pipelines

---
 .../pipelines/animatediff/pipeline_animatediff_video2video.py | 4 ++--
 src/diffusers/pipelines/controlnet/pipeline_controlnet.py | 4 ++--
 .../pipelines/controlnet/pipeline_controlnet_img2img.py | 4 ++--
 .../pipelines/controlnet/pipeline_controlnet_inpaint.py | 4 ++--
 .../pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py | 4 ++--
 .../pipelines/controlnet/pipeline_controlnet_sd_xl.py | 4 ++--
 .../pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py | 4 ++--
 .../pipeline_latent_consistency_img2img.py | 4 ++--
 .../pipeline_latent_consistency_text2img.py | 4 ++--
 src/diffusers/pipelines/pia/pipeline_pia.py | 4 ++--
 .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 4 ++--
 .../stable_diffusion/pipeline_stable_diffusion_inpaint.py | 4 ++--
 .../stable_diffusion_xl/pipeline_stable_diffusion_xl.py | 4 ++--
 .../pipeline_stable_diffusion_xl_img2img.py | 4 ++--
 .../pipeline_stable_diffusion_xl_inpaint.py | 4 ++--
 .../t2i_adapter/pipeline_stable_diffusion_xl_adapter.py | 4 ++--
 16 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
index a10c128dd570..f40eb2d1721c 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
@@ -593,9 +593,9 @@ def check_inputs(
                 raise ValueError(
                     f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                 )
-            elif ip_adapter_image_embeds[0].ndim != 3:
+            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                 raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
+                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )

     def get_timesteps(self, num_inference_steps, timesteps, strength, device):
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
index 4d9a04ec08e1..20ada7d1dcf6 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -730,9 +730,9 @@ def check_inputs(
                 raise ValueError(
                     f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                 )
-            elif ip_adapter_image_embeds[0].ndim != 3:
+            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                 raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
+                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )

     def check_image(self, image, prompt, prompt_embeds):
diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
index 492e79595281..62fa374ddfb9 100644
--- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
+++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -717,9 +717,9 @@ def check_inputs(
                 raise ValueError(
                     f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                 )
-            elif ip_adapter_image_embeds[0].ndim != 3:
+            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                 raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
+                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )

     #
Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 63daf5f938a0..1d0c0029e33b 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -875,9 +875,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 686c1ddf775a..bfb6120a69ff 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -821,9 +821,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) def prepare_control_image( diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 50ec70409309..63a185c3ba65 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -734,9 +734,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 53780a56caa8..5be1773b774c 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -798,9 +798,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + 
f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) # Copied from diffusers.pipelines.controlnet.pipeline_controlnet_sd_xl.StableDiffusionXLControlNetPipeline.check_image diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index 6f1f3580359e..23254c86bbdb 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -651,9 +651,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) @property diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index fed49d186315..2a29bdad8b3f 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -583,9 +583,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) @property diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 6541715a97ce..cabfc8225c48 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -582,9 +582,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index eb34fe930963..a0151a565ce2 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -689,9 +689,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 
4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) def get_timesteps(self, num_inference_steps, strength, device): diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 7c82cd24e73b..3a6b1a43f3ea 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -771,9 +771,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) def prepare_latents( diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index df089042e28e..334c59cab321 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -675,9 +675,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 47320ceb4175..16894583bd6d 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -616,9 +616,9 @@ def check_inputs( raise ValueError( f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}" ) - elif ip_adapter_image_embeds[0].ndim != 3: + elif ip_adapter_image_embeds[0].ndim not in [3, 4]: raise ValueError( - f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D" + f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D" ) def get_timesteps(self, num_inference_steps, strength, device, denoising_start=None): diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 1c58f8677871..5f1e5d75940a 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -855,9 +855,9 @@ 
def check_inputs(
                 raise ValueError(
                     f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                 )
-            elif ip_adapter_image_embeds[0].ndim != 3:
+            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                 raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
+                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )

     def prepare_latents(
diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
index ff65fa264342..5acfecf0b34b 100644
--- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -690,9 +690,9 @@ def check_inputs(
                 raise ValueError(
                     f"`ip_adapter_image_embeds` has to be of type `list` but is {type(ip_adapter_image_embeds)}"
                 )
-            elif ip_adapter_image_embeds[0].ndim != 3:
+            elif ip_adapter_image_embeds[0].ndim not in [3, 4]:
                 raise ValueError(
-                    f"`ip_adapter_image_embeds` has to be a list of 3D tensors but is {ip_adapter_image_embeds[0].ndim}D"
+                    f"`ip_adapter_image_embeds` has to be a list of 3D or 4D tensors but is {ip_adapter_image_embeds[0].ndim}D"
                 )

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents

From 7b08c7a8d06ed059e7bae3a96b98fe076c4e3787 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lvaro=20Somoza?=
Date: Sat, 2 Mar 2024 18:55:47 -0300
Subject: [PATCH 3/4] add fix for no cfg too

---
 src/diffusers/pipelines/animatediff/pipeline_animatediff.py | 4 +++-
 .../animatediff/pipeline_animatediff_video2video.py | 4 +++-
 src/diffusers/pipelines/controlnet/pipeline_controlnet.py | 4 +++-
 .../pipelines/controlnet/pipeline_controlnet_img2img.py | 4 +++-
 .../pipelines/controlnet/pipeline_controlnet_inpaint.py | 4 +++-
 .../controlnet/pipeline_controlnet_inpaint_sd_xl.py | 4 +++-
 .../pipelines/controlnet/pipeline_controlnet_sd_xl.py | 4 +++-
 .../controlnet/pipeline_controlnet_sd_xl_img2img.py | 4 +++-
 .../pipeline_latent_consistency_img2img.py | 4 +++-
 .../pipeline_latent_consistency_text2img.py | 4 +++-
 src/diffusers/pipelines/pia/pipeline_pia.py | 4 +++-
 .../pipelines/stable_diffusion/pipeline_stable_diffusion.py | 4 +++-
 .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 4 +++-
 .../stable_diffusion/pipeline_stable_diffusion_inpaint.py | 4 +++-
 .../stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py | 4 +++-
 .../pipeline_stable_diffusion_panorama.py | 4 +++-
 .../stable_diffusion_xl/pipeline_stable_diffusion_xl.py | 4 +++-
 .../pipeline_stable_diffusion_xl_img2img.py | 4 +++-
 .../pipeline_stable_diffusion_xl_inpaint.py | 4 +++-
 .../t2i_adapter/pipeline_stable_diffusion_xl_adapter.py | 4 +++-
 20 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
index 18441ae5d416..429e16201142 100644
--- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py
@@ -412,7 +412,9 @@ def prepare_ip_adapter_image_embeds(
                     )
                     single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
                 else:
-                    single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1)
+                    single_image_embeds = single_image_embeds.repeat(
+ num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index f40eb2d1721c..0f23737b2dc6 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -490,7 +490,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index 20ada7d1dcf6..f43b0831362a 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -522,7 +522,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index 62fa374ddfb9..a1d775b94ee6 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -515,7 +515,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 1d0c0029e33b..6aac13121707 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -640,7 +640,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index bfb6120a69ff..13e17d66d12d 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -549,7 +549,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = 
torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 63a185c3ba65..977b618e0eef 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -527,7 +527,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index 5be1773b774c..e2851e771551 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -579,7 +579,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index 23254c86bbdb..fad992200f5d 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -465,7 +465,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index 2a29bdad8b3f..194239aac99d 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -449,7 +449,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git 
a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index cabfc8225c48..87e8e345b9ee 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -631,7 +631,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index be10b2c01d04..c8b70753fcdb 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -532,7 +532,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index a0151a565ce2..4b480636e8a7 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -576,7 +576,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 3a6b1a43f3ea..322418bb8fe1 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -648,7 +648,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index f3f73ca55897..e24c4c00ba68 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -454,7 +454,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - 
single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index ebc68ab5d9dd..6d6b3ff23921 100644 --- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ -426,7 +426,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index 334c59cab321..d2f23ba6db0f 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -561,7 +561,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index 16894583bd6d..d25134a36b76 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -794,7 +794,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 5f1e5d75940a..212d75bbc73d 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -498,7 +498,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds diff --git 
a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 5acfecf0b34b..2210bcb6284c 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -575,7 +575,9 @@ def prepare_ip_adapter_image_embeds( ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: - single_image_embeds = single_image_embeds.repeat(num_images_per_prompt, 1, 1) + single_image_embeds = single_image_embeds.repeat( + num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + ) image_embeds.append(single_image_embeds) return image_embeds From 582a27905efa9e0809c6eeb13c5ad90c604970a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Somoza?= Date: Sun, 3 Mar 2024 02:15:56 -0300 Subject: [PATCH 4/4] use of variable --- .../pipelines/animatediff/pipeline_animatediff.py | 7 ++++--- .../animatediff/pipeline_animatediff_video2video.py | 7 ++++--- src/diffusers/pipelines/controlnet/pipeline_controlnet.py | 7 ++++--- .../pipelines/controlnet/pipeline_controlnet_img2img.py | 7 ++++--- .../pipelines/controlnet/pipeline_controlnet_inpaint.py | 7 ++++--- .../controlnet/pipeline_controlnet_inpaint_sd_xl.py | 7 ++++--- .../pipelines/controlnet/pipeline_controlnet_sd_xl.py | 7 ++++--- .../controlnet/pipeline_controlnet_sd_xl_img2img.py | 7 ++++--- .../pipeline_latent_consistency_img2img.py | 7 ++++--- .../pipeline_latent_consistency_text2img.py | 7 ++++--- src/diffusers/pipelines/pia/pipeline_pia.py | 7 ++++--- .../stable_diffusion/pipeline_stable_diffusion.py | 7 ++++--- .../stable_diffusion/pipeline_stable_diffusion_img2img.py | 7 ++++--- .../stable_diffusion/pipeline_stable_diffusion_inpaint.py | 7 ++++--- .../pipeline_stable_diffusion_ldm3d.py | 7 ++++--- .../pipeline_stable_diffusion_panorama.py | 7 ++++--- .../stable_diffusion_xl/pipeline_stable_diffusion_xl.py | 7 ++++--- .../pipeline_stable_diffusion_xl_img2img.py | 7 ++++--- .../pipeline_stable_diffusion_xl_inpaint.py | 7 ++++--- .../t2i_adapter/pipeline_stable_diffusion_xl_adapter.py | 7 ++++--- 20 files changed, 80 insertions(+), 60 deletions(-) diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py index 429e16201142..ce2f6585c601 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff.py @@ -400,20 +400,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, 
*(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py index 0f23737b2dc6..bcfcd3a24b5d 100644 --- a/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +++ b/src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py @@ -478,20 +478,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py index f43b0831362a..8f31dfc2678a 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet.py @@ -510,20 +510,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py index a1d775b94ee6..9d2c76fd7483 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py @@ -503,20 +503,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = 
single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py index 6aac13121707..c4f1bff5efcd 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py @@ -628,20 +628,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py index 13e17d66d12d..52ffe5a3f356 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py @@ -537,20 +537,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) 
image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py index 977b618e0eef..0b611350a6f1 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py @@ -515,20 +515,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py index e2851e771551..4deee37f7df1 100644 --- a/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +++ b/src/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py @@ -567,20 +567,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py index fad992200f5d..f64854ea982b 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py @@ -453,20 +453,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, 
single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py index 194239aac99d..e9bacaa89ba5 100644 --- a/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +++ b/src/diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py @@ -437,20 +437,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/pia/pipeline_pia.py b/src/diffusers/pipelines/pia/pipeline_pia.py index 87e8e345b9ee..bd3e2891f0d6 100644 --- a/src/diffusers/pipelines/pia/pipeline_pia.py +++ b/src/diffusers/pipelines/pia/pipeline_pia.py @@ -619,20 +619,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * 
len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py index c8b70753fcdb..9e4e6c186ffa 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py @@ -520,20 +520,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py index 4b480636e8a7..b43e0eb2abcd 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py @@ -564,20 +564,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py index 322418bb8fe1..221d5c2cfd3f 100644 --- a/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +++ b/src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py @@ -636,20 +636,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: 
single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py index e24c4c00ba68..dbfb5e08ef23 100644 --- a/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +++ b/src/diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py @@ -442,20 +442,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py index 6d6b3ff23921..feda710e0049 100644 --- a/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +++ b/src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py @@ -414,20 +414,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) 
else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py index d2f23ba6db0f..776696e9d486 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py @@ -549,20 +549,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py index d25134a36b76..5ba12baad065 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +++ b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py @@ -782,20 +782,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py index 212d75bbc73d..5b9628f51a41 100644 --- a/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +++ 
b/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py @@ -486,20 +486,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds) diff --git a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py index 2210bcb6284c..4e0cc61f5c1d 100644 --- a/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +++ b/src/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py @@ -563,20 +563,21 @@ def prepare_ip_adapter_image_embeds( image_embeds.append(single_image_embeds) else: + repeat_dims = [1] image_embeds = [] for single_image_embeds in ip_adapter_image_embeds: if do_classifier_free_guidance: single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2) single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) single_negative_image_embeds = single_negative_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_negative_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:])) ) single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds]) else: single_image_embeds = single_image_embeds.repeat( - num_images_per_prompt, *([1] * len(single_image_embeds.shape[1:])) + num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:])) ) image_embeds.append(single_image_embeds)
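
Note on the change (illustrative commentary, not part of the patch): every hunk above replaces the hard-coded repeat(num_images_per_prompt, 1, 1), which assumes the image embeds have exactly two trailing dimensions, with a shape-agnostic form driven by a shared repeat_dims = [1] variable, so the same expression tiles only the batch dimension no matter how many trailing dimensions the tensor has. A minimal sketch of the pattern, using made-up embedding shapes purely for demonstration:

    import torch

    # Sketch of the repeat pattern used in the patched pipelines.
    # The tensor shapes below are invented for illustration only.
    num_images_per_prompt = 3
    repeat_dims = [1]

    for single_image_embeds in (
        torch.randn(2, 4, 768),      # a 3D embed (two chunks stacked on dim 0)
        torch.randn(2, 16, 4, 768),  # a 4D embed with an extra per-image axis
    ):
        # Tile only the batch dimension; every trailing dimension gets a
        # repeat factor of 1, regardless of how many trailing dims exist.
        repeated = single_image_embeds.repeat(
            num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
        )
        print(tuple(single_image_embeds.shape), "->", tuple(repeated.shape))
        # (2, 4, 768)     -> (6, 4, 768)
        # (2, 16, 4, 768) -> (6, 16, 4, 768)

Reusing one repeat_dims list, rather than rebuilding [1] at each call site, keeps the three repeat calls in every pipeline textually identical, which is what the "use of variable" commit in this series is about.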