From ce4f4f4545b060f5e1c38326bda540ffd6d7abf8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 11:41:16 +0530 Subject: [PATCH 01/11] fix: support for loading playground v2.5 single file checkpoint. --- src/diffusers/loaders/single_file.py | 6 + src/diffusers/loaders/single_file_utils.py | 134 ++++++++++++++------- 2 files changed, 94 insertions(+), 46 deletions(-) diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index 875858ce7761..2e1e39aa45b9 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -49,6 +49,7 @@ def build_sub_model_components( model_type=None, image_size=None, torch_dtype=None, + is_playground_model=False, **kwargs, ): if component_name in pipeline_components: @@ -84,6 +85,7 @@ def build_sub_model_components( scheduler_type=scheduler_type, prediction_type=prediction_type, model_type=model_type, + is_playground_model=is_playground_model, ) return scheduler_components @@ -228,6 +230,9 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): cache_dir=cache_dir, ) + # For now, we determine if the input checkpoint is from Playground like this. + is_playground_model = "edm_mean" in checkpoint and "edm_std" in checkpoint + from ..pipelines.pipeline_utils import _get_pipeline_class pipeline_class = _get_pipeline_class( @@ -262,6 +267,7 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): load_safety_checker=load_safety_checker, local_files_only=local_files_only, torch_dtype=torch_dtype, + is_playground_model=is_playground_model, **kwargs, ) if not components: diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index fc0d45785d8d..20d587823fd7 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -28,6 +28,7 @@ DDIMScheduler, DDPMScheduler, DPMSolverMultistepScheduler, + EDMDPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler, EulerDiscreteScheduler, HeunDiscreteScheduler, @@ -175,6 +176,7 @@ LDM_VAE_KEY = "first_stage_model." LDM_VAE_DEFAULT_SCALING_FACTOR = 0.18215 +PLAYGROUND_VAE_SCALING_FACTOR = 0.5 LDM_UNET_KEY = "model.diffusion_model." LDM_CONTROLNET_KEY = "control_model." LDM_CLIP_PREFIX_TO_REMOVE = ["cond_stage_model.transformer.", "conditioner.embedders.0.transformer."] @@ -506,13 +508,15 @@ def create_controlnet_diffusers_config(original_config, image_size: int): return controlnet_config -def create_vae_diffusers_config(original_config, image_size, scaling_factor=None): +def create_vae_diffusers_config(original_config, image_size, scaling_factor=None, latents_mean=None, latents_std=None): """ Creates a config for the diffusers based on the config of the LDM model. """ vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"] if scaling_factor is None and "scale_factor" in original_config["model"]["params"]: scaling_factor = original_config["model"]["params"]["scale_factor"] + elif latents_mean and latents_std: + scaling_factor = PLAYGROUND_VAE_SCALING_FACTOR elif scaling_factor is None: scaling_factor = LDM_VAE_DEFAULT_SCALING_FACTOR @@ -531,6 +535,8 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None "layers_per_block": vae_params["num_res_blocks"], "scaling_factor": scaling_factor, } + if latents_mean and latents_std: + config.update({"latents_mean": latents_mean, "latents_std": latents_std}) return config @@ -1223,14 +1229,29 @@ def create_diffusers_unet_model_from_ldm( def create_diffusers_vae_model_from_ldm( - pipeline_class_name, original_config, checkpoint, image_size=None, scaling_factor=None, torch_dtype=None + pipeline_class_name, + original_config, + checkpoint, + image_size=None, + scaling_factor=None, + torch_dtype=None, + is_playground_model=False, ): # import here to avoid circular imports from ..models import AutoencoderKL image_size = set_image_size(pipeline_class_name, original_config, checkpoint, image_size=image_size) - vae_config = create_vae_diffusers_config(original_config, image_size=image_size, scaling_factor=scaling_factor) + if is_playground_model: + vae_config = create_vae_diffusers_config( + original_config, + image_size=image_size, + scaling_factor=scaling_factor, + latents_mean=checkpoint["edm_mean"].flatten().tolist(), + latents_std=checkpoint["edm_std"].flatten().tolist(), + ) + else: + vae_config = create_vae_diffusers_config(original_config, image_size=image_size, scaling_factor=scaling_factor) diffusers_format_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config) ctx = init_empty_weights if is_accelerate_available() else nullcontext @@ -1381,65 +1402,86 @@ def create_scheduler_from_ldm( prediction_type=None, scheduler_type="ddim", model_type=None, + is_playground_model=False, ): - scheduler_config = get_default_scheduler_config() - model_type = infer_model_type(original_config, model_type=model_type) + if is_playground_model: + scheduler_kwargs = { + "algorithm_type": "dpmsolver++", + "dynamic_thresholding_ratio": 0.995, + "euler_at_final": False, + "final_sigmas_type": "zero", + "lower_order_final": True, + "num_train_timesteps": 1000, + "prediction_type": "epsilon", + "rho": 7.0, + "sample_max_value": 1.0, + "sigma_data": 0.5, + "sigma_max": 80.0, + "sigma_min": 0.002, + "solver_order": 2, + "solver_type": "midpoint", + "thresholding": False, + } + scheduler = EDMDPMSolverMultistepScheduler(**scheduler_kwargs) + else: + scheduler_config = get_default_scheduler_config() + model_type = infer_model_type(original_config, model_type=model_type) - global_step = checkpoint["global_step"] if "global_step" in checkpoint else None + global_step = checkpoint["global_step"] if "global_step" in checkpoint else None - num_train_timesteps = getattr(original_config["model"]["params"], "timesteps", None) or 1000 - scheduler_config["num_train_timesteps"] = num_train_timesteps + num_train_timesteps = getattr(original_config["model"]["params"], "timesteps", None) or 1000 + scheduler_config["num_train_timesteps"] = num_train_timesteps - if ( - "parameterization" in original_config["model"]["params"] - and original_config["model"]["params"]["parameterization"] == "v" - ): - if prediction_type is None: - # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"` - # as it relies on a brittle global step parameter here - prediction_type = "epsilon" if global_step == 875000 else "v_prediction" + if ( + "parameterization" in original_config["model"]["params"] + and original_config["model"]["params"]["parameterization"] == "v" + ): + if prediction_type is None: + # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"` + # as it relies on a brittle global step parameter here + prediction_type = "epsilon" if global_step == 875000 else "v_prediction" - else: - prediction_type = prediction_type or "epsilon" + else: + prediction_type = prediction_type or "epsilon" - scheduler_config["prediction_type"] = prediction_type + scheduler_config["prediction_type"] = prediction_type - if model_type in ["SDXL", "SDXL-Refiner"]: - scheduler_type = "euler" + if model_type in ["SDXL", "SDXL-Refiner"]: + scheduler_type = "euler" - else: - beta_start = original_config["model"]["params"].get("linear_start", 0.02) - beta_end = original_config["model"]["params"].get("linear_end", 0.085) - scheduler_config["beta_start"] = beta_start - scheduler_config["beta_end"] = beta_end - scheduler_config["beta_schedule"] = "scaled_linear" - scheduler_config["clip_sample"] = False - scheduler_config["set_alpha_to_one"] = False + else: + beta_start = original_config["model"]["params"].get("linear_start", 0.02) + beta_end = original_config["model"]["params"].get("linear_end", 0.085) + scheduler_config["beta_start"] = beta_start + scheduler_config["beta_end"] = beta_end + scheduler_config["beta_schedule"] = "scaled_linear" + scheduler_config["clip_sample"] = False + scheduler_config["set_alpha_to_one"] = False - if scheduler_type == "pndm": - scheduler_config["skip_prk_steps"] = True - scheduler = PNDMScheduler.from_config(scheduler_config) + if scheduler_type == "pndm": + scheduler_config["skip_prk_steps"] = True + scheduler = PNDMScheduler.from_config(scheduler_config) - elif scheduler_type == "lms": - scheduler = LMSDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "heun": - scheduler = HeunDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "euler": - scheduler = EulerDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "euler-ancestral": - scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "dpm": - scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config) + elif scheduler_type == "dpm": + scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config) - elif scheduler_type == "ddim": - scheduler = DDIMScheduler.from_config(scheduler_config) + elif scheduler_type == "ddim": + scheduler = DDIMScheduler.from_config(scheduler_config) - else: - raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") if pipeline_class_name == "StableDiffusionUpscalePipeline": scheduler = DDIMScheduler.from_pretrained("stabilityai/stable-diffusion-x4-upscaler", subfolder="scheduler") From a4e00abb687fe215cc309358baae26b32bd6046f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 13:09:02 +0530 Subject: [PATCH 02/11] remove is_playground_model. --- src/diffusers/loaders/single_file.py | 6 - src/diffusers/loaders/single_file_utils.py | 125 ++++++++++----------- 2 files changed, 62 insertions(+), 69 deletions(-) diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index 2e1e39aa45b9..875858ce7761 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -49,7 +49,6 @@ def build_sub_model_components( model_type=None, image_size=None, torch_dtype=None, - is_playground_model=False, **kwargs, ): if component_name in pipeline_components: @@ -85,7 +84,6 @@ def build_sub_model_components( scheduler_type=scheduler_type, prediction_type=prediction_type, model_type=model_type, - is_playground_model=is_playground_model, ) return scheduler_components @@ -230,9 +228,6 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): cache_dir=cache_dir, ) - # For now, we determine if the input checkpoint is from Playground like this. - is_playground_model = "edm_mean" in checkpoint and "edm_std" in checkpoint - from ..pipelines.pipeline_utils import _get_pipeline_class pipeline_class = _get_pipeline_class( @@ -267,7 +262,6 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): load_safety_checker=load_safety_checker, local_files_only=local_files_only, torch_dtype=torch_dtype, - is_playground_model=is_playground_model, **kwargs, ) if not components: diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 20d587823fd7..144e50863cef 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -1235,21 +1235,21 @@ def create_diffusers_vae_model_from_ldm( image_size=None, scaling_factor=None, torch_dtype=None, - is_playground_model=False, ): # import here to avoid circular imports from ..models import AutoencoderKL image_size = set_image_size(pipeline_class_name, original_config, checkpoint, image_size=image_size) - if is_playground_model: - vae_config = create_vae_diffusers_config( - original_config, - image_size=image_size, - scaling_factor=scaling_factor, - latents_mean=checkpoint["edm_mean"].flatten().tolist(), - latents_std=checkpoint["edm_std"].flatten().tolist(), - ) + if "edm_mean" in checkpoint and "edm_std" in checkpoint: + if checkpoint["edm"] is not None and checkpoint["edm_std"] is not None: + vae_config = create_vae_diffusers_config( + original_config, + image_size=image_size, + scaling_factor=scaling_factor, + latents_mean=checkpoint["edm_mean"].flatten().tolist(), + latents_std=checkpoint["edm_std"].flatten().tolist(), + ) else: vae_config = create_vae_diffusers_config(original_config, image_size=image_size, scaling_factor=scaling_factor) diffusers_format_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config) @@ -1402,10 +1402,34 @@ def create_scheduler_from_ldm( prediction_type=None, scheduler_type="ddim", model_type=None, - is_playground_model=False, ): - if is_playground_model: - scheduler_kwargs = { + scheduler_config = get_default_scheduler_config() + model_type = infer_model_type(original_config, model_type=model_type) + + global_step = checkpoint["global_step"] if "global_step" in checkpoint else None + + num_train_timesteps = getattr(original_config["model"]["params"], "timesteps", None) or 1000 + scheduler_config["num_train_timesteps"] = num_train_timesteps + + if ( + "parameterization" in original_config["model"]["params"] + and original_config["model"]["params"]["parameterization"] == "v" + ): + if prediction_type is None: + # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"` + # as it relies on a brittle global step parameter here + prediction_type = "epsilon" if global_step == 875000 else "v_prediction" + + else: + prediction_type = prediction_type or "epsilon" + + scheduler_config["prediction_type"] = prediction_type + + if model_type in ["SDXL", "SDXL-Refiner"]: + scheduler_type = "euler" + + elif model_type == "Playground": + scheduler_config = { "algorithm_type": "dpmsolver++", "dynamic_thresholding_ratio": 0.995, "euler_at_final": False, @@ -1422,66 +1446,41 @@ def create_scheduler_from_ldm( "solver_type": "midpoint", "thresholding": False, } - scheduler = EDMDPMSolverMultistepScheduler(**scheduler_kwargs) - else: - scheduler_config = get_default_scheduler_config() - model_type = infer_model_type(original_config, model_type=model_type) - - global_step = checkpoint["global_step"] if "global_step" in checkpoint else None - - num_train_timesteps = getattr(original_config["model"]["params"], "timesteps", None) or 1000 - scheduler_config["num_train_timesteps"] = num_train_timesteps - - if ( - "parameterization" in original_config["model"]["params"] - and original_config["model"]["params"]["parameterization"] == "v" - ): - if prediction_type is None: - # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"` - # as it relies on a brittle global step parameter here - prediction_type = "epsilon" if global_step == 875000 else "v_prediction" - - else: - prediction_type = prediction_type or "epsilon" - - scheduler_config["prediction_type"] = prediction_type + scheduler = EDMDPMSolverMultistepScheduler(**scheduler_config) - if model_type in ["SDXL", "SDXL-Refiner"]: - scheduler_type = "euler" - - else: - beta_start = original_config["model"]["params"].get("linear_start", 0.02) - beta_end = original_config["model"]["params"].get("linear_end", 0.085) - scheduler_config["beta_start"] = beta_start - scheduler_config["beta_end"] = beta_end - scheduler_config["beta_schedule"] = "scaled_linear" - scheduler_config["clip_sample"] = False - scheduler_config["set_alpha_to_one"] = False + else: + beta_start = original_config["model"]["params"].get("linear_start", 0.02) + beta_end = original_config["model"]["params"].get("linear_end", 0.085) + scheduler_config["beta_start"] = beta_start + scheduler_config["beta_end"] = beta_end + scheduler_config["beta_schedule"] = "scaled_linear" + scheduler_config["clip_sample"] = False + scheduler_config["set_alpha_to_one"] = False - if scheduler_type == "pndm": - scheduler_config["skip_prk_steps"] = True - scheduler = PNDMScheduler.from_config(scheduler_config) + if scheduler_type == "pndm": + scheduler_config["skip_prk_steps"] = True + scheduler = PNDMScheduler.from_config(scheduler_config) - elif scheduler_type == "lms": - scheduler = LMSDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "heun": - scheduler = HeunDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "euler": - scheduler = EulerDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "euler-ancestral": - scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler_config) + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler_config) - elif scheduler_type == "dpm": - scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config) + elif scheduler_type == "dpm": + scheduler = DPMSolverMultistepScheduler.from_config(scheduler_config) - elif scheduler_type == "ddim": - scheduler = DDIMScheduler.from_config(scheduler_config) + elif scheduler_type == "ddim": + scheduler = DDIMScheduler.from_config(scheduler_config) - else: - raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") if pipeline_class_name == "StableDiffusionUpscalePipeline": scheduler = DDIMScheduler.from_pretrained("stabilityai/stable-diffusion-x4-upscaler", subfolder="scheduler") From 9d90d6075329e363a1c8ca2b377a795a1fca1fa4 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 13:10:49 +0530 Subject: [PATCH 03/11] fix: edm key --- src/diffusers/loaders/single_file_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 144e50863cef..66415f0df85d 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -1242,7 +1242,7 @@ def create_diffusers_vae_model_from_ldm( image_size = set_image_size(pipeline_class_name, original_config, checkpoint, image_size=image_size) if "edm_mean" in checkpoint and "edm_std" in checkpoint: - if checkpoint["edm"] is not None and checkpoint["edm_std"] is not None: + if checkpoint["edm_mean"] is not None and checkpoint["edm_std"] is not None: vae_config = create_vae_diffusers_config( original_config, image_size=image_size, From c1d0e091af22007dee36b1b3ad70986b51046227 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 14:40:50 +0530 Subject: [PATCH 04/11] apply Dhruv's comments but errors. --- src/diffusers/loaders/single_file.py | 12 +++- src/diffusers/loaders/single_file_utils.py | 65 +++++++++++++--------- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index 875858ce7761..7ade582182b7 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -63,13 +63,20 @@ def build_sub_model_components( num_in_channels=num_in_channels, image_size=image_size, torch_dtype=torch_dtype, + model_type=model_type, ) return unet_components if component_name == "vae": scaling_factor = kwargs.get("scaling_factor", None) vae_components = create_diffusers_vae_model_from_ldm( - pipeline_class_name, original_config, checkpoint, image_size, scaling_factor, torch_dtype + pipeline_class_name, + original_config, + checkpoint, + image_size, + scaling_factor, + torch_dtype, + model_type=model_type, ) return vae_components @@ -245,6 +252,9 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): load_safety_checker = (kwargs.pop("load_safety_checker", False)) or ( passed_class_obj.get("safety_checker", None) is not None ) + model_type = "Playground" if "edm_mean" in checkpoint and "edm_std" in checkpoint else model_type + + print(f"Model type: {model_type}") init_kwargs = {} for name in expected_modules: diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 66415f0df85d..6fd6bbedf806 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -345,6 +345,7 @@ def set_image_size(pipeline_class_name, original_config, checkpoint, image_size= if image_size: return image_size + print(f"set_image_size: {image_size}") global_step = checkpoint["global_step"] if "global_step" in checkpoint else None model_type = infer_model_type(original_config, model_type) @@ -352,8 +353,9 @@ def set_image_size(pipeline_class_name, original_config, checkpoint, image_size= image_size = original_config["model"]["params"]["unet_config"]["params"]["image_size"] return image_size - elif model_type in ["SDXL", "SDXL-Refiner"]: + elif model_type in ["SDXL", "SDXL-Refiner", "Playground"]: image_size = 1024 + print(f"image size: {image_size}") return image_size elif ( @@ -1178,6 +1180,7 @@ def create_diffusers_unet_model_from_ldm( extract_ema=False, image_size=None, torch_dtype=None, + model_type=None, ): from ..models import UNet2DConditionModel @@ -1196,7 +1199,10 @@ def create_diffusers_unet_model_from_ldm( else: num_in_channels = 4 - image_size = set_image_size(pipeline_class_name, original_config, checkpoint, image_size=image_size) + print(f"From unet: {image_size} (image_size)") + image_size = set_image_size( + pipeline_class_name, original_config, checkpoint, image_size=image_size, model_type=model_type + ) unet_config = create_unet_diffusers_config(original_config, image_size=image_size) unet_config["in_channels"] = num_in_channels unet_config["upcast_attention"] = upcast_attention @@ -1235,20 +1241,25 @@ def create_diffusers_vae_model_from_ldm( image_size=None, scaling_factor=None, torch_dtype=None, + model_type=None, ): # import here to avoid circular imports from ..models import AutoencoderKL - image_size = set_image_size(pipeline_class_name, original_config, checkpoint, image_size=image_size) + image_size = set_image_size( + pipeline_class_name, original_config, checkpoint, image_size=image_size, model_type=model_type + ) if "edm_mean" in checkpoint and "edm_std" in checkpoint: if checkpoint["edm_mean"] is not None and checkpoint["edm_std"] is not None: + edm_mean = checkpoint["edm_mean"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_mean"] + edm_std = checkpoint["edm_std"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_std"] vae_config = create_vae_diffusers_config( original_config, image_size=image_size, scaling_factor=scaling_factor, - latents_mean=checkpoint["edm_mean"].flatten().tolist(), - latents_std=checkpoint["edm_std"].flatten().tolist(), + latents_mean=edm_mean.flatten().tolist(), + latents_std=edm_std.flatten().tolist(), ) else: vae_config = create_vae_diffusers_config(original_config, image_size=image_size, scaling_factor=scaling_factor) @@ -1287,6 +1298,7 @@ def create_text_encoders_and_tokenizers_from_ldm( torch_dtype=None, ): model_type = infer_model_type(original_config, model_type=model_type) + print(f"From text encoder: {model_type}") if model_type == "FrozenOpenCLIPEmbedder": config_name = "stabilityai/stable-diffusion-2" @@ -1353,7 +1365,7 @@ def create_text_encoders_and_tokenizers_from_ldm( "text_encoder_2": text_encoder_2, } - elif model_type == "SDXL": + elif model_type in ["SDXL", "Playground"]: try: config_name = "openai/clip-vit-large-patch14" tokenizer = CLIPTokenizer.from_pretrained(config_name, local_files_only=local_files_only) @@ -1427,27 +1439,8 @@ def create_scheduler_from_ldm( if model_type in ["SDXL", "SDXL-Refiner"]: scheduler_type = "euler" - elif model_type == "Playground": - scheduler_config = { - "algorithm_type": "dpmsolver++", - "dynamic_thresholding_ratio": 0.995, - "euler_at_final": False, - "final_sigmas_type": "zero", - "lower_order_final": True, - "num_train_timesteps": 1000, - "prediction_type": "epsilon", - "rho": 7.0, - "sample_max_value": 1.0, - "sigma_data": 0.5, - "sigma_max": 80.0, - "sigma_min": 0.002, - "solver_order": 2, - "solver_type": "midpoint", - "thresholding": False, - } - scheduler = EDMDPMSolverMultistepScheduler(**scheduler_config) - + scheduler_type = "edm_dpm_solver_multistep" else: beta_start = original_config["model"]["params"].get("linear_start", 0.02) beta_end = original_config["model"]["params"].get("linear_end", 0.085) @@ -1479,6 +1472,26 @@ def create_scheduler_from_ldm( elif scheduler_type == "ddim": scheduler = DDIMScheduler.from_config(scheduler_config) + elif scheduler_type == "edm_dpm_solver_multistep": + scheduler_config = { + "algorithm_type": "dpmsolver++", + "dynamic_thresholding_ratio": 0.995, + "euler_at_final": False, + "final_sigmas_type": "zero", + "lower_order_final": True, + "num_train_timesteps": 1000, + "prediction_type": "epsilon", + "rho": 7.0, + "sample_max_value": 1.0, + "sigma_data": 0.5, + "sigma_max": 80.0, + "sigma_min": 0.002, + "solver_order": 2, + "solver_type": "midpoint", + "thresholding": False, + } + scheduler = EDMDPMSolverMultistepScheduler(**scheduler_config) + else: raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") From 6d3e82c9cd00483104a6f405d5e5893c2bed5c20 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 14:54:34 +0530 Subject: [PATCH 05/11] fix: things. --- src/diffusers/loaders/single_file.py | 2 -- src/diffusers/loaders/single_file_utils.py | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index 7ade582182b7..d1426df073c9 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -254,8 +254,6 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): ) model_type = "Playground" if "edm_mean" in checkpoint and "edm_std" in checkpoint else model_type - print(f"Model type: {model_type}") - init_kwargs = {} for name in expected_modules: if name in passed_class_obj: diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 6fd6bbedf806..254bbe63cf8c 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -345,7 +345,6 @@ def set_image_size(pipeline_class_name, original_config, checkpoint, image_size= if image_size: return image_size - print(f"set_image_size: {image_size}") global_step = checkpoint["global_step"] if "global_step" in checkpoint else None model_type = infer_model_type(original_config, model_type) @@ -355,7 +354,6 @@ def set_image_size(pipeline_class_name, original_config, checkpoint, image_size= elif model_type in ["SDXL", "SDXL-Refiner", "Playground"]: image_size = 1024 - print(f"image size: {image_size}") return image_size elif ( @@ -515,7 +513,11 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None Creates a config for the diffusers based on the config of the LDM model. """ vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"] - if scaling_factor is None and "scale_factor" in original_config["model"]["params"]: + if ( + scaling_factor is None + and "scale_factor" in original_config["model"]["params"] + and not (latents_mean and latents_std) + ): scaling_factor = original_config["model"]["params"]["scale_factor"] elif latents_mean and latents_std: scaling_factor = PLAYGROUND_VAE_SCALING_FACTOR @@ -1199,7 +1201,6 @@ def create_diffusers_unet_model_from_ldm( else: num_in_channels = 4 - print(f"From unet: {image_size} (image_size)") image_size = set_image_size( pipeline_class_name, original_config, checkpoint, image_size=image_size, model_type=model_type ) @@ -1298,7 +1299,6 @@ def create_text_encoders_and_tokenizers_from_ldm( torch_dtype=None, ): model_type = infer_model_type(original_config, model_type=model_type) - print(f"From text encoder: {model_type}") if model_type == "FrozenOpenCLIPEmbedder": config_name = "stabilityai/stable-diffusion-2" From 29e6b873c4c1ed713fcb6cde0c953b4773d6b606 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 16:35:37 +0530 Subject: [PATCH 06/11] delegate model_type inference to a function. --- src/diffusers/loaders/single_file.py | 4 ++-- src/diffusers/loaders/single_file_utils.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/diffusers/loaders/single_file.py b/src/diffusers/loaders/single_file.py index d1426df073c9..92b952754b1f 100644 --- a/src/diffusers/loaders/single_file.py +++ b/src/diffusers/loaders/single_file.py @@ -131,11 +131,12 @@ def build_sub_model_components( def set_additional_components( pipeline_class_name, original_config, + checkpoint=None, model_type=None, ): components = {} if pipeline_class_name in REFINER_PIPELINES: - model_type = infer_model_type(original_config, model_type=model_type) + model_type = infer_model_type(original_config, checkpoint=checkpoint, model_type=model_type) is_refiner = model_type == "SDXL-Refiner" components.update( { @@ -252,7 +253,6 @@ def from_single_file(cls, pretrained_model_link_or_path, **kwargs): load_safety_checker = (kwargs.pop("load_safety_checker", False)) or ( passed_class_obj.get("safety_checker", None) is not None ) - model_type = "Playground" if "edm_mean" in checkpoint and "edm_std" in checkpoint else model_type init_kwargs = {} for name in expected_modules: diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 254bbe63cf8c..2f60fb5f6945 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -307,7 +307,11 @@ def is_valid_url(url): return original_config -def infer_model_type(original_config, model_type=None): +def infer_model_type(original_config, checkpoint=None, model_type=None): + if checkpoint is not None: + if "edm_mean" in checkpoint and "edm_std": + return "Playground" + if model_type is not None: return model_type @@ -346,7 +350,7 @@ def set_image_size(pipeline_class_name, original_config, checkpoint, image_size= return image_size global_step = checkpoint["global_step"] if "global_step" in checkpoint else None - model_type = infer_model_type(original_config, model_type) + model_type = infer_model_type(original_config, checkpoint, model_type) if pipeline_class_name == "StableDiffusionUpscalePipeline": image_size = original_config["model"]["params"]["unet_config"]["params"]["image_size"] @@ -1298,7 +1302,7 @@ def create_text_encoders_and_tokenizers_from_ldm( local_files_only=False, torch_dtype=None, ): - model_type = infer_model_type(original_config, model_type=model_type) + model_type = infer_model_type(original_config, checkpoint=checkpoint, model_type=model_type) if model_type == "FrozenOpenCLIPEmbedder": config_name = "stabilityai/stable-diffusion-2" @@ -1416,7 +1420,7 @@ def create_scheduler_from_ldm( model_type=None, ): scheduler_config = get_default_scheduler_config() - model_type = infer_model_type(original_config, model_type=model_type) + model_type = infer_model_type(original_config, checkpoint=checkpoint, model_type=model_type) global_step = checkpoint["global_step"] if "global_step" in checkpoint else None From 2be231cce5759c945e748e7e53a5162ae1f663ea Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 17:53:19 +0530 Subject: [PATCH 07/11] address Dhruv's comment. --- src/diffusers/loaders/single_file_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 2f60fb5f6945..bc952c998cf6 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -308,11 +308,9 @@ def is_valid_url(url): def infer_model_type(original_config, checkpoint=None, model_type=None): - if checkpoint is not None: - if "edm_mean" in checkpoint and "edm_std": - return "Playground" - if model_type is not None: + if "edm_mean" in checkpoint and "edm_std" in checkpoint: + return "Playground" return model_type has_cond_stage_config = ( From 52ba8061d3f4d43f0cbe429a2000ff41550128b7 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 18:00:37 +0530 Subject: [PATCH 08/11] address rest of the comments. --- src/diffusers/loaders/single_file_utils.py | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index bc952c998cf6..e1a14c5f61d1 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -312,6 +312,8 @@ def infer_model_type(original_config, checkpoint=None, model_type=None): if "edm_mean" in checkpoint and "edm_std" in checkpoint: return "Playground" return model_type + elif "edm_mean" in checkpoint and "edm_std" in checkpoint: + return "Playground" has_cond_stage_config = ( "cond_stage_config" in original_config["model"]["params"] @@ -1252,20 +1254,18 @@ def create_diffusers_vae_model_from_ldm( image_size = set_image_size( pipeline_class_name, original_config, checkpoint, image_size=image_size, model_type=model_type ) + model_type = infer_model_type(original_config, checkpoint, model_type) - if "edm_mean" in checkpoint and "edm_std" in checkpoint: - if checkpoint["edm_mean"] is not None and checkpoint["edm_std"] is not None: - edm_mean = checkpoint["edm_mean"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_mean"] - edm_std = checkpoint["edm_std"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_std"] - vae_config = create_vae_diffusers_config( - original_config, - image_size=image_size, - scaling_factor=scaling_factor, - latents_mean=edm_mean.flatten().tolist(), - latents_std=edm_std.flatten().tolist(), - ) + if model_type == "Playground": + edm_mean = checkpoint["edm_mean"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_mean"] + edm_std = checkpoint["edm_std"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_std"] else: - vae_config = create_vae_diffusers_config(original_config, image_size=image_size, scaling_factor=scaling_factor) + edm_mean = None + edm_std = None + + vae_config = create_vae_diffusers_config( + original_config, image_size=image_size, scaling_factor=scaling_factor, edm_mean=edm_mean, edm_std=edm_std + ) diffusers_format_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config) ctx = init_empty_weights if is_accelerate_available() else nullcontext From 49b0b516eaaadf70cd145eb3e57f6b8f7d1a8de3 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 18:04:22 +0530 Subject: [PATCH 09/11] fix: kwargs --- src/diffusers/loaders/single_file_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index e1a14c5f61d1..187d4df41837 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -1264,7 +1264,11 @@ def create_diffusers_vae_model_from_ldm( edm_std = None vae_config = create_vae_diffusers_config( - original_config, image_size=image_size, scaling_factor=scaling_factor, edm_mean=edm_mean, edm_std=edm_std + original_config, + image_size=image_size, + scaling_factor=scaling_factor, + latents_mean=edm_mean, + latents_std=edm_std, ) diffusers_format_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config) ctx = init_empty_weights if is_accelerate_available() else nullcontext From 9e35a12587ec0adac6dc55c62d60f162036c60a8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 6 Mar 2024 18:09:05 +0530 Subject: [PATCH 10/11] fix --- src/diffusers/loaders/single_file_utils.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 187d4df41837..4ba23d1d9a0b 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -520,10 +520,10 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None if ( scaling_factor is None and "scale_factor" in original_config["model"]["params"] - and not (latents_mean and latents_std) + and not (latents_mean is not None and latents_std is not None) ): scaling_factor = original_config["model"]["params"]["scale_factor"] - elif latents_mean and latents_std: + elif latents_mean is not None and latents_std is not None: scaling_factor = PLAYGROUND_VAE_SCALING_FACTOR elif scaling_factor is None: scaling_factor = LDM_VAE_DEFAULT_SCALING_FACTOR @@ -543,7 +543,7 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None "layers_per_block": vae_params["num_res_blocks"], "scaling_factor": scaling_factor, } - if latents_mean and latents_std: + if latents_mean is not None and latents_std is not None: config.update({"latents_mean": latents_mean, "latents_std": latents_std}) return config @@ -1257,8 +1257,12 @@ def create_diffusers_vae_model_from_ldm( model_type = infer_model_type(original_config, checkpoint, model_type) if model_type == "Playground": - edm_mean = checkpoint["edm_mean"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_mean"] - edm_std = checkpoint["edm_std"].to(dtype=torch_dtype) if torch_dtype else checkpoint["edm_std"] + edm_mean = ( + checkpoint["edm_mean"].to(dtype=torch_dtype).tolist() if torch_dtype else checkpoint["edm_mean"].tolist() + ) + edm_std = ( + checkpoint["edm_std"].to(dtype=torch_dtype).tolist() if torch_dtype else checkpoint["edm_std"].tolist() + ) else: edm_mean = None edm_std = None From a77e426877e7b9660d921579f7e041db4530fa50 Mon Sep 17 00:00:00 2001 From: DN6 Date: Thu, 7 Mar 2024 13:15:51 +0530 Subject: [PATCH 11/11] update --- src/diffusers/loaders/single_file_utils.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/diffusers/loaders/single_file_utils.py b/src/diffusers/loaders/single_file_utils.py index 4ba23d1d9a0b..085c3c12cdd5 100644 --- a/src/diffusers/loaders/single_file_utils.py +++ b/src/diffusers/loaders/single_file_utils.py @@ -309,11 +309,7 @@ def is_valid_url(url): def infer_model_type(original_config, checkpoint=None, model_type=None): if model_type is not None: - if "edm_mean" in checkpoint and "edm_std" in checkpoint: - return "Playground" return model_type - elif "edm_mean" in checkpoint and "edm_std" in checkpoint: - return "Playground" has_cond_stage_config = ( "cond_stage_config" in original_config["model"]["params"] @@ -329,7 +325,9 @@ def infer_model_type(original_config, checkpoint=None, model_type=None): elif has_network_config: context_dim = original_config["model"]["params"]["network_config"]["params"]["context_dim"] - if context_dim == 2048: + if "edm_mean" in checkpoint and "edm_std" in checkpoint: + model_type = "Playground" + elif context_dim == 2048: model_type = "SDXL" else: model_type = "SDXL-Refiner" @@ -517,14 +515,10 @@ def create_vae_diffusers_config(original_config, image_size, scaling_factor=None Creates a config for the diffusers based on the config of the LDM model. """ vae_params = original_config["model"]["params"]["first_stage_config"]["params"]["ddconfig"] - if ( - scaling_factor is None - and "scale_factor" in original_config["model"]["params"] - and not (latents_mean is not None and latents_std is not None) - ): - scaling_factor = original_config["model"]["params"]["scale_factor"] - elif latents_mean is not None and latents_std is not None: + if (scaling_factor is None) and (latents_mean is not None) and (latents_std is not None): scaling_factor = PLAYGROUND_VAE_SCALING_FACTOR + elif (scaling_factor is None) and ("scale_factor" in original_config["model"]["params"]): + scaling_factor = original_config["model"]["params"]["scale_factor"] elif scaling_factor is None: scaling_factor = LDM_VAE_DEFAULT_SCALING_FACTOR