Merge branch 'dev' into SD3-parsing
vladmandic authored Jun 21, 2024
2 parents 1fcd378 + e51599c commit 6b6170b
Showing 14 changed files with 87 additions and 66 deletions.
7 changes: 4 additions & 3 deletions CHANGELOG.md
@@ -3,12 +3,11 @@
## Pending

- Diffusers==0.30.0
- https://github.com/huggingface/diffusers/pull/8566
- https://github.com/huggingface/diffusers/pull/8584

## Update for 2024-06-19
## Update for 2024-06-20

### Highlights for 2024-06-19
### Highlights for 2024-06-20

Following zero-day **SD3** release, a week later here's a refresh with more than a few improvements.
But there's more than SD3:
@@ -51,6 +50,7 @@ But there's more than SD3:
- improved google.colab support
- css tweaks for standardui
- css tweaks for modernui
- additional torch gc checks, thanks @Disty0!

### Fixes

@@ -68,6 +68,7 @@ But there's more than SD3:
- fix sdxl "has been incorrectly initialized"
- fix api face-hires
- fix api ip-adapter
- fix memory exceptions with ROCm, thanks @Disty0!
- cleanup image metadata
- restructure api examples: `cli/api-*`
- handle theme fallback when invalid theme is specified
1 change: 1 addition & 0 deletions TODO.md
@@ -11,6 +11,7 @@ Main ToDo list can be found at [GitHub projects](https://github.com/users/vladma
- diffusers public callbacks
- include reference styles
- lora: sc lora, dora, etc
- sd3 controlnet: <https://github.com/huggingface/diffusers/pull/8566>

## Experimental

2 changes: 1 addition & 1 deletion installer.py
@@ -399,7 +399,7 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
if args.quick:
return
log.info(f'Python version={platform.python_version()} platform={platform.system()} bin="{sys.executable}" venv="{sys.prefix}"')
if int(sys.version_info.major) == 3 and int(sys.version_info.minor) == 12 and int(sys.version_info.minor) > 3: # TODO python 3.12.4 or higher cause a mess with pydantic
if int(sys.version_info.major) == 3 and int(sys.version_info.minor) == 12 and int(sys.version_info.micro) > 3: # TODO python 3.12.4 or higher cause a mess with pydantic
log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.12.3 or lower")
if reason is not None:
log.error(reason)
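For reference, `sys.version_info` is a named tuple `(major, minor, micro, releaselevel, serial)`; the old condition compared `minor` a second time where it meant `micro`, so every 3.12.x interpreter was flagged rather than only 3.12.4 and later. A minimal standalone sketch of the corrected check (illustrative, not the installer's exact logging):

```python
import sys

# sys.version_info fields: major, minor, micro, releaselevel, serial
# Python 3.12.4+ is rejected here because it is known to break pydantic
if sys.version_info.major == 3 and sys.version_info.minor == 12 and sys.version_info.micro > 3:
    print(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}, required 3.12.3 or lower")
```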
9 changes: 6 additions & 3 deletions modules/devices.py
@@ -140,13 +140,16 @@ def torch_gc(force=False):
used_gpu = round(100 * gpu.get('used', 0) / gpu.get('total', 1)) if gpu.get('total', 1) > 1 else 0
used_ram = round(100 * ram.get('used', 0) / ram.get('total', 1)) if ram.get('total', 1) > 1 else 0
global previous_oom # pylint: disable=global-statement
if force or shared.opts.torch_gc_threshold == 0:
log.debug(f'Forced Torch GC: GPU={used_gpu}% RAM={used_ram}% {mem}')
force = True
elif used_gpu >= shared.opts.torch_gc_threshold or used_ram >= shared.opts.torch_gc_threshold:
log.info(f'High memory utilization: GPU={used_gpu}% RAM={used_ram}% {mem}')
force = True
if oom > previous_oom:
previous_oom = oom
log.warning(f'GPU out-of-memory error: {mem}')
force = True
if used_gpu >= shared.opts.torch_gc_threshold or used_ram >= shared.opts.torch_gc_threshold:
log.info(f'High memory utilization: GPU={used_gpu}% RAM={used_ram}% {mem}')
force = True
if not force:
return

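Read in isolation, the reordered checks treat an explicit `force` or a threshold of 0 as an unconditional collection, fold the high-utilization test into an `elif`, and then let a newly observed OOM event force collection regardless of the above. A rough standalone sketch of that decision order (function and argument names are illustrative, not the module's API):

```python
def gc_decision(force, threshold, used_gpu, used_ram, oom, previous_oom):
    # illustrative only: mirrors the order of checks in the updated torch_gc
    if force or threshold == 0:
        force = True                              # explicit request or threshold disabled
    elif used_gpu >= threshold or used_ram >= threshold:
        force = True                              # high GPU or RAM utilization
    if oom > previous_oom:
        previous_oom = oom                        # remember the new OOM count
        force = True                              # a fresh out-of-memory event always collects
    return force, previous_oom
```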
53 changes: 25 additions & 28 deletions modules/pag/pipe_sdxl.py
@@ -461,8 +461,9 @@ def __init__(
image_encoder=image_encoder,
feature_extractor=feature_extractor,
)
if 'requires_aesthetics_score' in self.config:
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
self.default_sample_size = self.unet.config.sample_size
@@ -1500,7 +1501,7 @@ def __call__(
else:
replace_processor = PAGIdentitySelfAttnProcessor()

if(self.pag_applied_layers_index):
if self.pag_applied_layers_index:
drop_layers = self.pag_applied_layers_index
for drop_layer in drop_layers:
layer_number = int(drop_layer[1:])
@@ -1517,7 +1518,7 @@
raise ValueError(
f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers."
)
elif(self.pag_applied_layers):
elif self.pag_applied_layers:
drop_full_layers = self.pag_applied_layers
for drop_full_layer in drop_full_layers:
try:
@@ -1621,7 +1622,7 @@ def __call__(
if XLA_AVAILABLE:
xm.mark_step()

if not output_type == "latent":
if output_type != "latent":
# make sure the VAE is in float32 mode, as it overflows in float16
needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast

@@ -1656,7 +1657,7 @@ def __call__(
else:
image = latents

if not output_type == "latent":
if output_type != "latent":
# apply watermark if available
if self.watermark is not None:
image = self.watermark.apply_watermark(image)
@@ -1671,7 +1672,7 @@

#Change the attention layers back to original ones after PAG was applied
if self.do_adversarial_guidance:
if(self.pag_applied_layers_index):
if self.pag_applied_layers_index:
drop_layers = self.pag_applied_layers_index
for drop_layer in drop_layers:
layer_number = int(drop_layer[1:])
@@ -1685,26 +1686,22 @@
else:
raise ValueError(f"Invalid layer type: {drop_layer[0]}")
except IndexError:
raise ValueError(
f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers."
)
elif(self.pag_applied_layers):
drop_full_layers = self.pag_applied_layers
for drop_full_layer in drop_full_layers:
try:
if drop_full_layer == "down":
for down_layer in down_layers:
down_layer.processor = AttnProcessor2_0()
elif drop_full_layer == "mid":
for mid_layer in mid_layers:
mid_layer.processor = AttnProcessor2_0()
elif drop_full_layer == "up":
for up_layer in up_layers:
up_layer.processor = AttnProcessor2_0()
else:
raise ValueError(f"Invalid layer type: {drop_full_layer}")
except IndexError:
raise ValueError(
f"Invalid layer index: {drop_full_layer}. Available layers are: down, mid and up. If you need to specify each layer index, you can use `pag_applied_layers_index`"
)
raise ValueError(f"Invalid layer index: {drop_layer}. Available layers: {len(down_layers)} down layers, {len(mid_layers)} mid layers, {len(up_layers)} up layers.")
elif self.pag_applied_layers:
drop_full_layers = self.pag_applied_layers
for drop_full_layer in drop_full_layers:
try:
if drop_full_layer == "down":
for down_layer in down_layers:
down_layer.processor = AttnProcessor2_0()
elif drop_full_layer == "mid":
for mid_layer in mid_layers:
mid_layer.processor = AttnProcessor2_0()
elif drop_full_layer == "up":
for up_layer in up_layers:
up_layer.processor = AttnProcessor2_0()
else:
raise ValueError(f"Invalid layer type: {drop_full_layer}")
except IndexError:
raise ValueError(f"Invalid layer index: {drop_full_layer}. Available layers are: down, mid and up. If you need to specify each layer index, you can use `pag_applied_layers_index`")
return StableDiffusionXLPipelineOutput(images=image)
1 change: 1 addition & 0 deletions modules/processing_helpers.py
@@ -400,6 +400,7 @@ def resize_hires(p, latents): # input=latents output=pil if not latent_upscaler
else:
resized_image = img
resized_images.append(resized_image)
devices.torch_gc()
return resized_images


2 changes: 2 additions & 0 deletions modules/processing_vae.py
@@ -140,6 +140,7 @@ def vae_decode(latents, model, output_type='np', full_quality=True):
if shared.cmd_opts.profile:
t1 = time.time()
shared.log.debug(f'Profile: VAE decode: {t1-t0:.2f}')
devices.torch_gc()
return imgs


@@ -155,4 +156,5 @@ def vae_encode(image, model, full_quality=True): # pylint: disable=unused-variab
latents = full_vae_encode(image=tensor, model=shared.sd_model)
else:
latents = taesd_vae_encode(image=tensor)
devices.torch_gc()
return latents
34 changes: 18 additions & 16 deletions modules/prompt_parser_diffusers.py
@@ -12,10 +12,9 @@
debug = shared.log.trace if os.environ.get('SD_PROMPT_DEBUG', None) is not None else lambda *args, **kwargs: None
debug('Trace: PROMPT')
orig_encode_token_ids_to_embeddings = EmbeddingsProvider._encode_token_ids_to_embeddings # pylint: disable=protected-access
token_dict = None
token_type = None
token_dict = None # used by helper get_tokens
token_type = None # used by helper get_tokens
cache = {}
cache_type = None


def compel_hijack(self, token_ids: torch.Tensor,
@@ -151,7 +150,7 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c
if 'StableDiffusion' not in pipe.__class__.__name__ and 'DemoFusion' not in pipe.__class__.__name__ and 'StableCascade' not in pipe.__class__.__name__:
shared.log.warning(f"Prompt parser not supported: {pipe.__class__.__name__}")
return
elif prompts == cache.get('prompts', None) and negative_prompts == cache.get('negative_prompts', None) and clip_skip == cache.get('clip_skip', None) and cache.get('model_type', None) == shared.sd_model_type and steps == cache.get('steps', None):
elif shared.opts.sd_textencoder_cache and prompts == cache.get('prompts', None) and negative_prompts == cache.get('negative_prompts', None) and clip_skip == cache.get('clip_skip', None) and cache.get('model_type', None) == shared.sd_model_type and steps == cache.get('steps', None):
p.prompt_embeds = cache.get('prompt_embeds', None)
p.positive_pooleds = cache.get('positive_pooleds', None)
p.negative_embeds = cache.get('negative_embeds', None)
@@ -182,18 +181,21 @@ def encode_prompts(pipe, p, prompts: list, negative_prompts: list, steps: int, c
if negative_pooled is not None:
p.negative_pooleds.append(torch.cat([negative_pooled] * len(negative_prompts), dim=0))

cache.update({
'prompt_embeds': p.prompt_embeds,
'negative_embeds': p.negative_embeds,
'positive_pooleds': p.positive_pooleds,
'negative_pooleds': p.negative_pooleds,
'scheduled_prompt': p.scheduled_prompt,
'prompts': prompts,
'negative_prompts': negative_prompts,
'clip_skip': clip_skip,
'steps': steps,
'model_type': shared.sd_model_type
})
if shared.opts.sd_textencoder_cache:
cache.update({
'prompt_embeds': p.prompt_embeds,
'negative_embeds': p.negative_embeds,
'positive_pooleds': p.positive_pooleds,
'negative_pooleds': p.negative_pooleds,
'scheduled_prompt': p.scheduled_prompt,
'prompts': prompts,
'negative_prompts': negative_prompts,
'clip_skip': clip_skip,
'steps': steps,
'model_type': shared.sd_model_type
})
else:
cache.clear()
if debug_enabled:
get_tokens('positive', prompts[0])
get_tokens('negative', negative_prompts[0])
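The embedding cache is now consulted and written only when `shared.opts.sd_textencoder_cache` is enabled, and cleared otherwise, so disabling the option cannot leave stale embeddings behind. A compact sketch of the pattern (the option name comes from the diff; everything else is illustrative):

```python
cache: dict = {}

def store_embeddings(enabled: bool, key: dict, embeddings: dict) -> None:
    # keep results only while caching is enabled; otherwise drop anything stale
    if enabled:
        cache.update({**key, **embeddings})
    else:
        cache.clear()
```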
14 changes: 10 additions & 4 deletions modules/sd_models.py
@@ -255,15 +255,16 @@ def select_checkpoint(op='model'):
shared.log.info(" or use --ckpt-dir <path-to-folder> to specify folder with sd models")
shared.log.info(" or use --ckpt <path-to-checkpoint> to force using specific model")
return None
checkpoint_info = next(iter(checkpoints_list.values()))
# checkpoint_info = next(iter(checkpoints_list.values()))
if model_checkpoint is not None:
if model_checkpoint != 'model.ckpt' and model_checkpoint != 'runwayml/stable-diffusion-v1-5':
shared.log.warning(f"Selected checkpoint not found: {model_checkpoint}")
shared.log.warning(f'Selected: {op}="{model_checkpoint}" not found')
else:
shared.log.info("Selecting first available checkpoint")
# shared.log.warning(f"Loading fallback checkpoint: {checkpoint_info.title}")
shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title
shared.log.info(f'Select: {op}="{checkpoint_info.title if checkpoint_info is not None else None}"')
# shared.opts.data['sd_model_checkpoint'] = checkpoint_info.title
else:
shared.log.info(f'Select: {op}="{checkpoint_info.title if checkpoint_info is not None else None}"')
return checkpoint_info


@@ -936,6 +937,7 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No

checkpoint_info = checkpoint_info or select_checkpoint(op=op)
if checkpoint_info is None:
print('HERE1')
unload_model_weights(op=op)
return

@@ -1113,6 +1115,8 @@ def load_diffuser(checkpoint_info=None, already_loaded_state_dict=None, timer=No
if sd_model is None:
shared.log.error('Diffuser model not loaded')
return
if 'requires_aesthetics_score' in sd_model.config:
sd_model.register_to_config(requires_aesthetics_score=False)
sd_model.sd_model_hash = checkpoint_info.calculate_shorthash() # pylint: disable=attribute-defined-outside-init
sd_model.sd_checkpoint_info = checkpoint_info # pylint: disable=attribute-defined-outside-init
sd_model.sd_model_checkpoint = checkpoint_info.filename # pylint: disable=attribute-defined-outside-init
@@ -1575,6 +1579,7 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model',
else:
load_diffuser(checkpoint_info, already_loaded_state_dict=state_dict, timer=timer, op=op)
if load_dict and next_checkpoint_info is not None:
print('HERE2')
model_data.sd_dict = shared.opts.sd_model_dict
shared.opts.data["sd_model_checkpoint"] = next_checkpoint_info.title
reload_model_weights(reuse_dict=True) # ok we loaded dict now lets redo and load model on top of it
@@ -1588,6 +1593,7 @@ def reload_model_weights(sd_model=None, info=None, reuse_dict=False, op='model',
shared.opts.data["sd_model_refiner"] = checkpoint_info.title
return model_data.sd_refiner

print('HERE3')
# fallback
shared.log.info(f"Loading using fallback: {op} model={checkpoint_info.title}")
try:
8 changes: 5 additions & 3 deletions modules/sd_samplers_common.py
@@ -40,15 +40,17 @@ def single_sample_to_image(sample, approximation=None):
warn_once('Unknown decode type')
approximation = 0
# normal sample is [4,64,64]
if sample.dtype == torch.bfloat16:
sample = sample.to(torch.float16)
try:
if sample.dtype == torch.bfloat16:
sample = sample.to(torch.float16)
except Exception as e:
warn_once(f'live preview: {e}')
if len(sample.shape) > 4: # likely unknown video latent (e.g. svd)
return Image.new(mode="RGB", size=(512, 512))
if len(sample) == 16: # sd_cascade
sd_cascade = True
if len(sample.shape) == 4 and sample.shape[0]: # likely animatediff latent
sample = sample.permute(1, 0, 2, 3)[0]

if shared.native: # [-x,x] to [-5,5]
sample_max = torch.max(sample)
if sample_max > 5:
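The new try/except wraps the bfloat16-to-float16 downcast used for live previews: numpy has no bfloat16 dtype, so the latent has to be cast before it can be turned into an image, and a failed cast now only degrades the preview instead of raising. A minimal sketch of the idea, assuming the sample is a torch tensor:

```python
import torch

def previewable(sample: torch.Tensor) -> torch.Tensor:
    # numpy (and therefore PIL conversion) cannot handle bfloat16 tensors
    try:
        if sample.dtype == torch.bfloat16:
            sample = sample.to(torch.float16)
    except Exception as e:
        print(f'live preview: {e}')  # degrade the preview rather than abort generation
    return sample
```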
13 changes: 8 additions & 5 deletions modules/sd_vae_approx.py
@@ -34,21 +34,24 @@ def forward(self, x):

def nn_approximation(sample): # Approximate NN
global sd_vae_approx_model # pylint: disable=global-statement
# ROCm throws memory exceptions and crashes the GPU with it if we use approx on the GPU
device = devices.device if devices.backend != "rocm" else "cpu"
dtype = devices.dtype_vae if devices.backend != "rocm" else torch.float32
if sd_vae_approx_model is None:
model_path = os.path.join(paths.models_path, "VAE-approx", "model.pt")
sd_vae_approx_model = VAEApprox()
if not os.path.exists(model_path):
model_path = os.path.join(paths.script_path, "models", "VAE-approx", "model.pt")
approx_weights = torch.load(model_path, map_location='cpu' if devices.device.type != 'cuda' else None)
approx_weights = torch.load(model_path, map_location='cpu' if devices.device.type != 'cuda' or devices.backend == "rocm" else None)
sd_vae_approx_model.load_state_dict(approx_weights)
sd_vae_approx_model.eval()
sd_vae_approx_model.to(devices.device, sample.dtype)
sd_vae_approx_model.to(device, dtype)
shared.log.debug(f'VAE load: type=approximate model={model_path}')
try:
in_sample = sample.to(devices.device).unsqueeze(0)
sd_vae_approx_model.to(devices.device, devices.dtype)
in_sample = sample.to(device, dtype).unsqueeze(0)
sd_vae_approx_model.to(device, dtype)
x_sample = sd_vae_approx_model(in_sample)
x_sample = x_sample[0].detach().cpu()
x_sample = x_sample[0].to(torch.float32).detach().cpu()
return x_sample
except Exception as e:
shared.log.error(f'VAE decode approximate: {e}')
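The workaround pins the approximate-preview VAE to CPU in float32 when the backend is ROCm, since running it on the GPU there has triggered memory exceptions; the decoded sample is also cast back to float32 before being moved to the CPU. A small sketch of the device/dtype selection under that assumption (names are illustrative, not the module's API):

```python
import torch

def approx_placement(backend: str, default_device: torch.device, default_dtype: torch.dtype):
    # ROCm has shown memory exceptions / GPU crashes with the approx VAE on-device,
    # so fall back to CPU in float32 there
    if backend == "rocm":
        return torch.device("cpu"), torch.float32
    return default_device, default_dtype
```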