Merge branch 'dev' into SD3-parsing
AI-Casanova authored Jun 20, 2024
2 parents e300975 + eb6e3c3 commit 1fcd378
Showing 43 changed files with 444 additions and 281 deletions.
56 changes: 42 additions & 14 deletions CHANGELOG.md
@@ -3,44 +3,72 @@
## Pending

- Diffusers==0.30.0
  - https://github.com/huggingface/diffusers/issues/8546
  - https://github.com/huggingface/diffusers/pull/8566
  - https://github.com/huggingface/diffusers/pull/8584

## Update for 2024-06-16

### Improvements: SD3

- enable taesd preview and non-full quality mode
- enable base LoRA support
- simplified loading of model in single-file safetensors format
  loading sd3 can now be performed fully offline
- add support for nncf compressed weights, thanks @Disty0!
- add support for sampler shift for Euler FlowMatch
## Update for 2024-06-19

### Highlights for 2024-06-19

Following the zero-day **SD3** release, here's a refresh a week later with more than a few improvements.
And there's more than just SD3:
- support for quantized **T5** text encoder in all models that use T5: FP4/FP8/FP16/INT8 (SD3, PixArt-Σ, etc)
- support for **PixArt-Sigma** in small/medium/large variants
- support for **HunyuanDiT 1.1**
- (finally) new release of **Torch-DirectML**

### Model Improvements

- **SD3**: enable tiny-VAE (TAESD) preview and non-full quality mode
- SD3: enable base LoRA support
- SD3: add support for FP4 quantized T5 text encoder
  simply select in *settings -> model -> text encoder*
- SD3: add support for INT8 quantized T5 text encoder, thanks @Disty0!
- SD3: enable cpu-offloading for T5 text encoder, thanks @Disty0!
- SD3: simplified loading of model in single-file safetensors format
  model load can now be performed fully offline
- SD3: add support for NNCF compressed weights, thanks @Disty0!
- SD3: add support for sampler shift for Euler FlowMatch
  see *settings -> samplers*, also available as param in XYZ grid
  a higher shift means the model spends more time on structure and less on details (see the sketch after this list)
- SD3: add support for selecting T5 text encoder variant in XYZ grid
- **PixArt-Σ**: add *small* (512px) and *large* (2k) variations, in addition to the existing *medium* (1k)
- PixArt-Σ: add support for 4/8-bit quantized T5 text encoder
  *note*: by default PixArt-Σ uses the full FP16 T5 encoder, which has a large memory footprint
  simply select in *settings -> model -> text encoder* before or after model load
- **HunyuanDiT**: support for model version 1.1

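For reference, a minimal sketch of what the sampler-shift option corresponds to in plain `diffusers`. It is illustrative only: the model id, prompt, and shift value are examples, not SD.Next defaults or its internal code path.

```python
# Sketch: FlowMatch Euler sampler shift in plain diffusers (illustrative values only)
import torch
from diffusers import StableDiffusion3Pipeline, FlowMatchEulerDiscreteScheduler

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # example model id
    torch_dtype=torch.float16,
).to("cuda")

# a higher shift biases the flow-match timestep schedule toward high-noise steps,
# i.e. more of the sampling budget goes to overall structure rather than fine detail
pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config, shift=5.0)

image = pipe("a mountain lake at dawn", num_inference_steps=28, guidance_scale=5.0).images[0]
image.save("shift-5.png")
```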

### Improvements: General

- support FP4 quantized T5 text encoder, in addition to existing FP8 and FP16
- support for T5 text-encoder loader in **all** models that use T5 (see the sketch after this list)
  *example*: load FP8 quantized T5 text-encoder into PixArt Sigma
  *example*: load FP4 or FP8 quantized T5 text-encoder into PixArt Sigma or Stable Cascade!
- support for `torch-directml` **0.2.2**, thanks @lshqqytiger!
  *note*: new directml is finally based on modern `torch` 2.3.1!
- extra networks: info display now contains a link to the model source URL if it is known
  works for civitai and huggingface models
- improved google.colab support
- css tweaks for standardui
- css tweaks for modernui

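As a rough illustration of what the quantized T5 loader amounts to in plain `diffusers`/`transformers`: the sketch below uses bitsandbytes 8-bit loading as a stand-in for SD.Next's own FP4/FP8/INT8 paths, assumes a CUDA GPU with `bitsandbytes` installed, and uses an example model id.

```python
# Sketch: plug a quantized T5 text encoder into an SD3 pipeline (illustrative, not SD.Next internals)
import torch
from transformers import T5EncoderModel, BitsAndBytesConfig
from diffusers import StableDiffusion3Pipeline

repo = "stabilityai/stable-diffusion-3-medium-diffusers"  # example model id
text_encoder = T5EncoderModel.from_pretrained(
    repo,
    subfolder="text_encoder_3",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # 8-bit stand-in for FP4/FP8/INT8
)
pipe = StableDiffusion3Pipeline.from_pretrained(
    repo,
    text_encoder_3=text_encoder,
    torch_dtype=torch.float16,
    device_map="balanced",  # let accelerate place modules; the 8-bit encoder cannot be moved with .to()
)

image = pipe("a cozy reading nook, soft light", num_inference_steps=28).images[0]
image.save("sd3-quantized-t5.png")
```

PixArt-Σ and other T5-based pipelines accept a pre-loaded encoder the same way via their `text_encoder` argument.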
### Fixes

- fix unsaturated outputs, force apply vae config on model load
- fix hidiffusion handling of non-square aspect ratios, thanks @ShenZhang-Shin!
- fix control second pass resize
- fix api face-hires
- fix **hunyuandit** set attention processor
- fix hunyuandit set attention processor
- fix civitai download without name
- fix compatibility with latest adetailer
- fix invalid sampler warning
- fix starting from non git repo
- fix control api negative prompt handling
- fix saving style without name provided
- fix t2i-color adapter
- fix sdxl "has been incorrectly initialized"
- fix api face-hires
- fix api ip-adapter
- cleanup image metadata
- restructure api examples: `cli/api-*`
- handle theme fallback when invalid theme is specified
- remove obsolete training code leftovers
52 changes: 52 additions & 0 deletions cli/api-json.py
@@ -0,0 +1,52 @@
#!/usr/bin/env python

# curl -vX POST http://localhost:7860/sdapi/v1/txt2img --header "Content-Type: application/json" -d @3261.json
import os
import json
import logging
import argparse
import requests
import urllib3


sd_url = os.environ.get('SDAPI_URL', "http://127.0.0.1:7860")
sd_username = os.environ.get('SDAPI_USR', None)
sd_password = os.environ.get('SDAPI_PWD', None)
options = {
    "save_images": True,
    "send_images": True,
}

logging.basicConfig(level = logging.INFO, format = '%(asctime)s %(levelname)s: %(message)s')
log = logging.getLogger(__name__)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def auth():
    if sd_username is not None and sd_password is not None:
        return requests.auth.HTTPBasicAuth(sd_username, sd_password)
    return None


def post(endpoint: str, payload: dict = None):
    if 'sdapi' not in endpoint:
        endpoint = f'sdapi/v1/{endpoint}'
    if 'http' not in endpoint:
        endpoint = f'{sd_url}/{endpoint}'
    req = requests.post(endpoint, json = payload, timeout=300, verify=False, auth=auth())
    return { 'error': req.status_code, 'reason': req.reason, 'url': req.url } if req.status_code != 200 else req.json()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description = 'api-txt2img')
    parser.add_argument('endpoint', nargs=1, help='endpoint')
    parser.add_argument('json', nargs=1, help='json data or file')
    args = parser.parse_args()
    log.info(f'api-json: {args}')
    if os.path.isfile(args.json[0]):
        with open(args.json[0], 'r', encoding='ascii') as f:
            dct = json.load(f)  # TODO fails with b64 encoded images inside json due to string encoding
    else:
        dct = json.loads(args.json[0])
    res = post(endpoint=args.endpoint[0], payload=dct)
    print(res)
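Typical usage, assuming the server is running with default settings (the endpoint name and payload are just an example): `python cli/api-json.py txt2img '{"prompt": "test", "steps": 20}'`; a path to a JSON file can be passed instead of an inline string.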
32 changes: 32 additions & 0 deletions cli/image-encode.py
@@ -0,0 +1,32 @@
#!/usr/bin/env python
import io
import os
import sys
import base64
from PIL import Image
from rich import print # pylint: disable=redefined-builtin


def encode(file: str):
    image = Image.open(file) if os.path.exists(file) else None
    print(f'Input: file={file} image={image}')
    if image is None:
        return None
    if image.mode != 'RGB':
        image = image.convert('RGB')
    with io.BytesIO() as stream:
        image.save(stream, 'JPEG')
        image.close()
        values = stream.getvalue()
        encoded = base64.b64encode(values).decode()
        return encoded


if __name__ == "__main__":
    sys.argv.pop(0)
    fn = sys.argv[0] if len(sys.argv) > 0 else ''
    b64 = encode(fn)
    print('=== BEGIN ===')
    print(f'{b64}')
    print('=== END ===')
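For a quick round-trip check of the output, the inverse operation is straightforward with Pillow (a sketch; the `encoded.txt` filename is hypothetical):

```python
# Decode the base64 string printed by image-encode.py back into a PIL image
import io
import base64
from PIL import Image

with open('encoded.txt', 'r', encoding='ascii') as f:  # hypothetical file holding the printed b64
    b64 = f.read().strip()
image = Image.open(io.BytesIO(base64.b64decode(b64)))
print(image.size, image.mode)
```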

15 changes: 12 additions & 3 deletions extensions-builtin/Lora/network_overrides.py
@@ -1,7 +1,7 @@
from modules import shared


maybe_diffusers = [
maybe_diffusers = [ # forced if lora_maybe_diffusers is enabled
    'aaebf6360f7d', # sd15-lcm
    '3d18b05e4f56', # sdxl-lcm
    'b71dcb732467', # sdxl-tcd
@@ -19,14 +19,23 @@
    '8cca3706050b', # hyper-sdxl-1step
]

force_diffusers = [
force_diffusers = [ # forced always
    '816d0eed49fd', # flash-sdxl
    'c2ec22757b46', # flash-sd15
]

force_models = [ # forced always
    'sd3',
]

force_classes = [ # forced always
]


def check_override(shorthash=''):
    force = False
    force = force or (shared.sd_model_type == 'sd3') # TODO sd3 forced diffusers for lora load
    force = force or (shared.sd_model_type in force_models)
    force = force or (shared.sd_model.__class__.__name__ in force_classes)
    if len(shorthash) < 4:
        return force
    force = force or (any(x.startswith(shorthash) for x in maybe_diffusers) if shared.opts.lora_maybe_diffusers else False)
6 changes: 6 additions & 0 deletions extensions-builtin/Lora/networks.py
@@ -49,6 +49,7 @@ def assign_network_names_to_compvis_modules(sd_model):
    network_layer_mapping = {}
    if shared.native:
        if not hasattr(shared.sd_model, 'text_encoder') or not hasattr(shared.sd_model, 'unet'):
            sd_model.network_layer_mapping = {}
            return
        for name, module in shared.sd_model.text_encoder.named_modules():
            prefix = "lora_te1_" if shared.sd_model_type == "sdxl" else "lora_te_"
@@ -66,6 +67,7 @@ def assign_network_names_to_compvis_modules(sd_model):
            module.network_layer_name = network_name
    else:
        if not hasattr(shared.sd_model, 'cond_stage_model'):
            sd_model.network_layer_mapping = {}
            return
        for name, module in shared.sd_model.cond_stage_model.wrapped.named_modules():
            network_name = name.replace(".", "_")
@@ -87,10 +89,14 @@ def load_diffusers(name, network_on_disk, lora_scale=1.0) -> network.Network:
        return cached
    if not shared.native:
        return None
    if not hasattr(shared.sd_model, 'load_lora_weights'):
        shared.log.error(f"LoRA load failed: class={shared.sd_model.__class__} does not implement load lora")
        return None
    try:
        shared.sd_model.load_lora_weights(network_on_disk.filename)
    except Exception as e:
        errors.display(e, "LoRA")
        return None
    if shared.opts.lora_fuse_diffusers:
        shared.sd_model.fuse_lora(lora_scale=lora_scale)
    net = network.Network(name, network_on_disk)
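For context on the new guard: only pipelines that include diffusers' LoRA-loading support expose `load_lora_weights`. A minimal sketch of the pattern being checked for (the LoRA file path is hypothetical):

```python
# Sketch: the diffusers API the guard above checks for (hypothetical LoRA file path)
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
if hasattr(pipe, "load_lora_weights"):
    pipe.load_lora_weights("my-lora.safetensors")  # hypothetical local file
    pipe.fuse_lora(lora_scale=0.8)  # optionally bake the weights in, mirroring lora_fuse_diffusers
```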
2 changes: 1 addition & 1 deletion extensions-builtin/Lora/ui_extra_networks_lora.py
@@ -102,7 +102,7 @@ def find_version():

            return item
        except Exception as e:
            shared.log.debug(f"Extra networks error: type=lora file={name} {e}")
            shared.log.debug(f"Networks error: type=lora file={name} {e}")
            from modules import errors
            errors.display(e, 'Lora')
            return None
2 changes: 1 addition & 1 deletion html/locale_en.json
@@ -230,7 +230,7 @@
{"id":"","label":"Control Options","localized":"","hint":"Settings related to the Control tab"},
{"id":"","label":"Training","localized":"","hint":"Settings related to model training configuration and directories"},
{"id":"","label":"Interrogate","localized":"","hint":"Settings related to interrogation configuration"},
{"id":"","label":"Extra Networks","localized":"","hint":"Settings related to extra networks user interface, extra networks multiplier defaults, and configuration"},
{"id":"","label":"Networks","localized":"","hint":"Settings related to networks user interface, networks multiplier defaults, and configuration"},
{"id":"","label":"Licenses","localized":"","hint":"View licenses of all additional included libraries"},
{"id":"","label":"Show all pages","localized":"","hint":"Show all settings pages"}
],
2 changes: 1 addition & 1 deletion html/locale_ko.json
@@ -48,7 +48,7 @@
{"id":"","label":"Interrogate\nDeepBooru","localized":"DeepBooru 모델 사용","hint":"DeepBooru 모델을 사용해 이미지에서 설명을 추출한다."}
],
"extra networks": [
{"id":"","label":"Extra networks tab order","localized":"엑스트라 네트워크 탭 순서","hint":"Comma-separated list of tab names; tabs listed here will appear in the extra networks UI first and in the order listed"},
{"id":"","label":"Networks tab order","localized":"엑스트라 네트워크 탭 순서","hint":"Comma-separated list of tab names; tabs listed here will appear in the extra networks UI first and in the order listed"},
{"id":"","label":"UI position","localized":"UI 위치","hint":""},
{"id":"","label":"UI height (%)","localized":"UI 높이 (%)","hint":""},
{"id":"","label":"UI sidebar width (%)","localized":"UI 사이드바 너비 (%)","hint":""},
23 changes: 19 additions & 4 deletions html/reference.json
@@ -160,15 +160,30 @@
    "preview": "PixArt-alpha--PixArt-XL-2-1024-MS.jpg",
    "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0"
  },
  "Pixart-Σ": {
    "path": "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
  "Pixart-Σ Small": {
    "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-512-MS",
    "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.",
    "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg",
    "skip": true,
    "extras": "width: 512, height: 512, sampler: Default, cfg_scale: 2.0"
  },
  "Pixart-Σ Medium": {
    "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
    "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.",
    "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg",
    "skip": true,
    "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0"
  },
  "Pixart-Σ Large": {
    "path": "huggingface/PixArt-alpha/PixArt-Sigma-XL-2-2K-MS",
    "desc": "PixArt-Σ, a Diffusion Transformer model (DiT) capable of directly generating images at 4K resolution. PixArt-Σ represents a significant advancement over its predecessor, PixArt-α, offering images of markedly higher fidelity and improved alignment with text prompts.",
    "preview": "PixArt-alpha--pixart_sigma_sdxlvae_T5_diffusers.jpg",
    "skip": true,
    "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0"
  },

  "Tencent HunyuanDiT": {
    "path": "Tencent-Hunyuan/HunyuanDiT-Diffusers",
  "Tencent HunyuanDiT 1.1": {
    "path": "Tencent-Hunyuan/HunyuanDiT-v1.1-Diffusers",
    "desc": "Hunyuan-DiT : A Powerful Multi-Resolution Diffusion Transformer with Fine-Grained Chinese Understanding.",
    "preview": "Tencent-Hunyuan-HunyuanDiT.jpg",
    "extras": "width: 1024, height: 1024, sampler: Default, cfg_scale: 2.0"
34 changes: 24 additions & 10 deletions installer.py
@@ -275,9 +275,12 @@ def install(package, friendly: str = None, ignore: bool = False, reinstall: bool

# execute git command
@lru_cache()
def git(arg: str, folder: str = None, ignore: bool = False):
def git(arg: str, folder: str = None, ignore: bool = False, optional: bool = False):
    if args.skip_git:
        return ''
    if optional:
        if 'google.colab' in sys.modules:
            return ''
    git_cmd = os.environ.get('GIT', "git")
    if git_cmd != "git":
        git_cmd = os.path.abspath(git_cmd)
@@ -306,7 +309,7 @@ def branch(folder=None):
        return None
    branches = []
    try:
        b = git('branch --show-current', folder)
        b = git('branch --show-current', folder, optional=True)
        if b == '':
            branches = git('branch', folder).split('\n')
            if len(branches) > 0:
@@ -315,15 +318,15 @@
                b = branches[1].strip()
                log.debug(f'Git detached head detected: folder="{folder}" reattach={b}')
    except Exception:
        b = git('git rev-parse --abbrev-ref HEAD', folder)
        b = git('git rev-parse --abbrev-ref HEAD', folder, optional=True)
    if 'main' in b:
        b = 'main'
    elif 'master' in b:
        b = 'master'
    else:
        b = b.split('\n')[0].replace('*', '').strip()
    log.debug(f'Submodule: {folder} / {b}')
    git(f'checkout {b}', folder, ignore=True)
    git(f'checkout {b}', folder, ignore=True, optional=True)
    return b


@@ -396,6 +399,12 @@ def check_python(supported_minors=[9, 10, 11, 12], reason=None):
    if args.quick:
        return
    log.info(f'Python version={platform.python_version()} platform={platform.system()} bin="{sys.executable}" venv="{sys.prefix}"')
    if int(sys.version_info.major) == 3 and int(sys.version_info.minor) == 12 and int(sys.version_info.micro) > 3: # TODO python 3.12.4 or higher causes issues with pydantic
        log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.12.3 or lower")
        if reason is not None:
            log.error(reason)
        if not args.ignore:
            sys.exit(1)
    if not (int(sys.version_info.major) == 3 and int(sys.version_info.minor) in supported_minors):
        log.error(f"Incompatible Python version: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} required 3.{supported_minors}")
        if reason is not None:
@@ -1035,19 +1044,24 @@ def get_version(force=False):


def check_ui(ver):
    if ver is None or 'branch' not in ver or 'ui' not in ver or ver['branch'] == ver['ui']:
        return
    log.debug(f'Branch mismatch: sdnext={ver["branch"]} ui={ver["ui"]}')
    def same(ver):
        core = ver['branch'] if ver is not None and 'branch' in ver else 'unknown'
        ui = ver['ui'] if ver is not None and 'ui' in ver else 'unknown'
        return core == ui or (core == 'master' and ui == 'main')

    if not same(ver):
        log.debug(f'Branch mismatch: sdnext={ver["branch"]} ui={ver["ui"]}')
        cwd = os.getcwd()
        try:
            os.chdir('extensions-builtin/sdnext-modernui')
            git('checkout ' + ver['branch'], ignore=True)
            target = 'dev' if 'dev' in ver['branch'] else 'main'
            git('checkout ' + target, ignore=True, optional=True)
            os.chdir(cwd)
            ver = get_version(force=True)
            if ver['branch'] == ver['ui']:
            if not same(ver):
                log.debug(f'Branch synchronized: {ver["branch"]}')
            else:
                log.debug(f'Branch synch failed: sdnext={ver["branch"]} ui={ver["ui"]}')
                log.debug(f'Branch sync failed: sdnext={ver["branch"]} ui={ver["ui"]}')
        except Exception as e:
            log.debug(f'Branch switch: {e}')
            os.chdir(cwd)
1 change: 1 addition & 0 deletions javascript/base.css
@@ -17,6 +17,7 @@
.tooltip-show { opacity: 0.9; }
.tooltip-left { right: unset; left: 1em; }
.toolbutton-selected { background: var(--background-fill-primary) !important; }
.input-accordion-checkbox { display: none; }

/* live preview */
.progressDiv { position: relative; height: 20px; background: #b4c0cc; margin-bottom: -3px; }
