
Merge branch 'dev' into flux-lora
AI-Casanova authored Sep 29, 2024
2 parents 6a49db4 + 174add0 commit be7f86f
Showing 19 changed files with 1,333 additions and 56 deletions.
22 changes: 17 additions & 5 deletions CHANGELOG.md
@@ -1,6 +1,6 @@
 # Change Log for SD.Next

-## Update for 2024-09-26
+## Update for 2024-09-29

 - **reprocess**
   - new top-level button: reprocess your last generated image(s)
@@ -18,6 +18,14 @@
   *note* sd/sdxl contain heavily distilled versions of reference models, so switching to reference model produces vastly different results
 - xyz grid support for text encoder
 - full prompt parser now correctly works with different prompts in batch
+- [Ctrl+X](https://github.com/genforce/ctrl-x):
+  - control **structure** (*similar to controlnet*) and **appearance** (*similar to ipadapter*)
+    without the need for extra models, all via code feed-forwards!
+  - can run in structure-only or appearance-only or both modes
+  - when providing structure and appearance input images, it's best to provide short prompts describing them
+  - structure image can be *almost anything*: *actual photo, openpose-style stick man, 3d render, sketch, depth-map, etc.*
+    just describe what it is in a structure prompt so it can be de-structured and correctly applied
+  - supports sdxl in both txt2img and img2img, simply select from scripts
 - **flux**
   - avoid unet load if unchanged
   - mark specific unet as unavailable if load failed
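Ctrl-X achieves this training-free control purely by manipulating features during the forward pass rather than through trained adapter weights. As a loose analogy for the appearance half only (not Ctrl-X's actual algorithm, which operates on attention and feature maps inside the U-Net), channel-wise statistic matching in the AdaIN style looks like this:

```py
import torch

def adain_match(content_feat: torch.Tensor, style_feat: torch.Tensor, eps: float = 1e-5) -> torch.Tensor:
    # shift/scale content features so their per-channel mean/std match the style features;
    # illustrative analogy for training-free appearance transfer, not Ctrl-X's code
    c_mean = content_feat.mean(dim=(-2, -1), keepdim=True)
    c_std = content_feat.std(dim=(-2, -1), keepdim=True) + eps
    s_mean = style_feat.mean(dim=(-2, -1), keepdim=True)
    s_std = style_feat.std(dim=(-2, -1), keepdim=True) + eps
    return (content_feat - c_mean) / c_std * s_std + s_mean

# usage: match a (B, C, H, W) activation to the appearance image's statistics
out = adain_match(torch.randn(1, 320, 64, 64), torch.randn(1, 320, 64, 64))
```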
@@ -76,14 +84,16 @@
   - fix imageviewer exif parser
   - selectable info view in image viewer, thanks @ZeldaMaster501
   - **free-u** check if device/dtype are fft compatible and cast as necessary
-- **experimental**
-  - flux t5 load from gguf: requires transformers pr
-  - rocm triton backend for flash attention, thanks @lshqqytiger
+- **rocm**
+  - additional gpu detection and auto-config code, thanks @lshqqytiger
+  - experimental triton backend for flash attention, thanks @lshqqytiger
 - **refactor**
   - modularize main process loop
   - massive log cleanup
   - full lint pass
   - improve inference mode handling
+- **experimental**
+  - flux t5 load from gguf: requires transformers pr


 ## Update for 2024-09-13
@@ -191,7 +201,9 @@ Examples:
 - **prompt enhance**: improve quality and/or verbosity of your prompts
   simply select in *scripts -> prompt enhance*
   uses [gokaygokay/Flux-Prompt-Enhance](https://huggingface.co/gokaygokay/Flux-Prompt-Enhance) model
-- **decode** auto-set upcast if first decode fails
+- **decode**
+  - auto-set upcast if first decode fails
+  - restore dtype on upcast
 - **taesd** configurable number of layers
   can be used to speed-up taesd decoding by reducing number of ops
   e.g. if generating 1024px image, reducing layers by 1 will result in preview being 512px
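The **decode** entries in this hunk describe retrying the VAE decode at higher precision when the first pass fails and then restoring the original dtype afterwards. A minimal sketch of that retry pattern, assuming a `vae` object exposing `.decode()` (as diffusers' `AutoencoderKL` does); an illustration, not SD.Next's exact implementation:

```py
import torch

def decode_with_upcast_retry(vae: torch.nn.Module, latents: torch.Tensor) -> torch.Tensor:
    orig_dtype = next(vae.parameters()).dtype
    try:
        return vae.decode(latents)  # first attempt in the current (e.g. fp16) dtype
    except Exception:
        vae.to(torch.float32)  # upcast and retry, e.g. after an fp16 overflow
        try:
            return vae.decode(latents.to(torch.float32))
        finally:
            vae.to(orig_dtype)  # restore dtype so later generations stay fast
```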
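The **taesd** entry relies on each decoder upsampling stage doubling the output resolution, so every removed layer halves the preview size. The arithmetic, as a quick sketch (`taesd_preview_px` is a hypothetical helper name, not part of the codebase):

```py
def taesd_preview_px(full_px: int, layers_removed: int) -> int:
    # each removed upsampling layer halves the decoded resolution
    return full_px // (2 ** layers_removed)

assert taesd_preview_px(1024, 0) == 1024
assert taesd_preview_px(1024, 1) == 512  # the example from the changelog
assert taesd_preview_px(1024, 2) == 256
```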
34 changes: 15 additions & 19 deletions installer.py
@@ -112,7 +112,7 @@ def get(self):
 }))
 logging.basicConfig(level=logging.ERROR, format='%(asctime)s | %(name)s | %(levelname)s | %(module)s | %(message)s', handlers=[logging.NullHandler()]) # redirect default logger to null
 pretty_install(console=console)
-traceback_install(console=console, extra_lines=1, max_frames=10, width=console.width, word_wrap=False, indent_guides=False, suppress=[])
+traceback_install(console=console, extra_lines=1, max_frames=16, width=console.width, word_wrap=False, indent_guides=False, suppress=[])
 while log.hasHandlers() and len(log.handlers) > 0:
     log.removeHandler(log.handlers[0])

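This hunk raises rich's traceback depth from 10 to 16 frames. For reference, a standalone sketch of the same configuration via `rich.traceback.install` (simplified: the real installer builds `console` with a custom theme), where `extra_lines`, `max_frames`, `word_wrap`, `indent_guides`, and `suppress` are all actual parameters of that function:

```py
from rich.console import Console
from rich.traceback import install as traceback_install

console = Console(log_time=True, log_time_format='%H:%M:%S-%f')
# show deeper call stacks (16 frames instead of 10) without wrapped lines or indent guides
traceback_install(console=console, extra_lines=1, max_frames=16,
                  width=console.width, word_wrap=False, indent_guides=False, suppress=[])
```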
@@ -477,27 +477,21 @@ def install_rocm_zluda():
     try:
         amd_gpus = rocm.get_agents()
         if len(amd_gpus) == 0:
-            if sys.platform == "win32":
-                log.warning('You do not have perl or any AMDGPUs. The installer may select a wrong device as compute device.')
-                log.info('ROCm: no agent was found')
-            else:
-                log.warning('ROCm: no agent was found')
+            log.warning('ROCm: no agent was found')
         else:
             log.info(f'ROCm: agents={[gpu.name for gpu in amd_gpus]}')
             if args.device_id is None:
-                device = amd_gpus[0]
+                index = 0
                 for idx, gpu in enumerate(amd_gpus):
                     if gpu.arch == rocm.MicroArchitecture.RDNA:
-                        device = gpu
-                        os.environ.setdefault('HIP_VISIBLE_DEVICES', str(idx))
-                        # if os.environ.get('TENSORFLOW_PACKAGE') == 'tensorflow-rocm': # do not use tensorflow-rocm for navi 3x
-                        #     os.environ['TENSORFLOW_PACKAGE'] = 'tensorflow==2.13.0'
-                        if not device.is_apu:
-                            # although apu was found, there can be a dedicated card. do not break loop.
-                            # if no dedicated card was found, apu will be used.
-                            break
-                    else:
-                        log.debug(f'ROCm: HSA_OVERRIDE_GFX_VERSION auto config skipped for {gpu.name}')
+                        index = idx
+                        # if gpu.name.startswith('gfx11') and os.environ.get('TENSORFLOW_PACKAGE') == 'tensorflow-rocm': # do not use tensorflow-rocm for navi 3x
+                        #     os.environ['TENSORFLOW_PACKAGE'] = 'tensorflow==2.13.0'
+                        if not gpu.is_apu:
+                            # although apu was found, there can be a dedicated card. do not break loop.
+                            # if no dedicated card was found, apu will be used.
+                            break
+                os.environ.setdefault('HIP_VISIBLE_DEVICES', str(index))
+                device = amd_gpus[index]
             else:
                 device_id = int(args.device_id)
                 if device_id < len(amd_gpus):
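The rewritten loop tracks an index instead of mutating `device` mid-loop: it prefers the first dedicated RDNA card, falls back to an RDNA APU if that is all there is, and otherwise keeps agent 0, publishing the choice via `HIP_VISIBLE_DEVICES` only after the loop. A self-contained sketch of that selection policy, with a hypothetical `Gpu` dataclass standing in for rocm's agent type:

```py
from dataclasses import dataclass

@dataclass
class Gpu:  # hypothetical stand-in for the rocm agent objects
    name: str
    is_rdna: bool
    is_apu: bool

def pick_gpu_index(gpus: list[Gpu]) -> int:
    index = 0  # default: first agent
    for idx, gpu in enumerate(gpus):
        if gpu.is_rdna:
            index = idx
            if not gpu.is_apu:
                break  # a dedicated RDNA card wins; otherwise keep scanning past APUs
    return index

gpus = [Gpu('gfx90c', is_rdna=True, is_apu=True), Gpu('gfx1100', is_rdna=True, is_apu=False)]
assert pick_gpu_index(gpus) == 1  # the dedicated card, not the APU
```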
@@ -571,7 +565,9 @@ def install_rocm_zluda():
     log.debug(f'ROCm hipBLASLt: arch={device.name} available={device.blaslt_supported}')
     rocm.set_blaslt_enabled(device.blaslt_supported)

-    if device is not None:
+    if device is None:
+        log.debug('ROCm: HSA_OVERRIDE_GFX_VERSION auto config skipped')
+    else:
         os.environ.setdefault('HSA_OVERRIDE_GFX_VERSION', device.get_gfx_version())

     return torch_command
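Both environment writes in this file go through `os.environ.setdefault`, so a value the user exported before launch always takes precedence over auto-config. A two-line illustration:

```py
import os

os.environ['HSA_OVERRIDE_GFX_VERSION'] = '10.3.0'            # pretend the user exported this
os.environ.setdefault('HSA_OVERRIDE_GFX_VERSION', '11.0.0')  # auto-config becomes a no-op
assert os.environ['HSA_OVERRIDE_GFX_VERSION'] == '10.3.0'
```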
