diff --git a/.gitignore b/.gitignore
index fd8ae086..2823bc9e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,11 @@
build*/
test/
-
.cache/
*.swp
.vscode/
+*.bat
+*.bin
+*.exe
+*.gguf
+output.png
+models/*
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
index cc639fee..0b8fe290 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
[submodule "ggml"]
- path = ggml
- url = https://github.com/leejet/ggml.git
+ path = ggml
+ url = https://github.com/FSSRepo/ggml.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 13d0a1ce..7148431e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,10 +24,24 @@ endif()
# general
#option(SD_BUILD_TESTS "sd: build tests" ${SD_STANDALONE})
option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
+option(SD_CUBLAS "sd: cuda backend" OFF)
+option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
option(BUILD_SHARED_LIBS "sd: build shared libs" OFF)
#option(SD_BUILD_SERVER "sd: build server example" ON)
+if(SD_CUBLAS)
+ message("Use CUBLAS as backend stable-diffusion")
+ set(GGML_CUBLAS ON)
+ add_definitions(-DSD_USE_CUBLAS)
+endif()
+
+if(SD_FLASH_ATTN)
+ message("Use Flash Attention for memory optimization")
+ add_definitions(-DSD_USE_FLASH_ATTENTION)
+endif()
+
+set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
# deps
add_subdirectory(ggml)
@@ -38,6 +52,7 @@ target_link_libraries(${SD_LIB} PUBLIC ggml)
target_include_directories(${SD_LIB} PUBLIC .)
target_compile_features(${SD_LIB} PUBLIC cxx_std_11)
+add_subdirectory(common)
if (SD_BUILD_EXAMPLES)
add_subdirectory(examples)
diff --git a/README.md b/README.md
index 95b185fd..05966c1f 100644
--- a/README.md
+++ b/README.md
@@ -9,17 +9,20 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
## Features
- Plain C/C++ implementation based on [ggml](https://github.com/ggerganov/ggml), working in the same way as [llama.cpp](https://github.com/ggerganov/llama.cpp)
+- Super lightweight and without external dependencies.
- 16-bit, 32-bit float support
- 4-bit, 5-bit and 8-bit integer quantization support
- Accelerated memory-efficient CPU inference
- - Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image
+ - Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image; enabling Flash Attention reduces this to ~1.8GB.
- AVX, AVX2 and AVX512 support for x86 architectures
- SD1.x and SD2.x support
+- Full CUDA backend for GPU acceleration, for now just for float16 and float32 models. There are some issues with quantized models and CUDA; they will be fixed in the future.
+- Flash Attention for memory usage optimization (CPU only for now).
- Original `txt2img` and `img2img` mode
- Negative prompt
- [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) style tokenizer (not all the features, only token weighting for now)
- LoRA support, same as [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#lora)
-- Latent Consistency Models support(LCM/LCM-LoRA)
+- Latent Consistency Models support (LCM/LCM-LoRA)
- Sampling method
- `Euler A`
- `Euler`
@@ -40,10 +43,11 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
### TODO
- [ ] More sampling methods
-- [ ] GPU support
- [ ] Make inference faster
- The current implementation of ggml_conv_2d is slow and has high memory usage
- [ ] Continuing to reduce memory usage (quantizing the weights of ggml_conv_2d)
+- [ ] Implement BPE Tokenizer
+- [ ] Add [TAESD](https://github.com/madebyollin/taesd) for faster VAE decoding
- [ ] k-quants support
## Usage
@@ -77,24 +81,20 @@ git submodule update
# curl -L -O https://huggingface.co/stabilityai/stable-diffusion-2-1/blob/main/v2-1_768-nonema-pruned.safetensors
```
-- convert weights to ggml model format
+- convert weights to gguf model format
```shell
- cd models
- pip install -r requirements.txt
- # (optional) python convert_diffusers_to_original_stable_diffusion.py --model_path [path to diffusers weights] --checkpoint_path [path to weights]
- python convert.py [path to weights] --out_type [output precision]
- # For example, python convert.py sd-v1-4.ckpt --out_type f16
+ ./bin/convert sd-v1-4.ckpt -t f16
```
### Quantization
-You can specify the output model format using the --out_type parameter
+You can specify the output model format using the `--type` or `-t` parameter
- `f16` for 16-bit floating-point
- `f32` for 32-bit floating-point
-- `q8_0` for 8-bit integer quantization
-- `q5_0` or `q5_1` for 5-bit integer quantization
+- `q8_0` for 8-bit integer quantization
+- `q5_0` or `q5_1` for 5-bit integer quantization
- `q4_0` or `q4_1` for 4-bit integer quantization
### Build
@@ -115,6 +115,24 @@ cmake .. -DGGML_OPENBLAS=ON
cmake --build . --config Release
```
+##### Using CUBLAS
+
+This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). Recommended to have at least 4 GB of VRAM.
+
+```
+cmake .. -DSD_CUBLAS=ON
+cmake --build . --config Release
+```
+
+##### Using Flash Attention
+
+Enabling flash attention reduces memory usage by at least 400 MB. At the moment, it is not supported when CUBLAS is enabled because the kernel implementation is missing.
+
+```
+cmake .. -DSD_FLASH_ATTN=ON
+cmake --build . --config Release
+```
+
### Run
```
@@ -141,6 +159,7 @@ arguments:
--steps STEPS number of sample steps (default: 20)
--rng {std_default, cuda} RNG (default: cuda)
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
+ -b, --batch-count COUNT number of images to generate.
--schedule {discrete, karras} Denoiser sigma schedule (default: discrete)
-v, --verbose print extra info
```
@@ -148,7 +167,7 @@ arguments:
#### txt2img example
```
-./bin/sd -m ../models/sd-v1-4-ggml-model-f16.bin -p "a lovely cat"
+./bin/sd -m ../sd-v1-4-f16.gguf -p "a lovely cat"
```
Using formats of different precisions will yield results of varying quality.
@@ -163,7 +182,7 @@ Using formats of different precisions will yield results of varying quality.
```
-./bin/sd --mode img2img -m ../models/sd-v1-4-ggml-model-f16.bin -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
+./bin/sd --mode img2img -m ../models/sd-v1-4-f16.gguf -p "cat with blue eyes" -i ./output.png -o ./img2img_output.png --strength 0.4
```
@@ -172,12 +191,11 @@ Using formats of different precisions will yield results of varying quality.
#### with LoRA
-- convert lora weights to ggml model format
+- convert lora weights to gguf model format
```shell
- cd models
- python convert.py [path to weights] --lora
- # For example, python convert.py marblesh.safetensors
+ bin/convert [lora path] -t f16
+ # For example, bin/convert marblesh.safetensors -t f16
```
- You can specify the directory where the lora weights are stored via `--lora-model-dir`. If not specified, the default is the current working directory.
@@ -187,10 +205,10 @@ Using formats of different precisions will yield results of varying quality.
Here's a simple example:
```
-./bin/sd -m ../models/v1-5-pruned-emaonly-ggml-model-f16.bin -p "a lovely cat" --lora-model-dir ../models
+./bin/sd -m ../models/v1-5-pruned-emaonly-f16.gguf -p "a lovely cat" --lora-model-dir ../models
```
-`../models/marblesh-ggml-lora.bin` will be applied to the model
+`../models/marblesh.gguf` will be applied to the model
#### LCM/LCM-LoRA
@@ -201,7 +219,7 @@ Here's a simple example:
Here's a simple example:
```
-./bin/sd -m ../models/v1-5-pruned-emaonly-ggml-model-f16.bin -p "a lovely cat" --steps 4 --lora-model-dir ../models -v --cfg-scale 1
+./bin/sd -m ../models/v1-5-pruned-emaonly-f16.gguf -p "a lovely cat" --steps 4 --lora-model-dir ../models -v --cfg-scale 1
```
| without LCM-LoRA (--cfg-scale 7) | with LCM-LoRA (--cfg-scale 1) |
@@ -222,7 +240,7 @@ docker build -t sd .
```shell
docker run -v /path/to/models:/models -v /path/to/output/:/output sd [args...]
# For example
-# docker run -v ./models:/models -v ./build:/output sd -m /models/sd-v1-4-ggml-model-f16.bin -p "a lovely cat" -v -o /output/output.png
+# docker run -v ./models:/models -v ./build:/output sd -m /models/sd-v1-4-f16.gguf -p "a lovely cat" -v -o /output/output.png
```
## Memory/Disk Requirements
@@ -230,7 +248,8 @@ docker run -v /path/to/models:/models -v /path/to/output/:/output sd [args...]
| precision | f32 | f16 |q8_0 |q5_0 |q5_1 |q4_0 |q4_1 |
| ---- | ---- |---- |---- |---- |---- |---- |---- |
| **Disk** | 2.7G | 2.0G | 1.7G | 1.6G | 1.6G | 1.5G | 1.5G |
-| **Memory**(txt2img - 512 x 512) | ~2.8G | ~2.3G | ~2.1G | ~2.0G | ~2.0G | ~2.0G | ~2.0G |
+| **Memory** (txt2img - 512 x 512) | ~2.8G | ~2.3G | ~2.1G | ~2.0G | ~2.0G | ~2.0G | ~2.0G |
+| **Memory** (txt2img - 512 x 512) *with Flash Attention* | ~2.4G | ~1.9G | ~1.6G | ~1.5G | ~1.5G | ~1.5G | ~1.5G |
## Contributors
diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
new file mode 100644
index 00000000..715e3b5e
--- /dev/null
+++ b/common/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(TARGET common)
+
+# json.hpp library from: https://github.com/nlohmann/json
+
+add_library(${TARGET} OBJECT common.cpp common.h stb_image.h stb_image_write.h json.hpp)
+
+target_include_directories(${TARGET} PUBLIC .)
+target_link_libraries(${TARGET} PRIVATE stable-diffusion ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PUBLIC cxx_std_11)
+
+# ZIP Library from: https://github.com/kuba--/zip
+
+set(Z_TARGET zip)
+add_library(${Z_TARGET} OBJECT zip.c zip.h miniz.h)
+target_include_directories(${Z_TARGET} PUBLIC .)
\ No newline at end of file
diff --git a/examples/main.cpp b/common/common.cpp
similarity index 57%
rename from examples/main.cpp
rename to common/common.cpp
index f4d6ca88..5db9e06f 100644
--- a/examples/main.cpp
+++ b/common/common.cpp
@@ -1,20 +1,11 @@
-#include
-#include
+#include "common.h"
+#include
#include
#include
-#include
#include
#include
#include
-
-#include "stable-diffusion.h"
-
-#define STB_IMAGE_IMPLEMENTATION
-#include "stb_image.h"
-
-#define STB_IMAGE_WRITE_IMPLEMENTATION
-#define STB_IMAGE_WRITE_STATIC
-#include "stb_image_write.h"
+#include
#if defined(__APPLE__) && defined(__MACH__)
#include
@@ -26,9 +17,6 @@
#include
#endif
-#define TXT2IMG "txt2img"
-#define IMG2IMG "img2img"
-
// get_num_physical_cores is copy from
// https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
// LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
@@ -72,7 +60,7 @@ const char* rng_type_to_str[] = {
"cuda",
};
-// Names of the sampler method, same order as enum SampleMethod in stable-diffusion.h
+// Names of the sampler method, same order as enum sample_method in stable-diffusion.h
const char* sample_method_str[] = {
"euler_a",
"euler",
@@ -84,53 +72,36 @@ const char* sample_method_str[] = {
"lcm",
};
-// Names of the sigma schedule overrides, same order as Schedule in stable-diffusion.h
+// Names of the sigma schedule overrides, same order as sample_schedule in stable-diffusion.h
const char* schedule_str[] = {
"default",
"discrete",
"karras"};
-struct Option {
- int n_threads = -1;
- std::string mode = TXT2IMG;
- std::string model_path;
- std::string lora_model_dir;
- std::string output_path = "output.png";
- std::string init_img;
- std::string prompt;
- std::string negative_prompt;
- float cfg_scale = 7.0f;
- int w = 512;
- int h = 512;
- SampleMethod sample_method = EULER_A;
- Schedule schedule = DEFAULT;
- int sample_steps = 20;
- float strength = 0.75f;
- RNGType rng_type = CUDA_RNG;
- int64_t seed = 42;
- bool verbose = false;
-
- void print() {
- printf("Option: \n");
- printf(" n_threads: %d\n", n_threads);
- printf(" mode: %s\n", mode.c_str());
- printf(" model_path: %s\n", model_path.c_str());
- printf(" lora_model_dir: %s\n", lora_model_dir.c_str());
- printf(" output_path: %s\n", output_path.c_str());
- printf(" init_img: %s\n", init_img.c_str());
- printf(" prompt: %s\n", prompt.c_str());
- printf(" negative_prompt: %s\n", negative_prompt.c_str());
- printf(" cfg_scale: %.2f\n", cfg_scale);
- printf(" width: %d\n", w);
- printf(" height: %d\n", h);
- printf(" sample_method: %s\n", sample_method_str[sample_method]);
- printf(" schedule: %s\n", schedule_str[schedule]);
- printf(" sample_steps: %d\n", sample_steps);
- printf(" strength: %.2f\n", strength);
- printf(" rng: %s\n", rng_type_to_str[rng_type]);
- printf(" seed: %ld\n", seed);
- }
-};
+const char* modes_str[] = {
+ "txt2img",
+ "img2img"};
+
+void print_params(SDParams params) {
+ printf("Option: \n");
+ printf(" n_threads: %d\n", params.n_threads);
+ printf(" mode: %s\n", modes_str[params.mode]);
+ printf(" model_path: %s\n", params.model_path.c_str());
+ printf(" output_path: %s\n", params.output_path.c_str());
+ printf(" init_img: %s\n", params.input_path.c_str());
+ printf(" prompt: %s\n", params.prompt.c_str());
+ printf(" negative_prompt: %s\n", params.negative_prompt.c_str());
+ printf(" cfg_scale: %.2f\n", params.cfg_scale);
+ printf(" width: %d\n", params.width);
+ printf(" height: %d\n", params.height);
+ printf(" sample_method: %s\n", sample_method_str[params.sample_method]);
+ printf(" schedule: %s\n", schedule_str[params.schedule]);
+ printf(" sample_steps: %d\n", params.sample_steps);
+ printf(" strength: %.2f\n", params.strength);
+ printf(" rng: %s\n", rng_type_to_str[params.rng_type]);
+ printf(" seed: %ld\n", params.seed);
+ printf(" batch_count: %d\n", params.batch_count);
+}
void print_usage(int argc, const char* argv[]) {
printf("usage: %s [arguments]\n", argv[0]);
@@ -143,7 +114,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" -m, --model [MODEL] path to model\n");
printf(" --lora-model-dir [DIR] lora model directory\n");
printf(" -i, --init-img [IMAGE] path to the input image, required by img2img\n");
- printf(" -o, --output OUTPUT path to write result image to (default: .\\output.png)\n");
+ printf(" -o, --output OUTPUT path to write result image to (default: ./output.png)\n");
printf(" -p, --prompt [PROMPT] the prompt to render\n");
printf(" -n, --negative-prompt PROMPT the negative prompt (default: \"\")\n");
printf(" --cfg-scale SCALE unconditional guidance scale: (default: 7.0)\n");
@@ -156,95 +127,113 @@ void print_usage(int argc, const char* argv[]) {
printf(" --steps STEPS number of sample steps (default: 20)\n");
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
+ printf(" -b, --batch-count COUNT number of images to generate.\n");
printf(" --schedule {discrete, karras} Denoiser sigma schedule (default: discrete)\n");
printf(" -v, --verbose print extra info\n");
}
-void parse_args(int argc, const char* argv[], Option* opt) {
+void parse_args(int argc, const char** argv, SDParams& params) {
bool invalid_arg = false;
-
+ std::string arg;
for (int i = 1; i < argc; i++) {
- std::string arg = argv[i];
+ arg = argv[i];
if (arg == "-t" || arg == "--threads") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->n_threads = std::stoi(argv[i]);
+ params.n_threads = std::stoi(argv[i]);
} else if (arg == "-M" || arg == "--mode") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->mode = argv[i];
-
+ const char* mode_selected = argv[i];
+ int mode_found = -1;
+ for (int d = 0; d < MODE_COUNT; d++) {
+ if (!strcmp(mode_selected, modes_str[d])) {
+ mode_found = d;
+ }
+ }
+ if (mode_found == -1) {
+ fprintf(stderr, "error: invalid mode %s, must be one of [txt2img, img2img]\n",
+ mode_selected);
+ exit(1);
+ }
+ params.mode = (sd_mode)mode_found;
} else if (arg == "-m" || arg == "--model") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->model_path = argv[i];
+ params.model_path = argv[i];
} else if (arg == "--lora-model-dir") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->lora_model_dir = argv[i];
+ params.lora_model_dir = argv[i];
} else if (arg == "-i" || arg == "--init-img") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->init_img = argv[i];
+ params.input_path = argv[i];
} else if (arg == "-o" || arg == "--output") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->output_path = argv[i];
+ params.output_path = argv[i];
} else if (arg == "-p" || arg == "--prompt") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->prompt = argv[i];
+ params.prompt = argv[i];
} else if (arg == "-n" || arg == "--negative-prompt") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->negative_prompt = argv[i];
+ params.negative_prompt = argv[i];
} else if (arg == "--cfg-scale") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->cfg_scale = std::stof(argv[i]);
+ params.cfg_scale = std::stof(argv[i]);
} else if (arg == "--strength") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->strength = std::stof(argv[i]);
+ params.strength = std::stof(argv[i]);
} else if (arg == "-H" || arg == "--height") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->h = std::stoi(argv[i]);
+ params.height = std::stoi(argv[i]);
} else if (arg == "-W" || arg == "--width") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->w = std::stoi(argv[i]);
+ params.width = std::stoi(argv[i]);
} else if (arg == "--steps") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->sample_steps = std::stoi(argv[i]);
+ params.sample_steps = std::stoi(argv[i]);
+ } else if (arg == "-b" || arg == "--batch-count") {
+ if (++i >= argc) {
+ invalid_arg = true;
+ break;
+ }
+ params.batch_count = std::stoi(argv[i]);
} else if (arg == "--rng") {
if (++i >= argc) {
invalid_arg = true;
@@ -252,9 +241,9 @@ void parse_args(int argc, const char* argv[], Option* opt) {
}
std::string rng_type_str = argv[i];
if (rng_type_str == "std_default") {
- opt->rng_type = STD_DEFAULT_RNG;
+ params.rng_type = STD_DEFAULT_RNG;
} else if (rng_type_str == "cuda") {
- opt->rng_type = CUDA_RNG;
+ params.rng_type = CUDA_RNG;
} else {
invalid_arg = true;
break;
@@ -275,13 +264,13 @@ void parse_args(int argc, const char* argv[], Option* opt) {
invalid_arg = true;
break;
}
- opt->schedule = (Schedule)schedule_found;
+ params.schedule = (Schedule)schedule_found;
} else if (arg == "-s" || arg == "--seed") {
if (++i >= argc) {
invalid_arg = true;
break;
}
- opt->seed = std::stoll(argv[i]);
+ params.seed = std::stoll(argv[i]);
} else if (arg == "--sampling-method") {
if (++i >= argc) {
invalid_arg = true;
@@ -298,81 +287,74 @@ void parse_args(int argc, const char* argv[], Option* opt) {
invalid_arg = true;
break;
}
- opt->sample_method = (SampleMethod)sample_method_found;
+ params.sample_method = (SampleMethod)sample_method_found;
} else if (arg == "-h" || arg == "--help") {
print_usage(argc, argv);
exit(0);
} else if (arg == "-v" || arg == "--verbose") {
- opt->verbose = true;
+ params.verbose = true;
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
print_usage(argc, argv);
exit(1);
}
- if (invalid_arg) {
- fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
- print_usage(argc, argv);
- exit(1);
- }
}
-
- if (opt->n_threads <= 0) {
- opt->n_threads = get_num_physical_cores();
- }
-
- if (opt->mode != TXT2IMG && opt->mode != IMG2IMG) {
- fprintf(stderr, "error: invalid mode %s, must be one of ['%s', '%s']\n",
- opt->mode.c_str(), TXT2IMG, IMG2IMG);
+ if (invalid_arg) {
+ fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
+ print_usage(argc, argv);
exit(1);
}
+ if (params.n_threads <= 0) {
+ params.n_threads = get_num_physical_cores();
+ }
- if (opt->prompt.length() == 0) {
+ if (params.prompt.length() == 0) {
fprintf(stderr, "error: the following arguments are required: prompt\n");
print_usage(argc, argv);
exit(1);
}
- if (opt->model_path.length() == 0) {
+ if (params.model_path.length() == 0) {
fprintf(stderr, "error: the following arguments are required: model_path\n");
print_usage(argc, argv);
exit(1);
}
- if (opt->mode == IMG2IMG && opt->init_img.length() == 0) {
+ if (params.mode == IMG2IMG && params.input_path.length() == 0) {
fprintf(stderr, "error: when using the img2img mode, the following arguments are required: init-img\n");
print_usage(argc, argv);
exit(1);
}
- if (opt->output_path.length() == 0) {
+ if (params.output_path.length() == 0) {
fprintf(stderr, "error: the following arguments are required: output_path\n");
print_usage(argc, argv);
exit(1);
}
- if (opt->w <= 0 || opt->w % 64 != 0) {
+ if (params.width <= 0 || params.width % 64 != 0) {
fprintf(stderr, "error: the width must be a multiple of 64\n");
exit(1);
}
- if (opt->h <= 0 || opt->h % 64 != 0) {
+ if (params.height <= 0 || params.height % 64 != 0) {
fprintf(stderr, "error: the height must be a multiple of 64\n");
exit(1);
}
- if (opt->sample_steps <= 0) {
+ if (params.sample_steps <= 0) {
fprintf(stderr, "error: the sample_steps must be greater than 0\n");
exit(1);
}
- if (opt->strength < 0.f || opt->strength > 1.f) {
+ if (params.strength < 0.f || params.strength > 1.f) {
fprintf(stderr, "error: can only work with strength in [0.0, 1.0]\n");
exit(1);
}
- if (opt->seed < 0) {
+ if (params.seed < 0) {
srand((int)time(NULL));
- opt->seed = rand();
+ params.seed = rand();
}
}
@@ -388,98 +370,22 @@ std::string basename(const std::string& path) {
return path;
}
-int main(int argc, const char* argv[]) {
- Option opt;
- parse_args(argc, argv, &opt);
-
- if (opt.verbose) {
- opt.print();
- printf("%s", sd_get_system_info().c_str());
- set_sd_log_level(SDLogLevel::DEBUG);
- }
-
- bool vae_decode_only = true;
- std::vector init_img;
- if (opt.mode == IMG2IMG) {
- vae_decode_only = false;
-
- int c = 0;
-
- unsigned char* img_data = stbi_load(opt.init_img.c_str(), &opt.w, &opt.h, &c, 3);
- if (img_data == NULL) {
- fprintf(stderr, "load image from '%s' failed\n", opt.init_img.c_str());
- return 1;
- }
- if (c != 3) {
- fprintf(stderr, "input image must be a 3 channels RGB image, but got %d channels\n", c);
- free(img_data);
- return 1;
- }
- if (opt.w <= 0 || opt.w % 64 != 0) {
- fprintf(stderr, "error: the width of image must be a multiple of 64\n");
- free(img_data);
- return 1;
- }
- if (opt.h <= 0 || opt.h % 64 != 0) {
- fprintf(stderr, "error: the height of image must be a multiple of 64\n");
- free(img_data);
- return 1;
- }
- init_img.assign(img_data, img_data + (opt.w * opt.h * c));
- }
-
- StableDiffusion sd(opt.n_threads, vae_decode_only, true, opt.lora_model_dir, opt.rng_type);
- if (!sd.load_from_file(opt.model_path, opt.schedule)) {
- return 1;
- }
-
- std::vector img;
- if (opt.mode == TXT2IMG) {
- img = sd.txt2img(opt.prompt,
- opt.negative_prompt,
- opt.cfg_scale,
- opt.w,
- opt.h,
- opt.sample_method,
- opt.sample_steps,
- opt.seed);
- } else {
- img = sd.img2img(init_img,
- opt.prompt,
- opt.negative_prompt,
- opt.cfg_scale,
- opt.w,
- opt.h,
- opt.sample_method,
- opt.sample_steps,
- opt.strength,
- opt.seed);
- }
-
- if (img.size() == 0) {
- fprintf(stderr, "generate failed\n");
- return 1;
+const char* get_image_params(SDParams params, int seed) {
+ std::string parameter_string = params.prompt + "\n";
+ if (params.negative_prompt.size() != 0) {
+ parameter_string += "Negative prompt: " + params.negative_prompt + "\n";
}
-
- std::string parameter_string = opt.prompt + "\n";
- if (opt.negative_prompt.size() != 0) {
- parameter_string += "Negative prompt: " + opt.negative_prompt + "\n";
- }
- parameter_string += "Steps: " + std::to_string(opt.sample_steps) + ", ";
- parameter_string += "CFG scale: " + std::to_string(opt.cfg_scale) + ", ";
- parameter_string += "Seed: " + std::to_string(opt.seed) + ", ";
- parameter_string += "Size: " + std::to_string(opt.w) + "x" + std::to_string(opt.h) + ", ";
- parameter_string += "Model: " + basename(opt.model_path) + ", ";
- parameter_string += "RNG: " + std::string(rng_type_to_str[opt.rng_type]) + ", ";
- parameter_string += "Sampler: " + std::string(sample_method_str[opt.sample_method]);
- if (opt.schedule == KARRAS) {
+ parameter_string += "Steps: " + std::to_string(params.sample_steps) + ", ";
+ parameter_string += "CFG scale: " + std::to_string(params.cfg_scale) + ", ";
+ parameter_string += "Seed: " + std::to_string(seed) + ", ";
+ parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", ";
+ parameter_string += "Model: " + basename(params.model_path) + ", ";
+ parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", ";
+ parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]);
+ if (params.schedule == KARRAS) {
parameter_string += " karras";
}
parameter_string += ", ";
parameter_string += "Version: stable-diffusion.cpp";
-
- stbi_write_png(opt.output_path.c_str(), opt.w, opt.h, 3, img.data(), 0, parameter_string.c_str());
- printf("save result image to '%s'\n", opt.output_path.c_str());
-
- return 0;
-}
+ return parameter_string.c_str();
+}
\ No newline at end of file
diff --git a/common/common.h b/common/common.h
new file mode 100644
index 00000000..aea1f11e
--- /dev/null
+++ b/common/common.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include
+#include "stable-diffusion.h"
+
+enum sd_mode {
+ TXT2IMG,
+ IMG2IMG,
+ MODE_COUNT
+};
+
+struct SDParams {
+ int n_threads = -1;
+ sd_mode mode = TXT2IMG;
+
+ std::string model_path;
+ std::string lora_model_dir;
+ std::string output_path = "output.png";
+ std::string input_path;
+
+ std::string prompt;
+ std::string negative_prompt;
+ float cfg_scale = 7.0f;
+ int width = 512;
+ int height = 512;
+ int batch_count = 1;
+
+ SampleMethod sample_method = EULER_A;
+ Schedule schedule = DEFAULT;
+ int sample_steps = 20;
+ float strength = 0.75f;
+ RNGType rng_type = CUDA_RNG;
+ int64_t seed = 42;
+ bool verbose = false;
+};
+
+void print_params(SDParams params);
+
+void print_usage(int argc, const char* argv[]);
+
+void parse_args(int argc, const char** argv, SDParams& params);
+
+const char* get_image_params(SDParams params, int seed);
\ No newline at end of file
diff --git a/common/json.hpp b/common/json.hpp
new file mode 100644
index 00000000..4d1a37ad
--- /dev/null
+++ b/common/json.hpp
@@ -0,0 +1,24596 @@
+// __ _____ _____ _____
+// __| | __| | | | JSON for Modern C++
+// | | |__ | | | | | | version 3.11.2
+// |_____|_____|_____|_|___| https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+/****************************************************************************\
+ * Note on documentation: The source files contain links to the online *
+ * documentation of the public API at https://json.nlohmann.me. This URL *
+ * contains the most recent documentation and should also be applicable to *
+ * previous versions; documentation for deprecated functions is not *
+ * removed, but marked deprecated. See "Generate documentation" section in *
+ * file docs/README.md. *
+\****************************************************************************/
+
+#ifndef INCLUDE_NLOHMANN_JSON_HPP_
+#define INCLUDE_NLOHMANN_JSON_HPP_
+
+#include // all_of, find, for_each
+#include // nullptr_t, ptrdiff_t, size_t
+#include // hash, less
+#include // initializer_list
+#ifndef JSON_NO_IO
+ #include // istream, ostream
+#endif // JSON_NO_IO
+#include // random_access_iterator_tag
+#include // unique_ptr
+#include // accumulate
+#include // string, stoi, to_string
+#include // declval, forward, move, pair, swap
+#include // vector
+
+// #include
+// __ _____ _____ _____
+// __| | __| | | | JSON for Modern C++
+// | | |__ | | | | | | version 3.11.2
+// |_____|_____|_____|_|___| https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+#include
+
+// #include
+// __ _____ _____ _____
+// __| | __| | | | JSON for Modern C++
+// | | |__ | | | | | | version 3.11.2
+// |_____|_____|_____|_|___| https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+// This file contains all macro definitions affecting or depending on the ABI
+
+#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK
+ #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH)
+ #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 2
+ #warning "Already included a different version of the library!"
+ #endif
+ #endif
+#endif
+
+#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum)
+#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum)
+#define NLOHMANN_JSON_VERSION_PATCH 2 // NOLINT(modernize-macro-to-enum)
+
+#ifndef JSON_DIAGNOSTICS
+ #define JSON_DIAGNOSTICS 0
+#endif
+
+#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
+ #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0
+#endif
+
+#if JSON_DIAGNOSTICS
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag
+#else
+ #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS
+#endif
+
+#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON
+ #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp
+#else
+ #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON
+#endif
+
+#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION
+ #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0
+#endif
+
+// Construct the namespace ABI tags component
+#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b
+#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \
+ NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b)
+
+#define NLOHMANN_JSON_ABI_TAGS \
+ NLOHMANN_JSON_ABI_TAGS_CONCAT( \
+ NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \
+ NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON)
+
+// Construct the namespace version component
+#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \
+ _v ## major ## _ ## minor ## _ ## patch
+#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \
+ NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch)
+
+#if NLOHMANN_JSON_NAMESPACE_NO_VERSION
+#define NLOHMANN_JSON_NAMESPACE_VERSION
+#else
+#define NLOHMANN_JSON_NAMESPACE_VERSION \
+ NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \
+ NLOHMANN_JSON_VERSION_MINOR, \
+ NLOHMANN_JSON_VERSION_PATCH)
+#endif
+
+// Combine namespace components
+#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b
+#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \
+ NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b)
+
+#ifndef NLOHMANN_JSON_NAMESPACE
+#define NLOHMANN_JSON_NAMESPACE \
+ nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \
+ NLOHMANN_JSON_ABI_TAGS, \
+ NLOHMANN_JSON_NAMESPACE_VERSION)
+#endif
+
+#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN
+#define NLOHMANN_JSON_NAMESPACE_BEGIN \
+ namespace nlohmann \
+ { \
+ inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \
+ NLOHMANN_JSON_ABI_TAGS, \
+ NLOHMANN_JSON_NAMESPACE_VERSION) \
+ {
+#endif
+
+#ifndef NLOHMANN_JSON_NAMESPACE_END
+#define NLOHMANN_JSON_NAMESPACE_END \
+ } /* namespace (inline namespace) NOLINT(readability/namespace) */ \
+ } // namespace nlohmann
+#endif
+
+// #include
+// __ _____ _____ _____
+// __| | __| | | | JSON for Modern C++
+// | | |__ | | | | | | version 3.11.2
+// |_____|_____|_____|_|___| https://github.com/nlohmann/json
+//
+// SPDX-FileCopyrightText: 2013-2022 Niels Lohmann
+// SPDX-License-Identifier: MIT
+
+
+
+#include // transform
+#include // array
+#include // forward_list
+#include // inserter, front_inserter, end
+#include