Skip to content

Commit

Permalink
Update for LoRA Adapters: Derived adapters and support for FLUX (open…
Browse files Browse the repository at this point in the history
…vinotoolkit#1602)

Introducing the concept of a "derived" adapter that allows applying
pipeline-dependent LoRA naming conventions to original Safetensors
adapters in a way that is hidden from the user. So, the user
continues using original Adapter objects to identify adapters, but
internally they are wrapped by "derived" adapter that consists of two
parts: (1) original Adapter reference which plays a role of unique
adapter identifier, and (2) a postponed derivation action that is
applied only once when transformed Adapter tensors are required for the
first time. Applied different derivations for SD/SDXL and FLUX.

Three naming conventions work for FLUX: original diffusers, Kohya, and
XLabs. Still missing: BFL.

Other changes:

- Ignore the original generation config in LoRA LLMPipeline sample to
align with the base greedy sample.
- Fix ProgressBar when it is used for a second time, which happens
in the text2image LoRA sample
- Introduce `SharedOptional` class to simplify the code of conditional
property modifications based on the adapters present, and to maintain
copy-on-modify behavior so that properties are not replicated when it is
not needed.
- Split code of LoRA adapters to more files for better readability.
- Shorten required prefix for LLM's LoRA to be compatible with a wider
set of adapters.
  • Loading branch information
slyalin authored Jan 22, 2025
1 parent ec13531 commit 2d71315
Show file tree
Hide file tree
Showing 24 changed files with 764 additions and 199 deletions.
28 changes: 19 additions & 9 deletions samples/cpp/image_generation/progress_bar.hpp
Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
// Copyright (C) 2023-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include <optional>
#include <sstream>
#include <vector>

#include "indicators/progress_bar.hpp"

// Image-generation step callback that renders a console progress bar.
// The span in the scraped diff mixed the removed `static ProgressBar bar{...}`
// version with the added `std::optional` version (and both `bar.` / `bar->`
// call forms), which cannot compile; this is the reconstructed final code.
bool progress_bar(size_t step, size_t num_steps, ov::Tensor& /* latent */) {
    using namespace indicators;

    // Lazily constructed so the bar can be destroyed after a run completes and
    // re-created on the next run; without re-creation a second progress bar in
    // the same process is not displayed correctly (see reset() below).
    static std::optional<ProgressBar> bar;

    if (!bar) {
        bar.emplace(
            option::BarWidth{50},
            option::ForegroundColor{Color::green},
            option::FontStyles{std::vector<FontStyle>{FontStyle::bold}},
            option::ShowElapsedTime{true},
            option::ShowRemainingTime{true}
        );
    }

    std::stringstream stream;
    stream << "Image generation step " << (step + 1) << " / " << num_steps;

    bar->set_option(option::PostfixText{stream.str()});
    bar->set_progress((100 * (step + 1)) / num_steps);

    if (step + 1 == num_steps) {
        bar.reset(); // Required when multiple progress bars are used, without recreation of the object the second progress bar won't be displayed correctly
    }

    // NOTE(review): returning false presumably means "do not interrupt
    // generation" — confirm against the pipeline callback contract.
    return false;
}
6 changes: 6 additions & 0 deletions samples/cpp/text_generation/lora_greedy_causal_lm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ int main(int argc, char* argv[]) try {
Adapter adapter(adapter_path);
LLMPipeline pipe(models_path, device, adapters(adapter)); // register all required adapters here

// Resetting config to set greedy behaviour ignoring generation config from model directory.
// It helps to compare two generations with and without LoRA adapter.
ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
pipe.set_generation_config(config);

std::cout << "Generate with LoRA adapter and alpha set to 0.75:" << std::endl;
std::cout << pipe.generate(prompt, max_new_tokens(100), adapters(adapter, 0.75)) << std::endl;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "openvino/runtime/infer_request.hpp"
#include "openvino/runtime/properties.hpp"
#include "openvino/runtime/tensor.hpp"
#include "openvino/genai/lora_adapter.hpp"

#include "openvino/genai/visibility.hpp"

Expand Down Expand Up @@ -75,10 +76,13 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel {

void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states);

void set_adapters(const std::optional<AdapterConfig>& adapters);

ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);

private:
Config m_config;
AdapterController m_adapter_controller;
ov::InferRequest m_request;
std::shared_ptr<ov::Model> m_model;
size_t m_vae_scale_factor;
Expand Down
15 changes: 9 additions & 6 deletions src/cpp/include/openvino/genai/lora_adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,20 @@ namespace genai {

class OPENVINO_GENAI_EXPORTS AdapterController;
struct AdapterControllerImpl;
class AdapterImpl;

// Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier
class OPENVINO_GENAI_EXPORTS Adapter {
class Impl;
std::shared_ptr<Impl> m_pimpl;
std::shared_ptr<AdapterImpl> m_pimpl;

friend AdapterController;
friend AdapterControllerImpl;
friend bool operator== (const Adapter& a, const Adapter& b);
friend bool operator< (const Adapter& a, const Adapter& b);

friend Adapter flux_adapter_normalization(const Adapter& adapter);
friend Adapter diffusers_adapter_normalization(const Adapter& adapter);

Adapter(const std::shared_ptr<AdapterImpl>& pimpl);
public:
explicit Adapter(const std::filesystem::path& path);
Adapter() = default;
Expand All @@ -40,9 +44,6 @@ class OPENVINO_GENAI_EXPORTS Adapter {
}
};

// bool OPENVINO_GENAI_EXPORTS operator== (const Adapter& a, const Adapter& b);
// bool OPENVINO_GENAI_EXPORTS operator< (const Adapter& a, const Adapter& b);


struct OPENVINO_GENAI_EXPORTS AdapterConfig {
enum Mode {
Expand Down Expand Up @@ -87,6 +88,8 @@ struct OPENVINO_GENAI_EXPORTS AdapterConfig {
float get_alpha(const Adapter& adapter) const;
AdapterConfig& remove(const Adapter&);
const std::vector<Adapter>& get_adapters() const { return adapters; }
std::vector<std::pair<Adapter, float>> get_adapters_and_alphas() const;
void set_adapters_and_alphas(const std::vector<std::pair<Adapter, float>>& adapters);

// Update adapters and alphas from other config. Mode and tensor_name_prefix are updated if they are set not to default values in other config.
// It means that if other.get_mode() == MODE_AUTO, it will not override value in this config. If tensor_name_prefix is not set (== nullopt) then it won't be updated either.
Expand Down
8 changes: 3 additions & 5 deletions src/cpp/src/continuous_batching_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,14 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::initialize_pipeline(
const ov::AnyMap& properties,
const DeviceConfig& device_config,
ov::Core& core) {
ov::CompiledModel compiled_model;

// apply LoRA
if (auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters)) {
auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters);
if (m_generation_config.adapters) {
m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model.");
m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device_config.get_device()); // TODO: Make the prefix name configurable
compiled_model = core.compile_model(model, device_config.get_device(), *filtered_properties);
} else {
compiled_model = core.compile_model(model, device_config.get_device(), properties);
}
ov::CompiledModel compiled_model = core.compile_model(model, device_config.get_device(), *filtered_properties);

ov::genai::utils::print_compiled_model_properties(compiled_model, "LLM with Paged Attention");
ov::InferRequest infer_request = compiled_model.create_infer_request();
Expand Down
6 changes: 6 additions & 0 deletions src/cpp/src/image_generation/diffusion_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include "image_generation/schedulers/ischeduler.hpp"
#include "openvino/genai/image_generation/generation_config.hpp"
#include "lora_helper.hpp"
#include "lora_names_mapping.hpp"

#include "json_utils.hpp"
namespace {
Expand Down Expand Up @@ -132,6 +134,10 @@ class DiffusionPipeline {
}
}

// Derivation hook for SD/SDXL pipelines: maps LoRA tensor names from the
// diffusers naming convention onto the one this pipeline expects, by
// delegating to ov::genai::derived_adapters with diffusers_adapter_normalization.
// Presumably returns a non-empty config iff an update is required — mirrors
// the documented contract of FluxPipeline::derived_adapters; confirm.
static std::optional<AdapterConfig> derived_adapters(const AdapterConfig& adapters) {
return ov::genai::derived_adapters(adapters, diffusers_adapter_normalization);
}

PipelineType m_pipeline_type;
std::shared_ptr<IScheduler> m_scheduler;
ImageGenerationConfig m_generation_config;
Expand Down
38 changes: 28 additions & 10 deletions src/cpp/src/image_generation/flux_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,26 +173,28 @@ class FluxPipeline : public DiffusionPipeline {

set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json"));

auto updated_properties = update_adapters_in_properties(properties, &FluxPipeline::derived_adapters);

const std::string text_encoder = data["text_encoder"][1].get<std::string>();
if (text_encoder == "CLIPTextModel") {
m_clip_text_encoder = std::make_shared<CLIPTextModel>(root_dir / "text_encoder", device, properties);
m_clip_text_encoder = std::make_shared<CLIPTextModel>(root_dir / "text_encoder", device, *updated_properties);
} else {
OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
}

const std::string t5_text_encoder = data["text_encoder_2"][1].get<std::string>();
if (t5_text_encoder == "T5EncoderModel") {
m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_2", device, properties);
m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_2", device, *updated_properties);
} else {
OPENVINO_THROW("Unsupported '", t5_text_encoder, "' text encoder type");
}

const std::string vae = data["vae"][1].get<std::string>();
if (vae == "AutoencoderKL") {
if (m_pipeline_type == PipelineType::TEXT_2_IMAGE)
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, properties);
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, *updated_properties);
else if (m_pipeline_type == PipelineType::IMAGE_2_IMAGE || m_pipeline_type == PipelineType::INPAINTING) {
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, properties);
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, *updated_properties);
} else {
OPENVINO_ASSERT("Unsupported pipeline type");
}
Expand All @@ -202,13 +204,14 @@ class FluxPipeline : public DiffusionPipeline {

const std::string transformer = data["transformer"][1].get<std::string>();
if (transformer == "FluxTransformer2DModel") {
m_transformer = std::make_shared<FluxTransformer2DModel>(root_dir / "transformer", device, properties);
m_transformer = std::make_shared<FluxTransformer2DModel>(root_dir / "transformer", device, *updated_properties);
} else {
OPENVINO_THROW("Unsupported '", transformer, "' Transformer type");
}

// initialize generation config
initialize_generation_config(data["_class_name"].get<std::string>());
update_adapters_from_properties(properties, m_generation_config.adapters);
}

FluxPipeline(PipelineType pipeline_type,
Expand Down Expand Up @@ -247,10 +250,12 @@ class FluxPipeline : public DiffusionPipeline {
}

// Compiles all FLUX submodels on the given device.
// The span in the scraped diff mixed the removed raw-`properties` compile
// calls with the added `*updated_properties` ones, which cannot compile;
// this is the reconstructed final code.
void compile(const std::string& device, const ov::AnyMap& properties) override {
    // Record adapters supplied via properties into the generation config,
    // then translate them through derived_adapters() so submodels see
    // FLUX-style LoRA tensor names instead of the user-facing ones.
    update_adapters_from_properties(properties, m_generation_config.adapters);
    auto updated_properties = update_adapters_in_properties(properties, &FluxPipeline::derived_adapters);
    m_clip_text_encoder->compile(device, *updated_properties);
    m_t5_text_encoder->compile(device, *updated_properties);
    m_vae->compile(device, *updated_properties);
    m_transformer->compile(device, *updated_properties);
}

void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
Expand Down Expand Up @@ -312,7 +317,13 @@ class FluxPipeline : public DiffusionPipeline {
}

// Applies a LoRA adapter configuration at runtime.
// The span in the scraped diff contained both the removed OPENVINO_THROW
// ("not implemented for FLUX") line and the added implementation; this is
// the reconstructed final code.
void set_lora_adapters(std::optional<AdapterConfig> adapters) override {
    if (adapters) {
        // Translate user-visible adapters into the FLUX naming convention
        // before applying; derived_adapters() returns a value only when a
        // translation is actually required.
        if (auto updated_adapters = derived_adapters(*adapters)) {
            adapters = updated_adapters;
        }
        // NOTE(review): the T5 encoder and VAE are not given adapters here —
        // presumably FLUX LoRA conventions only target CLIP and the
        // transformer; confirm against the supported adapter formats.
        m_clip_text_encoder->set_adapters(adapters);
        m_transformer->set_adapters(adapters);
    }
}

ov::Tensor generate(const std::string& positive_prompt,
Expand All @@ -339,6 +350,8 @@ class FluxPipeline : public DiffusionPipeline {

check_inputs(m_custom_generation_config, initial_image);

set_lora_adapters(m_custom_generation_config.adapters);

compute_hidden_states(positive_prompt, m_custom_generation_config);

ov::Tensor latents, processed_image, image_latent, noise;
Expand Down Expand Up @@ -472,6 +485,11 @@ class FluxPipeline : public DiffusionPipeline {
}
}

// Derivation hook for FLUX: renames LoRA tensors from the supported external
// conventions (per the commit message: diffusers, Kohya, XLabs; BFL missing)
// into this pipeline's expected names via flux_adapter_normalization.
// Returns non-empty updated adapters iff they are required to be updated
static std::optional<AdapterConfig> derived_adapters(const AdapterConfig& adapters) {
return ov::genai::derived_adapters(adapters, flux_adapter_normalization);
}

std::shared_ptr<FluxTransformer2DModel> m_transformer = nullptr;
std::shared_ptr<CLIPTextModel> m_clip_text_encoder = nullptr;
std::shared_ptr<T5EncoderModel> m_t5_text_encoder = nullptr;
Expand Down
24 changes: 4 additions & 20 deletions src/cpp/src/image_generation/models/autoencoder_kl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,23 +131,15 @@ AutoencoderKL::AutoencoderKL(const std::filesystem::path& vae_decoder_path,
const std::string& device,
const ov::AnyMap& properties)
: AutoencoderKL(vae_decoder_path) {
if (auto filtered_properties = extract_adapters_from_properties(properties)) {
compile(device, *filtered_properties);
} else {
compile(device, properties);
}
compile(device, *extract_adapters_from_properties(properties));
}

// Encoder+decoder constructor that loads both VAE models and compiles them.
// The span in the scraped diff mixed the removed conditional
// `if (auto filtered_properties = ...)` compile with the added one-liner;
// this is the reconstructed final code.
AutoencoderKL::AutoencoderKL(const std::filesystem::path& vae_encoder_path,
                             const std::filesystem::path& vae_decoder_path,
                             const std::string& device,
                             const ov::AnyMap& properties)
    : AutoencoderKL(vae_encoder_path, vae_decoder_path) {
    // The VAE takes no LoRA adapters, so strip any adapter entries from the
    // properties before compiling.
    // NOTE(review): the unconditional dereference assumes
    // extract_adapters_from_properties now always yields an engaged value
    // (the commit's SharedOptional change) — confirm against lora_helper.
    compile(device, *extract_adapters_from_properties(properties));
}

AutoencoderKL::AutoencoderKL(const std::string& vae_decoder_model,
Expand All @@ -174,11 +166,7 @@ AutoencoderKL::AutoencoderKL(const std::string& vae_decoder_model,
const std::string& device,
const ov::AnyMap& properties)
: AutoencoderKL(vae_decoder_model, vae_decoder_weights, vae_decoder_config) {
if (auto filtered_properties = extract_adapters_from_properties(properties)) {
compile(device, *filtered_properties);
} else {
compile(device, properties);
}
compile(device, *extract_adapters_from_properties(properties));
}

AutoencoderKL::AutoencoderKL(const std::string& vae_encoder_model,
Expand All @@ -193,11 +181,7 @@ AutoencoderKL::AutoencoderKL(const std::string& vae_encoder_model,
vae_decoder_model,
vae_decoder_weights,
vae_decoder_config) {
if (auto filtered_properties = extract_adapters_from_properties(properties)) {
compile(device, *filtered_properties);
} else {
compile(device, properties);
}
compile(device, *extract_adapters_from_properties(properties));
}

AutoencoderKL::AutoencoderKL(const AutoencoderKL&) = default;
Expand Down
9 changes: 3 additions & 6 deletions src/cpp/src/image_generation/models/clip_text_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,13 @@ CLIPTextModel& CLIPTextModel::reshape(int batch_size) {

CLIPTextModel& CLIPTextModel::compile(const std::string& device, const ov::AnyMap& properties) {
OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
ov::Core core = utils::singleton_core();
ov::CompiledModel compiled_model;
std::optional<AdapterConfig> adapters;
if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) {
auto filtered_properties = extract_adapters_from_properties(properties, &adapters);
if (adapters) {
adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("lora_te"));
m_adapter_controller = AdapterController(m_model, *adapters, device);
compiled_model = core.compile_model(m_model, device, *filtered_properties);
} else {
compiled_model = core.compile_model(m_model, device, properties);
}
ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, *filtered_properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "Clip Text model");
m_request = compiled_model.create_infer_request();
// release the original model
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,13 @@ CLIPTextModelWithProjection& CLIPTextModelWithProjection::reshape(int batch_size
CLIPTextModelWithProjection& CLIPTextModelWithProjection::compile(const std::string& device, const ov::AnyMap& properties) {
OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
ov::Core core = utils::singleton_core();
ov::CompiledModel compiled_model;
std::optional<AdapterConfig> adapters;
if (auto filtered_properties = extract_adapters_from_properties(properties, &adapters)) {
auto filtered_properties = extract_adapters_from_properties(properties, &adapters);
if (adapters) {
adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("lora_te"));
m_adapter_controller = AdapterController(m_model, *adapters, device);
compiled_model = core.compile_model(m_model, device, *filtered_properties);
} else {
compiled_model = core.compile_model(m_model, device, properties);
}
ov::CompiledModel compiled_model = core.compile_model(m_model, device, *filtered_properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "Clip Text with projection model");
m_request = compiled_model.create_infer_request();
// release the original model
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "json_utils.hpp"
#include "utils.hpp"
#include "lora_helper.hpp"

namespace ov {
namespace genai {
Expand Down Expand Up @@ -106,7 +107,13 @@ FluxTransformer2DModel& FluxTransformer2DModel::reshape(int batch_size,

FluxTransformer2DModel& FluxTransformer2DModel::compile(const std::string& device, const ov::AnyMap& properties) {
OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, properties);
std::optional<AdapterConfig> adapters;
auto filtered_properties = extract_adapters_from_properties(properties, &adapters);
if (adapters) {
adapters->set_tensor_name_prefix(adapters->get_tensor_name_prefix().value_or("transformer"));
m_adapter_controller = AdapterController(m_model, *adapters, device);
}
ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, *filtered_properties);
ov::genai::utils::print_compiled_model_properties(compiled_model, "Flux Transformer 2D model");
m_request = compiled_model.create_infer_request();
// release the original model
Expand All @@ -120,6 +127,13 @@ void FluxTransformer2DModel::set_hidden_states(const std::string& tensor_name, o
m_request.set_tensor(tensor_name, encoder_hidden_states);
}

// Applies the given LoRA adapter configuration to the compiled transformer.
// A disengaged optional is a no-op; the model must already be compiled,
// since adapters are applied to the live infer request.
void FluxTransformer2DModel::set_adapters(const std::optional<AdapterConfig>& adapters) {
    OPENVINO_ASSERT(m_request, "Transformer model must be compiled first");
    if (!adapters) {
        return;
    }
    m_adapter_controller.apply(m_request, *adapters);
}

ov::Tensor FluxTransformer2DModel::infer(const ov::Tensor latent_model_input, const ov::Tensor timestep) {
OPENVINO_ASSERT(m_request, "Transformer model must be compiled first. Cannot infer non-compiled model");

Expand Down
Loading

0 comments on commit 2d71315

Please sign in to comment.