Showing 26 changed files with 5,768 additions and 5,073 deletions.
@@ -1,66 +1,74 @@
 #pragma once
 
-#include "llama-impl.h"
-#include "llama-hparams.h"
+#include "llama.h"
 
 #include "ggml-cpp.h"
 
+#include <string>
 #include <unordered_map>
 #include <vector>
 
+// TODO: pimpl
+
 //
 // llama_adapter_cvec
 //
 
-// TODO: rename to llama_adapter_cvec
-struct llama_control_vector {
-    std::vector<ggml_context_ptr> ctxs;
-    std::vector<ggml_backend_buffer_ptr> bufs;
+struct llama_adapter_cvec {
+    struct ggml_tensor * tensor_for(int il) const;
 
-    std::vector<struct ggml_tensor *> tensors; // per layer
+    struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const;
 
+    int32_t apply(
+            const llama_model & model,
+            const float * data,
+            size_t len,
+            int32_t n_embd,
+            int32_t il_start,
+            int32_t il_end);
+
+private:
+    bool init(const llama_model & model);
+
     int32_t layer_start = -1;
     int32_t layer_end   = -1;
 
-    struct ggml_tensor * tensor_for(int il) const;
+    std::vector<ggml_context_ptr> ctxs;
+    std::vector<ggml_backend_buffer_ptr> bufs;
 
-    struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const;
+    std::vector<struct ggml_tensor *> tensors; // per layer
 };
 
-int32_t llama_control_vector_apply(
-        struct llama_control_vector & cvec,
-        const llama_model & model,
-        const float * data,
-        size_t len,
-        int32_t n_embd,
-        int32_t il_start,
-        int32_t il_end);
-
 //
 // llama_adapter_lora
 //
 
-// TODO: rename to llama_adapter_lora_weight
-struct llama_lora_weight {
+struct llama_adapter_lora_weight {
     struct ggml_tensor * a = nullptr;
     struct ggml_tensor * b = nullptr;
 
-    llama_lora_weight() = default;
-    llama_lora_weight(struct ggml_tensor * a, struct ggml_tensor * b) : a(a), b(b) {}
+    // get actual scale based on rank and alpha
+    float get_scale(float alpha, float adapter_scale) const {
+        const float rank = (float) b->ne[0];
+        const float scale = alpha ? adapter_scale * alpha / rank : adapter_scale;
+        return scale;
+    }
+
+    llama_adapter_lora_weight() = default;
+    llama_adapter_lora_weight(struct ggml_tensor * a, struct ggml_tensor * b) : a(a), b(b) {}
 };
 
-// TODO: rename to llama_adapter_lora
-struct llama_lora_adapter {
+struct llama_adapter_lora {
     // map tensor name to lora_a_b
-    std::unordered_map<std::string, struct llama_lora_weight> ab_map;
+    std::unordered_map<std::string, struct llama_adapter_lora_weight> ab_map;
 
     std::vector<ggml_context_ptr> ctxs;
     std::vector<ggml_backend_buffer_ptr> bufs;
 
     float alpha;
 
-    llama_lora_adapter() = default;
-    ~llama_lora_adapter() = default;
+    llama_adapter_lora() = default;
+    ~llama_adapter_lora() = default;
 
-    llama_lora_weight * get_weight(struct ggml_tensor * w);
+    llama_adapter_lora_weight * get_weight(struct ggml_tensor * w);
 };
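
Note on the get_scale helper added above: when alpha is non-zero, the effective LoRA scale is adapter_scale * alpha / rank (with the rank taken from the first dimension of the b tensor); otherwise adapter_scale is used unchanged. A minimal standalone sketch of that rule, using a hypothetical lora_scale function rather than the actual struct:

#include <cstdio>

// Hypothetical free function mirroring the scale rule of
// llama_adapter_lora_weight::get_scale in the diff above.
static float lora_scale(float alpha, float adapter_scale, float rank) {
    // alpha == 0 means "no alpha stored": fall back to the raw adapter scale
    return alpha != 0.0f ? adapter_scale * alpha / rank : adapter_scale;
}

int main() {
    // example: a rank-16 adapter trained with alpha = 32, applied at user scale 1.0
    std::printf("effective scale = %.2f\n", lora_scale(32.0f, 1.0f, 16.0f)); // prints 2.00
    return 0;
}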