llama : model loader
ggml-ci
ggerganov committed Jan 2, 2025
1 parent e39a9c1 commit 1521f9e
Showing 7 changed files with 1,264 additions and 1,164 deletions.
1 change: 1 addition & 0 deletions src/CMakeLists.txt
@@ -20,6 +20,7 @@ add_library(llama
             llama-kv-cache.cpp
             llama-mmap.cpp
             llama-model.cpp
+            llama-model-loader.cpp
             llama-sampling.cpp
             llama-vocab.cpp
             unicode.h
13 changes: 7 additions & 6 deletions src/llama-hparams.h
@@ -69,12 +69,13 @@ struct llama_hparams {
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;
 
-    float rope_attn_factor = 1.0f;
-    float rope_freq_base_train;
-    float rope_freq_scale_train;
-    uint32_t n_ctx_orig_yarn;
-    float rope_yarn_log_mul;
-    int rope_sections[4]; // TODO: actually this should be std::array (I was wrong)
+    float    rope_attn_factor = 1.0f;
+    float    rope_freq_base_train;
+    float    rope_freq_scale_train;
+    uint32_t n_ctx_orig_yarn;
+    float    rope_yarn_log_mul;
+
+    std::array<int, 4> rope_sections;
 
     // for State Space Models
     uint32_t ssm_d_conv = 0;
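Replacing the raw `int rope_sections[4]` with `std::array<int, 4>` (resolving the inline TODO) gives the field value semantics: it can be copied, compared, and assigned as a whole instead of decaying to a pointer. A standalone sketch of the difference (hypothetical struct names and values, not code from this commit):

```cpp
#include <array>
#include <cstdio>

struct hparams_old { int rope_sections[4]; };                // raw array: no operator==, decays to int*
struct hparams_new { std::array<int, 4> rope_sections{}; };  // value semantics, zero-initialized

int main() {
    hparams_new a;
    a.rope_sections = {16, 24, 24, 0};  // assignable as a whole, unlike a raw array

    hparams_new b = a;                  // copying the struct copies the elements
    if (a.rope_sections == b.rope_sections) {
        std::printf("equal, %zu sections\n", a.rope_sections.size());
    }
    return 0;
}
```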
74 changes: 74 additions & 0 deletions src/llama-impl.cpp
@@ -2,9 +2,11 @@
 
 #include "llama.h"
 
+#include <cinttypes>
 #include <climits>
 #include <cstdarg>
 #include <vector>
+#include <sstream>
 
 struct llama_logger_state {
     ggml_log_callback log_callback = llama_log_callback_default;
@@ -89,3 +91,75 @@ std::string format(const char * fmt, ...) {
     va_end(ap);
     return std::string(buf.data(), size);
 }
+
+std::string llama_format_tensor_shape(const std::vector<int64_t> & ne) {
+    char buf[256];
+    snprintf(buf, sizeof(buf), "%5" PRId64, ne.at(0));
+    for (size_t i = 1; i < ne.size(); i++) {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, ne.at(i));
+    }
+    return buf;
+}
+
+std::string llama_format_tensor_shape(const struct ggml_tensor * t) {
+    char buf[256];
+    snprintf(buf, sizeof(buf), "%5" PRId64, t->ne[0]);
+    for (int i = 1; i < GGML_MAX_DIMS; i++) {
+        snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, t->ne[i]);
+    }
+    return buf;
+}
+
+static std::string gguf_data_to_str(enum gguf_type type, const void * data, int i) {
+    switch (type) {
+        case GGUF_TYPE_UINT8:   return std::to_string(((const uint8_t  *)data)[i]);
+        case GGUF_TYPE_INT8:    return std::to_string(((const int8_t   *)data)[i]);
+        case GGUF_TYPE_UINT16:  return std::to_string(((const uint16_t *)data)[i]);
+        case GGUF_TYPE_INT16:   return std::to_string(((const int16_t  *)data)[i]);
+        case GGUF_TYPE_UINT32:  return std::to_string(((const uint32_t *)data)[i]);
+        case GGUF_TYPE_INT32:   return std::to_string(((const int32_t  *)data)[i]);
+        case GGUF_TYPE_UINT64:  return std::to_string(((const uint64_t *)data)[i]);
+        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
+        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
+        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
+        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
+        default:                return format("unknown type %d", type);
+    }
+}
+
+std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i) {
+    const enum gguf_type type = gguf_get_kv_type(ctx_gguf, i);
+
+    switch (type) {
+        case GGUF_TYPE_STRING:
+            return gguf_get_val_str(ctx_gguf, i);
+        case GGUF_TYPE_ARRAY:
+            {
+                const enum gguf_type arr_type = gguf_get_arr_type(ctx_gguf, i);
+                int arr_n = gguf_get_arr_n(ctx_gguf, i);
+                const void * data = gguf_get_arr_data(ctx_gguf, i);
+                std::stringstream ss;
+                ss << "[";
+                for (int j = 0; j < arr_n; j++) {
+                    if (arr_type == GGUF_TYPE_STRING) {
+                        std::string val = gguf_get_arr_str(ctx_gguf, i, j);
+                        // escape quotes
+                        replace_all(val, "\\", "\\\\");
+                        replace_all(val, "\"", "\\\"");
+                        ss << '"' << val << '"';
+                    } else if (arr_type == GGUF_TYPE_ARRAY) {
+                        ss << "???";
+                    } else {
+                        ss << gguf_data_to_str(arr_type, data, j);
+                    }
+                    if (j < arr_n - 1) {
+                        ss << ", ";
+                    }
+                }
+                ss << "]";
+                return ss.str();
+            }
+        default:
+            return gguf_data_to_str(type, gguf_get_val_data(ctx_gguf, i), 0);
+    }
+}
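For orientation, a minimal sketch of how these helpers are typically combined to dump GGUF metadata. This is not part of the commit; it assumes a local model.gguf and that the gguf_* API is visible through ggml.h, where it lived at the time of this change:

```cpp
#include "llama-impl.h"

#include "ggml.h" // gguf_* API (gguf_init_from_file, gguf_get_key, ...)

#include <cstdio>

int main() {
    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * ctx = gguf_init_from_file("model.gguf", params);
    if (ctx == nullptr) {
        std::fprintf(stderr, "failed to load model.gguf\n");
        return 1;
    }

    // print every metadata key/value pair using the new helper
    for (int i = 0; i < gguf_get_n_kv(ctx); i++) {
        std::printf("%-40s = %s\n", gguf_get_key(ctx, i), gguf_kv_to_str(ctx, i).c_str());
    }

    // the std::vector overload is handy when no ggml_tensor is at hand
    std::printf("shape = [%s]\n", llama_format_tensor_shape({4096, 4096}).c_str());

    gguf_free(ctx);
    return 0;
}
```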
12 changes: 12 additions & 0 deletions src/llama-impl.h
@@ -3,6 +3,7 @@
 #include "ggml.h" // for ggml_log_level
 
 #include <string>
+#include <vector>
 
 #ifdef __GNUC__
 #ifdef __MINGW32__
@@ -33,6 +34,12 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 // helpers
 //
 
+template <typename T>
+struct no_init {
+    T value;
+    no_init() { /* do nothing */ }
+};
+
 struct time_meas {
     time_meas(int64_t & t_acc, bool disable = false);
     ~time_meas();
@@ -47,3 +54,8 @@ void replace_all(std::string & s, const std::string & search, const std::string & replace);
 // TODO: rename to llama_format ?
 LLAMA_ATTRIBUTE_FORMAT(1, 2)
 std::string format(const char * fmt, ...);
+
+std::string llama_format_tensor_shape(const std::vector<int64_t> & ne);
+std::string llama_format_tensor_shape(const struct ggml_tensor * t);
+
+std::string gguf_kv_to_str(const struct gguf_context * ctx_gguf, int i);
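The new no_init wrapper exists so large buffers can be resized without the zero-fill that std::vector would normally perform: because no_init has a user-provided empty default constructor, even value-initialization just runs that constructor and leaves value untouched. A sketch of the intended use (hypothetical program, not from this commit):

```cpp
#include "llama-impl.h" // no_init

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // std::vector<uint8_t>::resize() value-initializes (zeroes) every new byte.
    // no_init<uint8_t> has a user-provided empty default constructor, so even
    // value-initialization runs that no-op and the memory is left untouched.
    std::vector<no_init<uint8_t>> buf;
    buf.resize(1024 * 1024);  // no zero-fill here

    // ... the buffer would now be filled directly, e.g. by a file read into
    // buf.data(), before any element is read back
    std::printf("buffer of %zu bytes ready\n", buf.size());
    return 0;
}
```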
[diffs for the remaining changed files did not load]
