Skip to content

Commit

Permalink
llama : sync gguf-llama.cpp with latest llama.cpp (#2608)
Browse files Browse the repository at this point in the history
* llama : sync gguf-llama.cpp with latest llama.cpp

* minor : indentation + assert

* llama : refactor gguf_buffer and gguf_ctx_buffer

* llama : minor
  • Loading branch information
ggerganov authored Aug 14, 2023
1 parent 6f64b6c commit f00780b
Show file tree
Hide file tree
Showing 6 changed files with 688 additions and 459 deletions.
23 changes: 14 additions & 9 deletions examples/gguf/gguf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@
#include <sstream>
#include <fstream>
#include <vector>
/*

#undef MIN
#undef MAX
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

template<typename T>
static std::string to_string(const T & val) {
std::stringstream ss;
ss << val;
return ss.str();
}
*/

void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
const int32_t n = val.size();
fout.write((const char *) &n, sizeof(n));
Expand Down Expand Up @@ -377,28 +382,28 @@ bool gguf_ex_read_2(const std::string & fname) {

struct gguf_file file(fname.c_str(), "rb");
gguf_mmap data_mmap(&file, 0, false);

const int n_tensors = gguf_get_n_tensors(ctx);

for (int i = 0; i < n_tensors; ++i) {
const char * name = gguf_get_tensor_name(ctx, i);
const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i);
const char * name = gguf_get_tensor_name(ctx, i);
const size_t offset = gguf_get_data_offset(ctx) + gguf_get_tensor_offset(ctx, i);

struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);

cur->data = static_cast<char *>(data_mmap.addr) + offset;

// print first 10 elements
const float * data = (const float *) cur->data;
const float * data = (const float *) cur->data;

printf("%s data[:10] : ", name);

for (int j = 0; j < 10; ++j) {
for (int j = 0; j < MIN(10, ggml_nelements(cur)); ++j) {
printf("%f ", data[j]);
}

printf("\n\n");
}

fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));
fprintf(stdout, "%s: ctx_data size: %zu\n", __func__, ggml_get_mem_size(ctx_data));

ggml_free(ctx_data);
gguf_free(ctx);
Expand Down
3 changes: 3 additions & 0 deletions ggml-metal.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ struct ggml_metal_context;
struct ggml_metal_context * ggml_metal_init(int n_cb);
void ggml_metal_free(struct ggml_metal_context * ctx);

void * ggml_metal_host_malloc(size_t n);
void ggml_metal_host_free (void * data);

// set the number of command buffers to use
void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);

Expand Down
15 changes: 15 additions & 0 deletions ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,21 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
free(ctx);
}

// Allocate n bytes of page-aligned host memory (suitable for sharing with Metal
// as a no-copy buffer). Returns NULL on failure. The caller releases the memory
// with ggml_metal_host_free().
void * ggml_metal_host_malloc(size_t n) {
    void * data = NULL;

    // posix_memalign does NOT set errno; it returns the error code directly,
    // so report it (and the requested size) instead of a bare failure message.
    // &data is already void **, no cast needed.
    const int result = posix_memalign(&data, getpagesize(), n);
    if (result != 0) {
        fprintf(stderr, "%s: error: posix_memalign failed (error %d, size %zu)\n", __func__, result, n);
        return NULL;
    }

    return data;
}

// Release host memory previously obtained from ggml_metal_host_malloc().
// Passing NULL is a harmless no-op.
void ggml_metal_host_free(void * data) {
    if (data == NULL) {
        return;
    }

    free(data);
}

void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
ctx->n_cb = n_cb;
}
Expand Down
Loading

0 comments on commit f00780b

Please sign in to comment.