Reverted memory fixes; see ggerganov#3527

l3utterfly · Oct 12, 2023 · 4e6db1f
1 parent fe0606c
commit 4e6db1f
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 76 deletions.
diff --git a/ggml-alloc.c b/ggml-alloc.c
@@ -317,8 +317,9 @@ struct ggml_allocr * ggml_allocr_new_from_buffer(struct ggml_backend_buffer * bu
         /*.parse_seq     = */ {0},
         /*.parse_seq_len = */ 0,
 #ifdef GGML_ALLOCATOR_DEBUG
-    (*alloc).allocated_tensors = {0};
+        /*.allocated_tensors = */ {0},
 #endif
+    };
 
     ggml_allocr_reset(alloc);
 
@@ -590,4 +591,4 @@ size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph *
 
 size_t ggml_allocr_max_size(struct ggml_allocr * alloc) {
     return alloc->max_size;
-}
+}
diff --git a/ggml.c b/ggml.c
@@ -4698,21 +4698,19 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
     }
 
     const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
-
-    ctx = (struct ggml_context *)malloc(sizeof(struct ggml_context));
-
-    struct ggml_scratch empty_scratch = { 0, 0, NULL };
-
-    (*ctx).mem_size = mem_size;
-    (*ctx).mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size);
-    (*ctx).mem_buffer_owned = params.mem_buffer ? false : true;
-    (*ctx).no_alloc = params.no_alloc;
-    (*ctx).no_alloc_save = params.no_alloc;
-    (*ctx).n_objects = 0;
-    (*ctx).objects_begin = NULL;
-    (*ctx).objects_end = NULL;
-    (*ctx).scratch = empty_scratch;
-    (*ctx).scratch_save = empty_scratch;
+
+    *ctx = (struct ggml_context) {
+        /*.mem_size           =*/ mem_size,
+        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size),
+        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
+        /*.no_alloc           =*/ params.no_alloc,
+        /*.no_alloc_save      =*/ params.no_alloc,
+        /*.n_objects          =*/ 0,
+        /*.objects_begin      =*/ NULL,
+        /*.objects_end        =*/ NULL,
+        /*.scratch            =*/ { 0, 0, NULL, },
+        /*.scratch_save       =*/ { 0, 0, NULL, },
+    };
 
     GGML_ASSERT(ctx->mem_buffer != NULL);
 
@@ -18054,18 +18052,19 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
 struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
     struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
-
-    (*cgraph).n_nodes = 0;
-    (*cgraph).n_leafs = 0;
-    (*cgraph).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-    (*cgraph).perf_runs = 0;
-    (*cgraph).perf_cycles = 0;
-    (*cgraph).perf_time_us = 0;
-
-    memset((*cgraph).nodes, 0, sizeof((*cgraph).nodes));
-    memset((*cgraph).grads, 0, sizeof((*cgraph).grads));
-    memset((*cgraph).leafs, 0, sizeof((*cgraph).leafs));
-    memset((*cgraph).visited_hash_table, 0, sizeof((*cgraph).visited_hash_table));
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.n_nodes      =*/ 0,
+        /*.n_leafs      =*/ 0,
+        /*.nodes        =*/ { NULL },
+        /*.grads        =*/ { NULL },
+        /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
+        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
+        /*.perf_runs    =*/ 0,
+        /*.perf_cycles  =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };
 
     return cgraph;
 }
@@ -22005,4 +22004,4 @@ int ggml_cpu_has_vsx(void) {
 #endif
 }
 
-////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
diff --git a/llama.cpp b/llama.cpp
@@ -9001,21 +9001,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
             const size_t elt_size = ggml_element_size(kv_self.k);
 
             ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-            // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-            struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-            (*gf).n_nodes = 0;
-            (*gf).n_leafs = 0;
-            (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-            (*gf).perf_runs = 0;
-            (*gf).perf_cycles = 0;
-            (*gf).perf_time_us = 0;
-
-            memset((*gf).nodes, 0, sizeof((*gf).nodes));
-            memset((*gf).grads, 0, sizeof((*gf).grads));
-            memset((*gf).leafs, 0, sizeof((*gf).leafs));
-            memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+            ggml_cgraph gf{};
 
             ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
             std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
@@ -9033,20 +9019,16 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
                 kv_head, n_embd, n_layer,
                 elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);
 
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d));
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d));
-            ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d));
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d));
+            ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
 
             ggml_free(cpy_ctx);
 
             // our data is now in the kout3d_data and vout3d_data buffers
             // write them to file
             data_ctx->write(kout3d_data.data(), kout3d_data.size());
             data_ctx->write(vout3d_data.data(), vout3d_data.size());
-
-            // free our allocated graph
-            free(gf);
-            gf = NULL;
         }
 
         for (uint32_t i = 0; i < kv_size; ++i) {
@@ -9147,21 +9129,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
             const size_t elt_size = ggml_element_size(kv_self.k);
 
             ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
-
-            // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph`
-            struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph)));
-
-            (*gf).n_nodes = 0;
-            (*gf).n_leafs = 0;
-            (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT;
-            (*gf).perf_runs = 0;
-            (*gf).perf_cycles = 0;
-            (*gf).perf_time_us = 0;
-
-            memset((*gf).nodes, 0, sizeof((*gf).nodes));
-            memset((*gf).grads, 0, sizeof((*gf).grads));
-            memset((*gf).leafs, 0, sizeof((*gf).leafs));
-            memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table));
+            ggml_cgraph gf{};
 
             ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
             kin3d->data = (void *) inp;
@@ -9179,9 +9147,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
                 kv_head, n_embd, n_layer,
                 elt_size*n_ctx, elt_size*n_ctx*n_embd, 0);
 
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d));
-            ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d));
-            ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1);
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d));
+            ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d));
+            ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
 
             ggml_free(cpy_ctx);
         }
@@ -9233,11 +9201,10 @@ static bool llama_load_session_file_internal(struct llama_context * ctx, const c
         llama_hparams session_hparams;
         file.read_raw(&session_hparams, sizeof(llama_hparams));
 
-        // TODO: need to do floating point comparison imprecisely for norm_eps
-        //if (session_hparams != ctx->model.hparams) {
-        //    LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
-        //    return false;
-        //}
+        if (session_hparams != ctx->model.hparams) {
+            LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__);
+            return false;
+        }
     }
 
     // load the prompt
@@ -9662,4 +9629,4 @@ static void llama_log_callback_default(ggml_log_level level, const char * text,
     (void) user_data;
     fputs(text, stderr);
     fflush(stderr);
-}
+}