diff --git a/ggml-alloc.c b/ggml-alloc.c index 0fb6c563c9d1a..49657b79760e3 100644 --- a/ggml-alloc.c +++ b/ggml-alloc.c @@ -317,8 +317,9 @@ struct ggml_allocr * ggml_allocr_new_from_buffer(struct ggml_backend_buffer * bu /*.parse_seq = */ {0}, /*.parse_seq_len = */ 0, #ifdef GGML_ALLOCATOR_DEBUG - (*alloc).allocated_tensors = {0}; + /*.allocated_tensors = */ {0}, #endif + }; ggml_allocr_reset(alloc); @@ -590,4 +591,4 @@ size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * size_t ggml_allocr_max_size(struct ggml_allocr * alloc) { return alloc->max_size; -} +} \ No newline at end of file diff --git a/ggml.c b/ggml.c index ab71c30a7c65b..d16233f12c999 100644 --- a/ggml.c +++ b/ggml.c @@ -4698,21 +4698,19 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { } const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN); - - ctx = (struct ggml_context *)malloc(sizeof(struct ggml_context)); - - struct ggml_scratch empty_scratch = { 0, 0, NULL }; - - (*ctx).mem_size = mem_size; - (*ctx).mem_buffer = params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size); - (*ctx).mem_buffer_owned = params.mem_buffer ? false : true; - (*ctx).no_alloc = params.no_alloc; - (*ctx).no_alloc_save = params.no_alloc; - (*ctx).n_objects = 0; - (*ctx).objects_begin = NULL; - (*ctx).objects_end = NULL; - (*ctx).scratch = empty_scratch; - (*ctx).scratch_save = empty_scratch; + + *ctx = (struct ggml_context) { + /*.mem_size =*/ mem_size, + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : GGML_ALIGNED_MALLOC(mem_size), + /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, + /*.no_alloc =*/ params.no_alloc, + /*.no_alloc_save =*/ params.no_alloc, + /*.n_objects =*/ 0, + /*.objects_begin =*/ NULL, + /*.objects_end =*/ NULL, + /*.scratch =*/ { 0, 0, NULL, }, + /*.scratch_save =*/ { 0, 0, NULL, }, + }; GGML_ASSERT(ctx->mem_buffer != NULL); @@ -18054,18 +18052,19 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) { struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE); struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs); - - (*cgraph).n_nodes = 0; - (*cgraph).n_leafs = 0; - (*cgraph).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; - (*cgraph).perf_runs = 0; - (*cgraph).perf_cycles = 0; - (*cgraph).perf_time_us = 0; - - memset((*cgraph).nodes, 0, sizeof((*cgraph).nodes)); - memset((*cgraph).grads, 0, sizeof((*cgraph).grads)); - memset((*cgraph).leafs, 0, sizeof((*cgraph).leafs)); - memset((*cgraph).visited_hash_table, 0, sizeof((*cgraph).visited_hash_table)); + + *cgraph = (struct ggml_cgraph) { + /*.n_nodes =*/ 0, + /*.n_leafs =*/ 0, + /*.nodes =*/ { NULL }, + /*.grads =*/ { NULL }, + /*.leafs =*/ { NULL }, + /*.hash_table =*/ { NULL }, + /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, + /*.perf_runs =*/ 0, + /*.perf_cycles =*/ 0, + /*.perf_time_us =*/ 0, + }; return cgraph; } @@ -22005,4 +22004,4 @@ int ggml_cpu_has_vsx(void) { #endif } -//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// \ No newline at end of file diff --git a/llama.cpp b/llama.cpp index cff17028f5bf6..e99705b514dd4 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9001,21 +9001,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat const size_t elt_size = ggml_element_size(kv_self.k); ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true }); - - // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph` - struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph))); - - (*gf).n_nodes = 0; - (*gf).n_leafs = 0; - (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; - (*gf).perf_runs = 0; - (*gf).perf_cycles = 0; - (*gf).perf_time_us = 0; - - memset((*gf).nodes, 0, sizeof((*gf).nodes)); - memset((*gf).grads, 0, sizeof((*gf).grads)); - memset((*gf).leafs, 0, sizeof((*gf).leafs)); - memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table)); + ggml_cgraph gf{}; ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer); std::vector kout3d_data(ggml_nbytes(kout3d), 0); @@ -9033,9 +9019,9 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat kv_head, n_embd, n_layer, elt_size*n_ctx, elt_size*n_ctx*n_embd, 0); - ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, k3d, kout3d)); - ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, v3d, vout3d)); - ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, k3d, kout3d)); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, v3d, vout3d)); + ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1); ggml_free(cpy_ctx); @@ -9043,10 +9029,6 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat // write them to file data_ctx->write(kout3d_data.data(), kout3d_data.size()); data_ctx->write(vout3d_data.data(), vout3d_data.size()); - - // free our allocated graph - free(gf); - gf = NULL; } for (uint32_t i = 0; i < kv_size; ++i) { @@ -9147,21 +9129,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { const size_t elt_size = ggml_element_size(kv_self.k); ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true }); - - // create a temporary cgraph without initialising ggml objects, code inspired from `ggml.c:ggml_new_graph` - struct ggml_cgraph * gf = (struct ggml_cgraph *) (malloc(sizeof(ggml_cgraph))); - - (*gf).n_nodes = 0; - (*gf).n_leafs = 0; - (*gf).order = GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT; - (*gf).perf_runs = 0; - (*gf).perf_cycles = 0; - (*gf).perf_time_us = 0; - - memset((*gf).nodes, 0, sizeof((*gf).nodes)); - memset((*gf).grads, 0, sizeof((*gf).grads)); - memset((*gf).leafs, 0, sizeof((*gf).leafs)); - memset((*gf).visited_hash_table, 0, sizeof((*gf).visited_hash_table)); + ggml_cgraph gf{}; ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer); kin3d->data = (void *) inp; @@ -9179,9 +9147,9 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) { kv_head, n_embd, n_layer, elt_size*n_ctx, elt_size*n_ctx*n_embd, 0); - ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, kin3d, k3d)); - ggml_build_forward_expand(gf, ggml_cpy(cpy_ctx, vin3d, v3d)); - ggml_graph_compute_helper(ctx->work_buffer, gf, /*n_threads*/ 1); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, kin3d, k3d)); + ggml_build_forward_expand(&gf, ggml_cpy(cpy_ctx, vin3d, v3d)); + ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1); ggml_free(cpy_ctx); } @@ -9233,11 +9201,10 @@ static bool llama_load_session_file_internal(struct llama_context * ctx, const c llama_hparams session_hparams; file.read_raw(&session_hparams, sizeof(llama_hparams)); - // TODO: need to do floating point comparison imprecisely for norm_eps - //if (session_hparams != ctx->model.hparams) { - // LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__); - // return false; - //} + if (session_hparams != ctx->model.hparams) { + LLAMA_LOG_INFO("%s : model hparams didn't match from session file!\n", __func__); + return false; + } } // load the prompt @@ -9662,4 +9629,4 @@ static void llama_log_callback_default(ggml_log_level level, const char * text, (void) user_data; fputs(text, stderr); fflush(stderr); -} +} \ No newline at end of file