From 6919a522dc2e75f494f8db1f36dadeda95b2d243 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 2 Nov 2023 19:02:35 +0200
Subject: [PATCH] llama : fix save/load state context size

---
 llama.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 2c74e8b0ebc5b4..3f930fe85a85f8 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8558,7 +8558,7 @@ static void llama_copy_state_data_internal(struct llama_context * ctx, llama_dat
         if (kv_buf_size) {
             const size_t elt_size = ggml_element_size(kv_self.k);

-            ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
+            ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
             ggml_cgraph * gf = ggml_new_graph(cpy_ctx);

             ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
@@ -8686,7 +8686,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {

             const size_t elt_size = ggml_element_size(kv_self.k);

-            ggml_context * cpy_ctx = ggml_init({ 4096, NULL, /* no_alloc */ true });
+            ggml_context * cpy_ctx = ggml_init({ 6*ggml_tensor_overhead() + ggml_graph_overhead(), NULL, /* no_alloc */ true });
             ggml_cgraph * gf = ggml_new_graph(cpy_ctx);

             ggml_tensor * kin3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_head, n_layer);
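
Note (not part of the patch): the new size is derived from ggml's own overhead helpers rather than a hard-coded 4096 bytes. Presumably it budgets metadata for the six tensor structs the copy graph places in cpy_ctx (kout3d/kin3d, vout3d/vin3d, the two 3D views of kv_self.k and kv_self.v, and the two ggml_cpy results) plus one graph object, since the context is created with no_alloc and holds only metadata. A minimal standalone C sketch, assuming ggml.h is on the include path, that prints the resulting budget:

    /* sketch only: compute the metadata budget the patch reserves for the
       no_alloc KV-copy context (6 tensor structs + 1 graph object) */
    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        const size_t need = 6*ggml_tensor_overhead() + ggml_graph_overhead();
        printf("tensor overhead : %zu bytes\n", ggml_tensor_overhead());
        printf("graph overhead  : %zu bytes\n", ggml_graph_overhead());
        printf("context size    : %zu bytes (old hard-coded value: 4096)\n", need);
        return 0;
    }

The exact byte values are implementation-defined and may change between ggml versions, which is exactly why the patch asks ggml for them at runtime instead of guessing a fixed size.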