From ef2c4793c75d4950bfa36f1c1b7c2458bb3bb95a Mon Sep 17 00:00:00 2001 From: intellinjun <105184542+intellinjun@users.noreply.github.com> Date: Tue, 14 Nov 2023 11:25:56 +0800 Subject: [PATCH] [LLM Runtime]Fix gptneox bug (#671) Signed-off-by: intellinjun --- .../llm/runtime/graph/models/gptneox/gptneox_utils.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/intel_extension_for_transformers/llm/runtime/graph/models/gptneox/gptneox_utils.cpp b/intel_extension_for_transformers/llm/runtime/graph/models/gptneox/gptneox_utils.cpp index c2d8527b634..47b49917561 100644 --- a/intel_extension_for_transformers/llm/runtime/graph/models/gptneox/gptneox_utils.cpp +++ b/intel_extension_for_transformers/llm/runtime/graph/models/gptneox/gptneox_utils.cpp @@ -45,6 +45,7 @@ void model_load_internal(const std::string& fname, model_archs arch, model_conte std::unique_ptr ms(new GPTNEOX()); ms->init(fname.c_str(), lctx, n_gpu_layers, use_mmap, use_mlock, vocab_only); ms->load(lctx, progress_callback, progress_callback_user_data); + lctx.support_jblas_kv = true; } void GPTNEOX::init(const char* path_model, model_context& lctx, int n_gpu_layer_, bool use_mmap_, bool use_mlock_,