From e3bc7c7a2451a3acc75a37c286c5dab51deafcaa Mon Sep 17 00:00:00 2001 From: Slaren <2141330+slaren@users.noreply.github.com> Date: Fri, 28 Apr 2023 00:48:01 +0200 Subject: [PATCH] cuBLAS: also pin kv cache --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama.cpp b/llama.cpp index 8c89e9912e9faf..bd065e961adecc 100644 --- a/llama.cpp +++ b/llama.cpp @@ -136,7 +136,7 @@ struct llama_kv_cache { struct ggml_context * ctx = NULL; - llama_buffer buf; + llama_ctx_buffer buf; int n; // number of tokens currently in the cache