diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 4ae202185f711b..c249c67dd3cc87 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -4040,7 +4040,9 @@ static __global__ void cpy_f32_q8_0( src = &zero; } else { src = x; - memcpy(&dst[1 + iqs/8].qs[sizeof(float) * (iqs % 8)], src, sizeof(float)); + if (i0 / QK8_0 == (i_blck_0 + ne00) / QK8_0) { + memcpy(&dst[1 + iqs/8].qs[sizeof(float) * (iqs % 8)], src, sizeof(float)); + } } float val;