diff --git a/ggml-cuda.cu b/ggml-cuda.cu index f87f18802c8f8..58e7ecb2ed40d 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -6251,6 +6251,8 @@ static int64_t get_row_rounding(ggml_type type) { return max_compute_capability >= CC_RDNA2 ? 128 : 64; case GGML_TYPE_F16: return 1; + case GGML_TYPE_F32: + return 1; case GGML_TYPE_Q2_K: return max_compute_capability >= CC_RDNA2 ? 128 : 32; case GGML_TYPE_Q3_K: @@ -6273,6 +6275,8 @@ static int64_t get_row_rounding(ggml_type type) { return 64; case GGML_TYPE_F16: return 1; + case GGML_TYPE_F32: + return 1; case GGML_TYPE_Q2_K: case GGML_TYPE_Q3_K: case GGML_TYPE_Q4_K: