Commit 35b195c
cuda : fix LLAMA_CUDA_F16 (ggerganov#5262)
slaren authored and hodlen committed Apr 1, 2024
1 parent a244dee commit 35b195c
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions ggml-cuda.cu
@@ -8657,9 +8657,9 @@ static void ggml_cuda_op_dequantize_mul_mat_vec(

     if (src1_convert_f16) {
         src1_dfloat = src1_dfloat_a.alloc(ne00);
-        ggml_cpy_f32_f16_cuda((const char *) src1_ddf_i, (char *) src1_dfloat, ne00,
-                              ne00, 1, sizeof(float), 0, 0,
-                              ne00, 1, sizeof(half), 0, 0, stream);
+        const to_fp16_cuda_t to_fp16_cuda = ggml_get_to_fp16_cuda(src1->type);
+        GGML_ASSERT(to_fp16_cuda != nullptr);
+        to_fp16_cuda(src1_ddf_i, src1_dfloat, ne00, stream);
     }
 #else
     const dfloat * src1_dfloat = (const dfloat *) src1_ddf_i; // dfloat == float, no conversion
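For context: as the diff shows, the old path converted src1 with ggml_cpy_f32_f16_cuda, whose name and sizeof(float) arguments hard-code an F32 source. The new path instead looks up a converter for src1's actual type via ggml_get_to_fp16_cuda(src1->type) and asserts that one exists. Below is a minimal, self-contained CUDA sketch of that dispatch pattern; the demo_ names, the type enum, and the kernel are hypothetical stand-ins for illustration, not ggml's real definitions.

#include <cassert>
#include <cstdio>
#include <vector>
#include <cuda_fp16.h>
#include <cuda_runtime.h>

// Hypothetical stand-ins for ggml's type enum and converter signature.
enum demo_type { DEMO_TYPE_F32, DEMO_TYPE_OTHER /* no converter in this sketch */ };

typedef void (*to_fp16_cuda_t)(const void * src, half * dst, int n, cudaStream_t stream);

// One thread per element: convert a float to half.
__global__ void convert_f32_to_f16(const float * src, half * dst, int n) {
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        dst[i] = __float2half(src[i]);
    }
}

static void to_fp16_from_f32(const void * src, half * dst, int n, cudaStream_t stream) {
    const int block_size = 256;
    const int num_blocks = (n + block_size - 1) / block_size;
    convert_f32_to_f16<<<num_blocks, block_size, 0, stream>>>((const float *) src, dst, n);
}

// Type-based lookup: returns nullptr for unsupported source types, which the
// caller must check -- the role GGML_ASSERT plays in the patch above.
static to_fp16_cuda_t demo_get_to_fp16_cuda(demo_type type) {
    switch (type) {
        case DEMO_TYPE_F32: return to_fp16_from_f32;
        default:            return nullptr;
    }
}

int main() {
    const int n = 1024;
    std::vector<float> host(n, 1.5f);

    float * src = nullptr;
    half  * dst = nullptr;
    cudaMalloc((void **) &src, n * sizeof(float));
    cudaMalloc((void **) &dst, n * sizeof(half));
    cudaMemcpy(src, host.data(), n * sizeof(float), cudaMemcpyHostToDevice);

    // Select the converter from the source type instead of hard-coding F32.
    const to_fp16_cuda_t to_fp16_cuda = demo_get_to_fp16_cuda(DEMO_TYPE_F32);
    assert(to_fp16_cuda != nullptr);
    to_fp16_cuda(src, dst, n, 0);
    cudaDeviceSynchronize();

    cudaFree(src);
    cudaFree(dst);
    printf("converted %d floats to fp16\n", n);
    return 0;
}

Note that this whole branch is only compiled when F16 support is enabled (the LLAMA_CUDA_F16 flag named in the commit title); otherwise the #else path shown in the diff reinterprets the F32 buffer directly with no conversion.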
