Skip to content

Commit

Permalink
MUSA: Stop explicitly setting use_mul_mat_vec_q to false
Browse files Browse the repository at this point in the history
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
  • Loading branch information
yeahdongcn committed Jul 15, 2024
1 parent 12329e6 commit bf41f17
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 7 deletions.
6 changes: 0 additions & 6 deletions ggml/src/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -1906,17 +1906,11 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
const int cc = ggml_cuda_info().devices[id].cc;
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
-#ifdef GGML_USE_MUSA
-use_mul_mat_vec_q = false;
-#endif // GGML_USE_MUSA
}
} else {
const int cc = ggml_cuda_info().devices[ctx.device].cc;
use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
-#ifdef GGML_USE_MUSA
-use_mul_mat_vec_q = false;
-#endif // GGML_USE_MUSA
}

// debug helpers
Expand Down
2 changes: 1 addition & 1 deletion ggml/src/ggml-cuda/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@
#define cudaMemcpyAsync musaMemcpyAsync
#define cudaMemcpyPeerAsync musaMemcpyPeerAsync
#define cudaMemcpy2DAsync musaMemcpy2DAsync
-#define cudaMemcpyDeviceToDevice musaMemcpyDeviceToDevice
+#define cudaMemcpyDeviceToDevice musaMemcpyDefault
#define cudaMemcpyDeviceToHost musaMemcpyDeviceToHost
#define cudaMemcpyHostToDevice musaMemcpyHostToDevice
#define cudaMemcpyKind musaMemcpyKind
Expand Down

0 comments on commit bf41f17

Please sign in to comment.