MUSA: Stop explicitly setting use_mul_mat_vec_q to false

Also map cudaMemcpyDeviceToDevice to musaMemcpyDefault in ggml/src/ggml-cuda/common.cuh.

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
ggerganov · Jul 15, 2024 · bf41f17
1 parent 12329e6
commit bf41f17
Show file tree

Hide file tree

Showing 2 changed files with 1 addition and 7 deletions.
diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
@@ -1906,17 +1906,11 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
             const int cc            = ggml_cuda_info().devices[id].cc;
             use_mul_mat_q           = use_mul_mat_q           && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
             any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
-#ifdef GGML_USE_MUSA
-            use_mul_mat_vec_q       = false;
-#endif // GGML_USE_MUSA
         }
     } else {
         const int cc            = ggml_cuda_info().devices[ctx.device].cc;
         use_mul_mat_q           = use_mul_mat_q           && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]);
         any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_available(cc);
-#ifdef GGML_USE_MUSA
-        use_mul_mat_vec_q       = false;
-#endif // GGML_USE_MUSA
     }
 
     // debug helpers

diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
@@ -181,7 +181,7 @@
 #define cudaMemcpyAsync musaMemcpyAsync
 #define cudaMemcpyPeerAsync musaMemcpyPeerAsync
 #define cudaMemcpy2DAsync musaMemcpy2DAsync
-#define cudaMemcpyDeviceToDevice musaMemcpyDeviceToDevice
+#define cudaMemcpyDeviceToDevice musaMemcpyDefault
 #define cudaMemcpyDeviceToHost musaMemcpyDeviceToHost
 #define cudaMemcpyHostToDevice musaMemcpyHostToDevice
 #define cudaMemcpyKind musaMemcpyKind