Skip to content

Commit

Permalink
try fewer event waiting
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesGaessler committed Sep 11, 2023
1 parent 54f041b commit c42b303
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5975,7 +5975,7 @@ static void ggml_cuda_op_mul_mat(

// if multiple devices are used they need to wait for the main device
// here an event is recorded that signals that the main device has finished calculating the input data
-if (split) {
+if (split && g_device_count > 1) {
CUDA_CHECK(cudaSetDevice(g_main_device));
CUDA_CHECK(cudaEventRecord(src0_extra->events[g_main_device][0], g_cudaStreams[g_main_device][0]));
}
Expand Down Expand Up @@ -6114,10 +6114,12 @@ static void ggml_cuda_op_mul_mat(
}

// main device waits for all other devices to be finished
-if (split) {
+if (split && g_device_count > 1) {
+const int64_t is_max = ne11/MUL_MAT_SRC1_COL_STRIDE <= MAX_STREAMS ? ne11/MUL_MAT_SRC1_COL_STRIDE : MAX_STREAMS;
+
CUDA_CHECK(cudaSetDevice(g_main_device));
for (int64_t id = 0; id < g_device_count; ++id) {
-for (int64_t is = 0; is < MAX_STREAMS; ++is) {
+for (int64_t is = 0; is < is_max; ++is) {
CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[g_main_device][0], src0_extra->events[id][is]));
}
}
Expand Down

0 comments on commit c42b303

Please sign in to comment.