Skip to content

Commit

Permalink
attempt to fix peer access ggerganov#9761
Browse files — browse the repository at this point in the history
  • Loading branch information
thamwangjun committed Oct 8, 2024
1 parent dca1d4b commit 27d9881
Showing 1 changed file with 3 additions and 37 deletions.
40 changes: 3 additions & 37 deletions ggml/src/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <stdlib.h>
#include <string>
#include <vector>
#include <unistd.h>

// Compile-time guard: fail the build unless CUDA's `half` and ggml's
// `ggml_fp16_t` have the same size.
// NOTE(review): presumably required because fp16 buffers are reinterpreted
// between the two types elsewhere in this file — confirm against the callers.
static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size");

Expand Down Expand Up @@ -1284,6 +1285,7 @@ static void ggml_cuda_op_mul_mat_cublas(
}

static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {

static bool peer_access_enabled = false;

const bool enable_peer_access = n_tokens <= GGML_CUDA_PEER_MAX_BATCH_SIZE;
Expand All @@ -1292,43 +1294,7 @@ static void ggml_cuda_set_peer_access(const int n_tokens, int main_device) {
return;
}

#ifdef NDEBUG
for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) {
ggml_cuda_set_device(id);
CUDA_CHECK(cudaDeviceSynchronize());
}

for (int id = 0; id < ggml_backend_cuda_get_device_count(); ++id) {
ggml_cuda_set_device(id);

for (int id_other = 0; id_other < ggml_backend_cuda_get_device_count(); ++id_other) {
if (id == id_other) {
continue;
}
if (id != main_device && id_other != main_device) {
continue;
}

int can_access_peer;
CUDA_CHECK(cudaDeviceCanAccessPeer(&can_access_peer, id, id_other));
if (can_access_peer) {
if (enable_peer_access) {
cudaError_t err = cudaDeviceEnablePeerAccess(id_other, 0);
if (err != cudaErrorPeerAccessAlreadyEnabled) {
CUDA_CHECK(err);
}
} else {
cudaError_t err = cudaDeviceDisablePeerAccess(id_other);
if (err != cudaErrorPeerAccessNotEnabled) {
CUDA_CHECK(err);
}
}
}
}
}

ggml_cuda_set_device(main_device);
#endif // NDEBUG
usleep(100000);

peer_access_enabled = enable_peer_access;

Expand Down

0 comments on commit 27d9881

Please sign in to comment.