// Patch summary (llama-util.h): make the mmap-based llama_mmap constructor NUMA-aware.
//  - Declares ggml_is_numa() with C linkage so this header can call it.
//  - When ggml_is_numa() returns true, `prefetch` is cleared, so MAP_POPULATE
//    (only ever added under __linux__) is no longer OR-ed into the mmap flags.
//  - After the existing mapping/prefetch code, when ggml_is_numa() returns true,
//    madvise(addr, file->size, MADV_RANDOM) is issued to discourage kernel readahead;
//    a failure only prints a warning with strerror(errno) to stderr and is otherwise ignored.
// NOTE(review): the declaration of `prefetch` lies outside these hunks — confirm that
// assigning 0 matches its type, and that ggml_is_numa() is cheap enough to call twice.
diff --git a/llama-util.h b/llama-util.h index 3cac9f681800b..1f6c0d9cdcbcd 100644 --- a/llama-util.h +++ b/llama-util.h @@ -163,6 +163,9 @@ static std::string llama_format_win_err(DWORD err) { } #endif +extern "C" { +bool ggml_is_numa(); +} struct llama_mmap { void * addr; size_t size; @@ -176,8 +179,10 @@ struct llama_mmap { size = file->size; int fd = fileno(file->fp); int flags = MAP_SHARED; + // prefetch/readahead impairs performance on NUMA systems + if (ggml_is_numa()) prefetch = 0; #ifdef __linux__ - flags |= MAP_POPULATE; + if (prefetch) flags |= MAP_POPULATE; #endif addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0); if (addr == MAP_FAILED) { @@ -191,6 +196,14 @@ struct llama_mmap { strerror(errno)); } } + if (ggml_is_numa()) { + // advise the kernel not to use readahead + // (because the next page might not belong on the same node) + if (madvise(addr, file->size, MADV_RANDOM)) { + fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n", + strerror(errno)); + } + } } ~llama_mmap() {