diff --git a/llama-util.h b/llama-util.h
index 3cac9f681800b..1f6c0d9cdcbcd 100644
--- a/llama-util.h
+++ b/llama-util.h
@@ -163,6 +163,9 @@ static std::string llama_format_win_err(DWORD err) {
 }
 #endif
 
+extern "C" { // C linkage: the function is defined outside this header (presumably in ggml — confirm)
+bool ggml_is_numa(); // queried by llama_mmap to skip prefetch and disable readahead on NUMA systems
+} // extern "C"
 struct llama_mmap {
     void * addr;
     size_t size;
@@ -176,8 +179,10 @@ struct llama_mmap {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
+        // prefetch/readahead impairs performance on NUMA systems
+        if (ggml_is_numa()) prefetch = 0;
 #ifdef __linux__
-        flags |= MAP_POPULATE;
+        if (prefetch) flags |= MAP_POPULATE;
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
@@ -191,6 +196,14 @@ struct llama_mmap {
                         strerror(errno));
             }
         }
+        if (ggml_is_numa()) {
+            // advise the kernel not to use readahead
+            // (because the next page might not belong on the same node)
+            if (madvise(addr, file->size, MADV_RANDOM)) {
+                fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
+                        strerror(errno));
+            }
+        }
     }
 
     ~llama_mmap() {