From 7e596d4c61cd6e60d8862d496c9ac0b7c1f9a8f1 Mon Sep 17 00:00:00 2001 From: fxzjshm Date: Mon, 3 Feb 2025 22:33:38 +0800 Subject: [PATCH] HIP: force max threads per block to be 1024 Some old compilers still use 256 as this limit. Explicitly set it to 1024 to get correct results from ops like ARGMAX and GROUP_NORM. Related: #10610, #11619 Signed-off-by: fxzjshm --- ggml/src/ggml-hip/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index eb03e10fa48a1b..acd8fe124eeea3 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -40,6 +40,9 @@ find_package(hip REQUIRED) find_package(hipblas REQUIRED) find_package(rocblas REQUIRED) +# Workaround old compilers +set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} --gpu-max-threads-per-block=1024") + if (${hip_VERSION} VERSION_LESS 5.5) message(FATAL_ERROR "At least ROCM/HIP V5.5 is required") endif()