From d3d4f7e29f09e8c44adc93d8935a620aa552f61d Mon Sep 17 00:00:00 2001 From: Mike Wall Date: Sun, 30 Jan 2022 14:08:12 -0500 Subject: [PATCH] Implement rocSOLVER dsyevd for MAGMA build o Pattern after cuSOLVER implementation o Build is controlled using BML_ROCSOLVER - Like BML_CUSOLVER --- CMakeLists.txt | 41 ++++++++++++++ build.sh | 3 ++ scripts/setenv_crusher_magma_gcc.sh | 2 + scripts/setenv_spock_magma_gcc.sh | 1 + src/C-interface/dense/bml_diagonalize_dense.c | 54 ++++++++++++++++++- 5 files changed, 100 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eda8cd7aa..7cd25ce63 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -253,6 +253,35 @@ if (CUDA_FOUND) include_directories(${CUDA_INCLUDE_DIRS}) endif() +list(APPEND CMAKE_PREFIX_PATH $ENV{ROCM_PATH}) + +if(BML_MAGMA) + find_package(hip) +endif() + +if(BML_ROCSOLVER) + find_package(rocblas) + find_package(rocsolver) +endif() + +if (hip_FOUND) + message("HIP libraries: ${hip_LIBRARIES}") + list(APPEND LINK_LIBRARIES ${hip_LIBRARIES}) + include_directories(${hip_INCLUDE_DIRS}) +endif() + +if (rocblas_FOUND) + message("ROCBLAS libraries: ${rocblas_LIBRARIES}") + list(APPEND LINK_LIBRARIES ${rocblas_LIBRARIES}) + include_directories(${rocblas_INCLUDE_DIRS}) +endif() + +if (rocsolver_FOUND) + message("ROCSOLVER libraries: ${rocsolver_LIBRARIES}") + list(APPEND LINK_LIBRARIES ${rocsolver_LIBRARIES}) + include_directories(${rocsolver_INCLUDE_DIRS}) +endif() + set(BML_OPENCL FALSE CACHE BOOL "Whether to compiler with OpenCL support") if(BML_OPENCL) include(FindOpenCL) @@ -261,9 +290,11 @@ endif() SET(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") set(BML_MAGMA FALSE CACHE BOOL "Whether to use MAGMA library") + if(BML_MAGMA) FIND_PACKAGE(MAGMA) endif() + if (MAGMA_FOUND) add_definitions(-DBML_USE_MAGMA) message(STATUS @@ -284,6 +315,10 @@ if (MAGMA_FOUND) message(STATUS "Will use cuSOLVER") add_definitions(-DBML_USE_CUSOLVER) endif() + if(BML_ROCSOLVER) + message(STATUS "Will use rocSOLVER") + add_definitions(-DBML_USE_ROCSOLVER) + endif() endif() if (CUBLAS_FOUND) @@ -344,6 +379,9 @@ if(MAGMA_FOUND) if(BML_CUSOLVER) message(STATUS "cuSOLVER: ${CUDA_cusolver_LIBRARY}") endif() + if(BML_ROCSOLVER) + message(STATUS "rocSOLVER: ${ROCM_rocsolver_LIBRARY}") + endif() endif() #check existence of required math and linear algebra functions @@ -454,6 +492,9 @@ if(MAGMA_FOUND) if(BML_CUSOLVER) list(APPEND LINK_LIBRARIES ${CUDA_cusolver_LIBRARY}) endif() + if(BML_ROCSOLVER) + list(APPEND LINK_LIBRARIES ${ROCM_rocsolver_LIBRARY}) + endif() endif() if(BML_SCALAPACK) list(APPEND LINK_LIBRARIES ${SCALAPACK_LIBRARIES}) diff --git a/build.sh b/build.sh index 3cf20d4c9..313b5fe2f 100755 --- a/build.sh +++ b/build.sh @@ -83,6 +83,7 @@ EOF echo "BML_CUDA Build with CUDA (default is ${BML_CUDA})" echo "BML_MAGMA Build with MAGMA (default is ${BML_MAGMA})" echo "BML_CUSOLVER Build with cuSOLVER (default is ${BML_CUSOLVER})" + echo "BML_ROCSOLVER Build with rocSOLVER (default is ${BML_ROCSOLVER})" echo "BML_XSMM Build with XSMM (default is ${BML_XSMM})" echo "BML_SCALAPACK Build with SCALAPACK (default is ${BML_SCALAPACK})" echo "SCALAPACK_LIBRARIES ScaLapack libraries (default is ${SCALAPACK_LIBRARIES})" @@ -129,6 +130,7 @@ set_defaults() { : ${BML_CUDA:=no} : ${BML_MAGMA:=no} : ${BML_CUSOLVER:=no} + : ${BML_ROCSOLVER:=no} : ${BML_XSMM:=no} : ${BML_SCALAPACK:=no} : ${BML_ELLBLOCK_MEMPOOL:=no} @@ -215,6 +217,7 @@ configure() { -DBML_CUDA="${BML_CUDA}" \ -DBML_MAGMA="${BML_MAGMA}" \ -DBML_CUSOLVER="${BML_CUSOLVER}" \ + -DBML_ROCSOLVER="${BML_ROCSOLVER}" \ -DBML_XSMM="${BML_XSMM}" \ -DBML_SCALAPACK="${BML_SCALAPACK}" \ -DBML_ELLBLOCK_MEMPOOL="${BML_ELLBLOCK_MEMPOOL}" \ diff --git a/scripts/setenv_crusher_magma_gcc.sh b/scripts/setenv_crusher_magma_gcc.sh index 5a6a94d21..ca1dc5618 100644 --- a/scripts/setenv_crusher_magma_gcc.sh +++ b/scripts/setenv_crusher_magma_gcc.sh @@ -4,3 +4,5 @@ module load PrgEnv-gnu module load magma module load cmake module load openblas +module load rocm + diff --git a/scripts/setenv_spock_magma_gcc.sh b/scripts/setenv_spock_magma_gcc.sh index 5a6a94d21..18c0305af 100644 --- a/scripts/setenv_spock_magma_gcc.sh +++ b/scripts/setenv_spock_magma_gcc.sh @@ -4,3 +4,4 @@ module load PrgEnv-gnu module load magma module load cmake module load openblas +module load rocm diff --git a/src/C-interface/dense/bml_diagonalize_dense.c b/src/C-interface/dense/bml_diagonalize_dense.c index 0fd4ca7cc..33d7d206b 100644 --- a/src/C-interface/dense/bml_diagonalize_dense.c +++ b/src/C-interface/dense/bml_diagonalize_dense.c @@ -14,6 +14,11 @@ #include #include #endif +#ifdef BML_USE_ROCSOLVER +#include +#include +#include +#endif #else #include "../lapack.h" #endif @@ -189,6 +194,53 @@ bml_diagonalize_dense_double_real( if (cusolverH) cusolverDnDestroy(cusolverH); +#else +#ifdef BML_USE_ROCSOLVER + // See https://rocsolver.readthedocs.io/_/downloads/en/latest/pdf/ + // create cusolver/cublas handle + rocblas_handle rocblasH = NULL; + rocblas_status rocblasS = rocblas_create_handle(&rocblasH); + assert(rocblas_status_success == rocblasS); + + // allocate memory for eigenvalues + double *d_W = NULL; + hipError_t hipStat = hipMalloc((void **) &d_W, sizeof(double) * A->N); + assert(hipSuccess == hipStat); + + // compute eigenvalues and eigenvectors + rocblas_evect evect = rocblas_evect_original; + rocblas_fill uplo = rocblas_fill_lower; + + // allocate working space of syevd + double *d_work = NULL; + hipStat = hipMalloc((void **) &d_work, sizeof(double) * A->N * A->N); + assert(hipSuccess == hipStat); + + // solve + rocblas_int *devInfo = NULL; + hipStat = hipMalloc((void **) &devInfo, sizeof(rocblas_int)); + assert(hipSuccess == hipStat); + + rocblasS = + rocsolver_dsyevd(rocblasH, evect, uplo, A->N, evecs, A->ld, d_W, + d_work, devInfo); + hipStat = hipDeviceSynchronize(); + assert(rocblas_status_success == rocblasS); + assert(hipSuccess == hipStat); + + // copy eigenvalues to CPU + hipStat = + hipMemcpy(typed_eigenvalues, d_W, sizeof(double) * A->N, + hipMemcpyDeviceToHost); + assert(hipSuccess == hipStat); + + // free resources + hipFree(d_W); + hipFree(devInfo); + hipFree(d_work); + + if (rocblasH) + rocblas_destroy_handle(rocblasH); #else // MAGMA int nb = magma_get_ssytrd_nb(A->N); @@ -222,7 +274,7 @@ bml_diagonalize_dense_double_real( // printf("norm = %le\n", norm); //} #endif - +#endif // transpose eigenvactors matrix on GPU A_matrix = (double *) eigenvectors->matrix; magmablas_dtranspose(A->N, A->N, evecs, A->ld,