From 1d4e45adb16b8bf0fc0f9e6cc71e7e427a0d8bf2 Mon Sep 17 00:00:00 2001 From: Daniel Osei-Kuffuor Date: Wed, 16 Nov 2022 11:03:22 -0800 Subject: [PATCH 1/2] Added cusparse capability for bml_transpose_ellpack. --- .../ellpack/bml_transpose_ellpack.h | 16 +++ .../ellpack/bml_transpose_ellpack_typed.c | 126 ++++++++++++++++++ 2 files changed, 142 insertions(+) diff --git a/src/C-interface/ellpack/bml_transpose_ellpack.h b/src/C-interface/ellpack/bml_transpose_ellpack.h index c297e7da8..91fe370b9 100644 --- a/src/C-interface/ellpack/bml_transpose_ellpack.h +++ b/src/C-interface/ellpack/bml_transpose_ellpack.h @@ -33,4 +33,20 @@ void bml_transpose_ellpack_single_complex( void bml_transpose_ellpack_double_complex( bml_matrix_ellpack_t * A); +#if defined(BML_USE_CUSPARSE) +void bml_transpose_cusparse_ellpack( + bml_matrix_ellpack_t * A); + +void bml_transpose_cusparse_ellpack_single_real( + bml_matrix_ellpack_t * A); + +void bml_transpose_cusparse_ellpack_double_real( + bml_matrix_ellpack_t * A); + +void bml_transpose_cusparse_ellpack_single_complex( + bml_matrix_ellpack_t * A); + +void bml_transpose_cusparse_ellpack_double_complex( + bml_matrix_ellpack_t * A); +#endif #endif diff --git a/src/C-interface/ellpack/bml_transpose_ellpack_typed.c b/src/C-interface/ellpack/bml_transpose_ellpack_typed.c index 35b976def..cc0ac8022 100644 --- a/src/C-interface/ellpack/bml_transpose_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_transpose_ellpack_typed.c @@ -17,6 +17,11 @@ #include #endif +#ifdef BML_USE_CUSPARSE +#include +#include "bml_copy_ellpack.h" +#endif + #define COMPUTE_ON_HOST /** Transpose a matrix. @@ -50,6 +55,10 @@ bml_matrix_ellpack_t int myRank = bml_getMyRank(); +#if defined(BML_USE_CUSPARSE) + TYPED_FUNC(bml_copy_ellpack) (A, B); + TYPED_FUNC(bml_transpose_cusparse_ellpack) (B); +#else // Transpose all elements #ifdef _OPENMP omp_lock_t *row_lock = (omp_lock_t *) malloc(sizeof(omp_lock_t) * N); @@ -94,6 +103,7 @@ bml_matrix_ellpack_t #if defined (USE_OMP_OFFLOAD) && defined(COMPUTE_ON_HOST) #pragma omp target update to(B_index[:N*M], B_value[:N*M], B_nnz[:N]) +#endif #endif return B; /* @@ -146,6 +156,9 @@ void TYPED_FUNC( int *A_index = A->index; int *A_nnz = A->nnz; +#if defined(BML_USE_CUSPARSE) + TYPED_FUNC(bml_transpose_cusparse_ellpack) (A); +#else #if defined(USE_OMP_OFFLOAD) #ifdef COMPUTE_ON_HOST #pragma omp target update from(A_index[:N*M], A_value[:N*M], A_nnz[:N]) @@ -204,5 +217,118 @@ void TYPED_FUNC( #pragma omp target update to(A_index[:N*M], A_value[:N*M], A_nnz[:N]) #endif #endif +#endif +} + +#if defined(BML_USE_CUSPARSE) +/** cuSPARSE matrix transpose + * + * \ingroup transpose_group + * + * \param A The matrix to be transposed + * \return the transposed A + */ +void TYPED_FUNC( + bml_transpose_cusparse_ellpack) ( + bml_matrix_ellpack_t * A) +{ + int N = A->N; + int M = A->M; + REAL_T *A_value = (REAL_T *) A->value; + int *csrColIndA = A->csrColInd; + int *csrRowPtrA = A->csrRowPtr; + REAL_T *csrValA = (REAL_T *) A->csrVal; + + /* temporary arrays to hold compressed sparse column (CSC) values */ + int *cscRowInd = NULL; + int *cscColPtr = NULL; + REAL_T *cscVal = NULL; + + cusparseStatus_t status = CUSPARSE_STATUS_SUCCESS; + cudaDataType valType = BML_CUSPARSE_T; + + // CUSPARSE APIs + cusparseHandle_t handle = NULL; + void *dBuffer = NULL; + size_t bufferSize = 0; + + // convert ellpack to cucsr + TYPED_FUNC(bml_ellpack2cucsr_ellpack) (A); + + // Create cusparse matrix A and B in CSR format + // Note: The following update is not necessary since the ellpack2cucsr + // routine updates the csr rowpointers on host and device +#pragma omp target update from(csrRowPtrA[:N+1]) + int nnzA = csrRowPtrA[N]; + + // Allocate memory for result arrays + cscVal = + (REAL_T *) omp_target_alloc(sizeof(REAL_T) * nnzA, + omp_get_default_device()); + cscRowInd = + (int *) omp_target_alloc(sizeof(int) * nnzA, + omp_get_default_device()); + cscColPtr = + (int *) omp_target_alloc(sizeof(int) * (N + 1), + omp_get_default_device()); + + BML_CHECK_CUSPARSE(cusparseCreate(&handle)); +#pragma omp target data use_device_ptr(csrRowPtrA,csrColIndA,csrValA) + { + // Get storage buffer size + BML_CHECK_CUSPARSE(cusparseCsr2cscEx2_bufferSize + (handle, N, N, nnzA, csrValA, csrRowPtrA, + csrColIndA, cscVal, cscColPtr, cscRowInd, valType, + CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, &bufferSize)); + + // Allocate buffer and perform transpose operation + dBuffer = + (char *) omp_target_alloc(bufferSize, omp_get_default_device()); + + BML_CHECK_CUSPARSE(cusparseCsr2cscEx2 + (handle, N, N, nnzA, csrValA, csrRowPtrA, + csrColIndA, cscVal, cscColPtr, cscRowInd, valType, + CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, dBuffer)); + + /* Update matA with new result. Note that only device arrays are updated. */ + omp_target_memcpy(csrRowPtrA, cscColPtr, (N + 1) * sizeof(int), + 0, 0, omp_get_default_device(), + omp_get_default_device()); + omp_target_memcpy(csrColIndA, cscRowInd, nnzA * sizeof(int), 0, + 0, omp_get_default_device(), + omp_get_default_device()); + omp_target_memcpy(csrValA, cscVal, nnzA * sizeof(REAL_T), 0, 0, + omp_get_default_device(), omp_get_default_device()); + + // deallocate storage buffer + omp_target_free(dBuffer, omp_get_default_device()); + } + +/* +// DEBUG: +#pragma omp target update from(csrRowPtrA[:N+1]) +#pragma omp target update from(csrValA[:nnzA]) +#pragma omp target update from(csrColIndA[:nnzA]) + printf("From cuSPARSE: \n\n"); +for(int i=0; i Date: Thu, 17 Nov 2022 06:40:36 -0800 Subject: [PATCH 2/2] Removed commented out code. --- .../ellpack/bml_transpose_ellpack_typed.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/src/C-interface/ellpack/bml_transpose_ellpack_typed.c b/src/C-interface/ellpack/bml_transpose_ellpack_typed.c index cc0ac8022..b610a4891 100644 --- a/src/C-interface/ellpack/bml_transpose_ellpack_typed.c +++ b/src/C-interface/ellpack/bml_transpose_ellpack_typed.c @@ -307,21 +307,6 @@ void TYPED_FUNC( omp_target_free(dBuffer, omp_get_default_device()); } -/* -// DEBUG: -#pragma omp target update from(csrRowPtrA[:N+1]) -#pragma omp target update from(csrValA[:nnzA]) -#pragma omp target update from(csrColIndA[:nnzA]) - printf("From cuSPARSE: \n\n"); -for(int i=0; i