Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workarounds for removed cusparse functions #2270

Merged
merged 1 commit into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 18 additions & 38 deletions perf_test/sparse/KokkosSparse_spiluk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@
#include <unordered_map>
#include <iomanip> // std::setprecision

#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// cuSPARSE ILU and IC factorizations were removed
// completely in cuSPARSE 12.5
#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && (CUSPARSE_VERSION < 12500)
#define USE_CUSPARSE_ILU
#endif

#ifdef USE_CUSPARSE_ILU
#include <cusparse.h>
#endif

Expand All @@ -39,8 +45,6 @@
#include <KokkosKernels_IOUtils.hpp>
#include <KokkosSparse_IOUtils.hpp>

#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA) && \
(!defined(KOKKOS_ENABLE_CUDA) || (8000 <= CUDA_VERSION))
using namespace KokkosSparse;
using namespace KokkosSparse::Experimental;
using namespace KokkosKernels;
Expand All @@ -52,8 +56,8 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,
int team_size, int /*vector_length*/,
/*int idx_offset,*/ int loop) {
typedef default_scalar scalar_t;
typedef default_lno_t lno_t;
typedef default_size_type size_type;
typedef int lno_t;
typedef int size_type;
typedef Kokkos::DefaultExecutionSpace execution_space;
typedef typename execution_space::memory_space memory_space;

Expand Down Expand Up @@ -82,6 +86,11 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,

std::cout << "\n\n" << std::endl;
if (!afilename.empty()) {
#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && !defined(USE_CUSPARSE_ILU)
std::cout << "** Note: cuSPARSE is enabled, but the cusparseXcsrilu*\n";
std::cout << " functions were removed in cuSPARSE 12.5.\n";
std::cout << " Only KokkosKernels spiluk will be run.\n\n";
#endif
std::cout << "ILU(K) Begin: Read matrix filename " << afilename
<< std::endl;
crsmat_t A = KokkosSparse::Impl::read_kokkos_crst_matrix<crsmat_t>(
Expand All @@ -91,11 +100,7 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,
const int nnz = A.nnz();
const typename KernelHandle::const_nnz_lno_t fill_lev = lno_t(kin);

#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// cuSPARSE requires lno_t = size_type = int. For both, int is always used
// (if enabled)
#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \
defined(KOKKOSKERNELS_INST_OFFSET_INT)
#ifdef USE_CUSPARSE_ILU
// std::cout << " cusparse: create handle" << std::endl;
cusparseStatus_t status;
cusparseHandle_t handle = 0;
Expand Down Expand Up @@ -131,10 +136,6 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,
info, &pBufferSize);
// pBuffer returned by cudaMalloc is automatically aligned to 128 bytes.
cudaMalloc((void **)&pBuffer, pBufferSize);
#else
std::cout << "Note: the cuSPARSE TPL is enabled, but either offset=int or "
"ordinal=int is disabled, so it can't be used.\n";
#endif
#endif

for (auto test : tests) {
Expand Down Expand Up @@ -223,11 +224,7 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,
std::cout << "nrm2(A*e-L*U*e) = " << std::setprecision(15) << bb_nrm
<< std::endl;

#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// cuSPARSE requires lno_t = size_type = int. For both, int is always used
// (if enabled)
#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \
defined(KOKKOSKERNELS_INST_OFFSET_INT)
#ifdef USE_CUSPARSE_ILU
if (fill_lev == 0) {
std::cout << "CUSPARSE: No KK interface added yet" << std::endl;

Expand Down Expand Up @@ -383,7 +380,6 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,
} // end row
std::cout << "ILU(0) SUCCESS!" << std::endl;
} // fill_lev=0
#endif
#endif

// Benchmark
Expand All @@ -407,11 +403,7 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,
std::cout << "LOOP_MAX_TIME: " << max_time << std::endl;
std::cout << "LOOP_MIN_TIME: " << min_time << std::endl;

#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// cuSPARSE requires lno_t = size_type = int. For both, int is always used
// (if enabled)
#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \
defined(KOKKOSKERNELS_INST_OFFSET_INT)
#ifdef USE_CUSPARSE_ILU
if (fill_lev == 0) {
lno_view_t A_row_map("A_row_map", nrows + 1);
lno_nnz_view_t A_entries("A_entries", nnz);
Expand Down Expand Up @@ -441,21 +433,15 @@ int test_spiluk_perf(std::vector<int> tests, std::string afilename, int kin,
std::cout << "LOOP_MAX_TIME (cuSPARSE): " << max_time << std::endl;
std::cout << "LOOP_MIN_TIME (cuSPARSE): " << min_time << std::endl;
} // fill_lev=0
#endif
#endif
} // end tests

#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
// cuSPARSE requires lno_t = size_type = int. For both, int is always used
// (if enabled)
#if defined(KOKKOSKERNELS_INST_ORDINAL_INT) && \
defined(KOKKOSKERNELS_INST_OFFSET_INT)
#ifdef USE_CUSPARSE_ILU
// step 6: free resources
cudaFree(pBuffer);
cusparseDestroyCsrilu02Info(info);
cusparseDestroyMatDescr(descr);
cusparseDestroy(handle);
#endif
#endif
} // end if (!afilename.empty())

Expand Down Expand Up @@ -583,9 +569,3 @@ int main(int argc, char **argv) {
Kokkos::finalize();
return 0;
}
#else
int main() {
std::cout << "The SPILUK perf_test requires CUDA >= 8.0\n";
return 0;
}
#endif
62 changes: 38 additions & 24 deletions perf_test/sparse/KokkosSparse_sptrsv_aux.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,25 +228,37 @@ std::string getCuSparseErrorString(cusparseStatus_t status) {
/* =========================================================================================
*/
#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE)
#if CUSPARSE_VERSION >= 12500
template <typename crsmat_t, typename host_crsmat_t>
bool check_cusparse(host_crsmat_t &, bool, crsmat_t &, bool, crsmat_t &, int *,
int *, double, int) {
// TODO: call KokkosSparse::sptrsv (if hardcoded problem settings below are
// compatible), or add wrappers for modern interface (cusparseSpSV*)
throw std::logic_error("Legacy cuSPARSE csrsv interface not available.");
return false;
}

#else

template <typename crsmat_t, typename host_crsmat_t>
bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
bool col_majorU, crsmat_t &U, int *perm_r, int *perm_c,
double tol, int loop) {
using values_view_t = typename crsmat_t::values_type::non_const_type;
using scalar_t = typename values_view_t::value_type;
using size_type = typename crsmat_t::size_type;
using scalar_t = typename values_view_t::value_type;
using size_type = typename crsmat_t::size_type;

using host_values_view_t =
typename host_crsmat_t::values_type::non_const_type;

using execution_space = typename values_view_t::execution_space;
using memory_space = typename execution_space::memory_space;
using memory_space = typename execution_space::memory_space;

using host_execution_space = typename host_values_view_t::execution_space;
using host_memory_space = typename host_execution_space::memory_space;
using host_memory_space = typename host_execution_space::memory_space;

using host_scalar_view_t = Kokkos::View<scalar_t *, host_memory_space>;
using scalar_view_t = Kokkos::View<scalar_t *, memory_space>;
using scalar_view_t = Kokkos::View<scalar_t *, memory_space>;

const scalar_t ZERO(0.0);
const scalar_t ONE(1.0);
Expand All @@ -258,7 +270,7 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
// > create a handle
cusparseStatus_t status;
cusparseHandle_t handle = 0;
status = cusparseCreate(&handle);
status = cusparseCreate(&handle);
if (CUSPARSE_STATUS_SUCCESS != status) {
std::cout << " ** cusparseCreate failed with "
<< getCuSparseErrorString(status) << " ** " << std::endl;
Expand All @@ -269,7 +281,7 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,

// > create a empty info structure for L-solve (e.g., analysis results)
csrsv2Info_t infoL = 0;
status = cusparseCreateCsrsv2Info(&infoL);
status = cusparseCreateCsrsv2Info(&infoL);
if (CUSPARSE_STATUS_SUCCESS != status) {
std::cout << " ** cusparseCreateCsrsv2Info failed with "
<< getCuSparseErrorString(status) << " ** " << std::endl;
Expand All @@ -279,14 +291,14 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
// Preparing for L-solve
// step 1: create a descriptor
size_type nnzL = L.nnz();
auto graphL = L.graph; // in_graph
auto row_mapL = graphL.row_map;
auto entriesL = graphL.entries;
auto valuesL = L.values;
auto graphL = L.graph; // in_graph
auto row_mapL = graphL.row_map;
auto entriesL = graphL.entries;
auto valuesL = L.values;

// NOTE: it is stored in CSC = UPPER + TRANSPOSE
cusparseMatDescr_t descrL = 0;
status = cusparseCreateMatDescr(&descrL);
status = cusparseCreateMatDescr(&descrL);
if (CUSPARSE_STATUS_SUCCESS != status) {
std::cout << " ** cusparseCreateMatDescr failed with "
<< getCuSparseErrorString(status) << " ** " << std::endl;
Expand All @@ -300,7 +312,7 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
// step 2: query how much memory used in csrsv2, and allocate the buffer
// pBuffer returned by cudaMalloc is automatically aligned to 128 bytes.
int pBufferSize;
void *pBufferL = 0;
void *pBufferL = 0;
cusparseOperation_t transL = (col_majorL ? CUSPARSE_OPERATION_TRANSPOSE
: CUSPARSE_OPERATION_NON_TRANSPOSE);
if (std::is_same<scalar_t, double>::value) {
Expand Down Expand Up @@ -374,14 +386,14 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
timer.reset();
if (std::is_same<scalar_t, double>::value) {
const double alpha = 1.0;
status = cusparseDcsrsv2_solve(
status = cusparseDcsrsv2_solve(
handle, transL, nrows, nnzL, &alpha, descrL,
reinterpret_cast<double *>(valuesL.data()), row_mapL.data(),
entriesL.data(), infoL, reinterpret_cast<double *>(rhs.data()),
reinterpret_cast<double *>(sol.data()), policy, pBufferL);
} else {
const cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
status = cusparseZcsrsv2_solve(
status = cusparseZcsrsv2_solve(
handle, transL, nrows, nnzL, &alpha, descrL,
reinterpret_cast<cuDoubleComplex *>(valuesL.data()), row_mapL.data(),
entriesL.data(), infoL, reinterpret_cast<cuDoubleComplex *>(rhs.data()),
Expand All @@ -404,14 +416,14 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
// ==============================================
// Preparing for U-solve
size_type nnzU = U.nnz();
auto graphU = U.graph; // in_graph
auto row_mapU = graphU.row_map;
auto entriesU = graphU.entries;
auto valuesU = U.values;
auto graphU = U.graph; // in_graph
auto row_mapU = graphU.row_map;
auto entriesU = graphU.entries;
auto valuesU = U.values;

// > create a empty info structure for U-solve (e.g., analysis results)
csrsv2Info_t infoU = 0;
status = cusparseCreateCsrsv2Info(&infoU);
status = cusparseCreateCsrsv2Info(&infoU);
if (CUSPARSE_STATUS_SUCCESS != status) {
std::cout << " ** cusparseCreateCsrsv2Info failed with "
<< getCuSparseErrorString(status) << " ** " << std::endl;
Expand All @@ -420,7 +432,7 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
// ==============================================
// step 1: create a descriptor
cusparseMatDescr_t descrU = 0;
status = cusparseCreateMatDescr(&descrU);
status = cusparseCreateMatDescr(&descrU);
if (CUSPARSE_STATUS_SUCCESS != status) {
std::cout << " ** cusparseCreateMatDescr create status error name "
<< getCuSparseErrorString(status) << " ** " << std::endl;
Expand All @@ -438,7 +450,7 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
// ==============================================
// step 2: query how much memory used in csrsv2, and allocate the buffer
// pBuffer returned by cudaMalloc is automatically aligned to 128 bytes.
void *pBufferU = 0;
void *pBufferU = 0;
cusparseOperation_t transU = (col_majorU ? CUSPARSE_OPERATION_TRANSPOSE
: CUSPARSE_OPERATION_NON_TRANSPOSE);
if (std::is_same<scalar_t, double>::value) {
Expand Down Expand Up @@ -485,14 +497,14 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
timer.reset();
if (std::is_same<scalar_t, double>::value) {
const double alpha = 1.0;
status = cusparseDcsrsv2_solve(
status = cusparseDcsrsv2_solve(
handle, transU, nrows, nnzU, &alpha, descrU,
reinterpret_cast<double *>(valuesU.data()), row_mapU.data(),
entriesU.data(), infoU, reinterpret_cast<double *>(sol.data()),
reinterpret_cast<double *>(rhs.data()), policy, pBufferU);
} else {
const cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0);
status = cusparseZcsrsv2_solve(
status = cusparseZcsrsv2_solve(
handle, transU, nrows, nnzU, &alpha, descrU,
reinterpret_cast<cuDoubleComplex *>(valuesU.data()), row_mapU.data(),
entriesU.data(), infoU, reinterpret_cast<cuDoubleComplex *>(sol.data()),
Expand Down Expand Up @@ -652,6 +664,8 @@ bool check_cusparse(host_crsmat_t &Mtx, bool col_majorL, crsmat_t &L,
}
return success;
}
#endif

#else
template <typename crsmat_t, typename host_crsmat_t>
bool check_cusparse(host_crsmat_t & /*Mtx*/, bool /*col_majorL*/,
Expand Down
Loading