Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cuda11 support #603

Merged
merged 16 commits into from
Aug 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions .github/workflows/windows-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,35 @@ on: [push]

jobs:
windows_cuda:
name: cuda102/release/shared (only compile)
strategy:
fail-fast: false
matrix:
config:
- {version: "10.2.89.20191206", name: "cuda102/release/shared"}
- {version: "latest", name: "cuda-latest/release/shared"}
name: msvc/${{ matrix.config.name }} (only compile)
runs-on: [windows-latest]
steps:
- uses: actions/checkout@v2
- name: setup
- name: setup (versioned)
if: matrix.config.version != 'latest'
run: |
choco install cuda --version=${{ matrix.config.version }} -y
- name: setup (latest)
if: matrix.config.version == 'latest'
run: |
choco install cuda --version=10.2.89.20191206 -y
choco install cuda -y
- name: configure
run: |
$env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.."
$env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.."
Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
refreshenv
mkdir build
cd build
$env:PATH="$pwd\windows_shared_library;$env:PATH"
cmake -DGINKGO_BUILD_CUDA=ON -DGINKGO_BUILD_OMP=OFF ..
cmake --build . -j4 --config Release

windows_ref:
strategy:
fail-fast: false
Expand Down
120 changes: 109 additions & 11 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -339,17 +339,17 @@ build/cuda101/clang/all/release/static:
- cuda
- gpu

build/cuda101/intel/cuda/debug/static:
# clang-cuda with cuda 10.1 and friends
build/clang-cuda101/gcc/all/release/shared:
<<: *default_build_with_test
image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019
image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019
variables:
<<: *default_variables
C_COMPILER: "icc"
CXX_COMPILER: "icpc"
CUDA_COMPILER: "clang++"
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: "Debug"
BUILD_SHARED_LIBS: "OFF"
BUILD_HIP: "ON"
BUILD_TYPE: "Release"
CUDA_ARCH: 35
only:
variables:
Expand All @@ -359,17 +359,58 @@ build/cuda101/intel/cuda/debug/static:
- cuda
- gpu

# clang-cuda with cuda 10.1 and friends
build/clang-cuda101/gcc/all/release/shared:
build/clang-cuda101/clang/cuda/debug/static:
<<: *default_build_with_test
image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019
variables:
<<: *default_variables
C_COMPILER: "clang"
CXX_COMPILER: "clang++"
CUDA_COMPILER: "clang++"
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: "Debug"
BUILD_SHARED_LIBS: "OFF"
CUDA_ARCH: 35
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- cuda
- gpu

# cuda 10.2 and friends
build/cuda102/gcc/all/debug/shared:
<<: *default_build_with_test
image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019
variables:
<<: *default_variables
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_HIP: "ON"
BUILD_TYPE: "Debug"
CUDA_ARCH: 35
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- cuda
- gpu

build/cuda102/clang/all/release/static:
<<: *default_build_with_test
image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019
variables:
<<: *default_variables
C_COMPILER: "clang"
CXX_COMPILER: "clang++"
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_HIP: "ON"
BUILD_TYPE: "Release"
BUILD_SHARED_LIBS: "OFF"
CUDA_ARCH: 35
only:
variables:
Expand All @@ -379,14 +420,71 @@ build/clang-cuda101/gcc/all/release/shared:
- cuda
- gpu

build/clang-cuda101/clang/cuda/debug/static:
build/cuda102/intel/cuda/debug/static:
<<: *default_build_with_test
image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019
image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019
variables:
<<: *default_variables
C_COMPILER: "icc"
CXX_COMPILER: "icpc"
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: "Debug"
BUILD_SHARED_LIBS: "OFF"
CUDA_ARCH: 35
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- cuda
- gpu

# cuda 11.0 and friends
build/cuda110/gcc/cuda/debug/shared:
<<: *default_build_with_test
image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020
variables:
<<: *default_variables
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: "Debug"
CUDA_ARCH: 35
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- cuda
- gpu

build/cuda110/clang/cuda/release/static:
<<: *default_build_with_test
image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020
variables:
<<: *default_variables
C_COMPILER: "clang"
CXX_COMPILER: "clang++"
CUDA_COMPILER: "clang++"
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: "Release"
BUILD_SHARED_LIBS: "OFF"
CUDA_ARCH: 35
only:
variables:
- $RUN_CI_TAG
tags:
- private_ci
- cuda
- gpu

build/cuda110/intel/cuda/debug/static:
<<: *default_build_with_test
image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020
variables:
<<: *default_variables
C_COMPILER: "icc"
CXX_COMPILER: "icpc"
BUILD_OMP: "ON"
BUILD_CUDA: "ON"
BUILD_TYPE: "Debug"
Expand Down
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.12)
cmake_policy(SET CMP0074 NEW)
endif()

# Let CAS handle the CUDA architecture flags (for now)
# Windows still gives a CMP0104 warning if this is put in cuda.
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
cmake_policy(SET CMP0104 OLD)
endif()

project(Ginkgo LANGUAGES C CXX VERSION 1.2.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures")
set(Ginkgo_VERSION_TAG "develop")
set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG})
Expand Down
41 changes: 30 additions & 11 deletions benchmark/utils/cuda_linops.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ class CuspBase : public gko::LinOp {
};


#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)


template <typename ValueType = gko::default_precision,
typename IndexType = gko::int32>
class CuspCsrmp
Expand Down Expand Up @@ -298,6 +301,9 @@ class CuspCsrmm
};


#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)


template <typename ValueType = gko::default_precision,
typename IndexType = gko::int32>
class CuspCsrEx
Expand Down Expand Up @@ -388,7 +394,7 @@ class CuspCsrEx
{
#ifdef ALLOWMP
algmode_ = CUSPARSE_ALG_MERGE_PATH;
#endif
#endif // ALLOWMP
}

private:
Expand All @@ -400,6 +406,9 @@ class CuspCsrEx
};


#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)


template <typename ValueType = gko::default_precision,
typename IndexType = gko::int32,
cusparseHybPartition_t Partition = CUSPARSE_HYB_PARTITION_AUTO,
Expand Down Expand Up @@ -484,8 +493,12 @@ class CuspHybrid
};


#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) && \
!(defined(_WIN32) || defined(__CYGWIN__))
#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)


#if defined(CUDA_VERSION) && \
(CUDA_VERSION >= 11000 || \
((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))


template <typename ValueType>
Expand All @@ -512,7 +525,7 @@ void cusp_generic_spmv(std::shared_ptr<const gko::CudaExecutor> gpu_exec,
&vecb, dense_b->get_num_stored_elements(),
as_culibs_type(const_cast<ValueType *>(db)), cu_value));

size_t buffer_size = 0;
gko::size_type buffer_size = 0;
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpMV_bufferSize(
gpu_exec->get_cusparse_handle(), trans, &scalars.get_const_data()[0],
mat, vecb, &scalars.get_const_data()[1], vecx, cu_value, alg,
Expand Down Expand Up @@ -680,22 +693,25 @@ class CuspGenericCoo
};


#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) &&
// !(defined(_WIN32) || defined(__CYGWIN__))
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >=
// 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))


} // namespace detail


// Some shortcuts
using cusp_csr = detail::CuspCsr<>;
using cusp_csrex = detail::CuspCsrEx<>;
#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
using cusp_csr = detail::CuspCsr<>;
using cusp_csrmp = detail::CuspCsrmp<>;
using cusp_csrmm = detail::CuspCsrmm<>;
#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)


#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) && \
!(defined(_WIN32) || defined(__CYGWIN__))
#if defined(CUDA_VERSION) && \
(CUDA_VERSION >= 11000 || \
((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))


using cusp_gcsr = detail::CuspGenericCsr<>;
Expand All @@ -704,14 +720,17 @@ using cusp_gcsr2 =
using cusp_gcoo = detail::CuspGenericCoo<>;


#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) &&
// !(defined(_WIN32) || defined(__CYGWIN__))
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >=
// 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))


#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
using cusp_coo =
detail::CuspHybrid<double, gko::int32, CUSPARSE_HYB_PARTITION_USER, 0>;
using cusp_ell =
detail::CuspHybrid<double, gko::int32, CUSPARSE_HYB_PARTITION_MAX, 0>;
using cusp_hybrid = detail::CuspHybrid<>;
#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)


#endif // GKO_BENCHMARK_UTILS_CUDA_LINOPS_HPP_
Loading