ginkgo-project · yhmtsai · Aug 10, 2020 · Jul 1, 2020 · Jul 2, 2020 · Jul 2, 2020
diff --git a/.github/workflows/windows-build.yml b/.github/workflows/windows-build.yml
@@ -4,24 +4,35 @@ on: [push]
 
 jobs:
   windows_cuda:
-    name: cuda102/release/shared (only compile)
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+        - {version: "10.2.89.20191206", name: "cuda102/release/shared"}
+        - {version: "latest", name: "cuda-latest/release/shared"}
+    name: msvc/${{ matrix.config.name }} (only compile)
     runs-on: [windows-latest]
     steps:
     - uses: actions/checkout@v2
-    - name: setup
+    - name: setup (versioned)
+      if: matrix.config.version != 'latest'
+      run: |
+        choco install cuda --version=${{ matrix.config.version }} -y
+    - name: setup (latest)
+      if: matrix.config.version == 'latest'
       run: |
-        choco install cuda --version=10.2.89.20191206 -y
+        choco install cuda -y
     - name: configure
       run: |
-        $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.."   
+        $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.."
         Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1"
         refreshenv
         mkdir build
         cd build
         $env:PATH="$pwd\windows_shared_library;$env:PATH"
         cmake -DGINKGO_BUILD_CUDA=ON -DGINKGO_BUILD_OMP=OFF ..
         cmake --build . -j4 --config Release
-        
+
   windows_ref:
     strategy:
       fail-fast: false

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -339,17 +339,17 @@ build/cuda101/clang/all/release/static:
     - cuda
     - gpu
 
-build/cuda101/intel/cuda/debug/static:
+# clang-cuda with cuda 10.1 and friends
+build/clang-cuda101/gcc/all/release/shared:
   <<: *default_build_with_test
-  image: localhost:5000/gko-cuda101-gnu8-llvm7-intel2019
+  image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019
   variables:
     <<: *default_variables
-    C_COMPILER: "icc"
-    CXX_COMPILER: "icpc"
+    CUDA_COMPILER: "clang++"
     BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
-    BUILD_TYPE: "Debug"
-    BUILD_SHARED_LIBS: "OFF"
+    BUILD_HIP: "ON"
+    BUILD_TYPE: "Release"
     CUDA_ARCH: 35
   only:
     variables:
@@ -359,17 +359,58 @@ build/cuda101/intel/cuda/debug/static:
     - cuda
     - gpu
 
-# clang-cuda with cuda 10.1 and friends
-build/clang-cuda101/gcc/all/release/shared:
+build/clang-cuda101/clang/cuda/debug/static:
   <<: *default_build_with_test
   image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019
   variables:
     <<: *default_variables
+    C_COMPILER: "clang"
+    CXX_COMPILER: "clang++"
     CUDA_COMPILER: "clang++"
     BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
+    BUILD_TYPE: "Debug"
+    BUILD_SHARED_LIBS: "OFF"
+    CUDA_ARCH: 35
+  only:
+    variables:
+      - $RUN_CI_TAG
+  tags:
+    - private_ci
+    - cuda
+    - gpu
+
+# cuda 10.2 and friends
+build/cuda102/gcc/all/debug/shared:
+  <<: *default_build_with_test
+  image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019
+  variables:
+    <<: *default_variables
+    BUILD_OMP: "ON"
+    BUILD_CUDA: "ON"
+    BUILD_HIP: "ON"
+    BUILD_TYPE: "Debug"
+    CUDA_ARCH: 35
+  only:
+    variables:
+      - $RUN_CI_TAG
+  tags:
+    - private_ci
+    - cuda
+    - gpu
+
+build/cuda102/clang/all/release/static:
+  <<: *default_build_with_test
+  image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019
+  variables:
+    <<: *default_variables
+    C_COMPILER: "clang"
+    CXX_COMPILER: "clang++"
+    BUILD_OMP: "ON"
+    BUILD_CUDA: "ON"
     BUILD_HIP: "ON"
     BUILD_TYPE: "Release"
+    BUILD_SHARED_LIBS: "OFF"
     CUDA_ARCH: 35
   only:
     variables:
@@ -379,14 +420,71 @@ build/clang-cuda101/gcc/all/release/shared:
     - cuda
     - gpu
 
-build/clang-cuda101/clang/cuda/debug/static:
+build/cuda102/intel/cuda/debug/static:
   <<: *default_build_with_test
-  image: localhost:5000/gko-cuda101-gnu8-llvm10-intel2019
+  image: localhost:5000/gko-cuda102-gnu8-llvm8-intel2019
+  variables:
+    <<: *default_variables
+    C_COMPILER: "icc"
+    CXX_COMPILER: "icpc"
+    BUILD_OMP: "ON"
+    BUILD_CUDA: "ON"
+    BUILD_TYPE: "Debug"
+    BUILD_SHARED_LIBS: "OFF"
+    CUDA_ARCH: 35
+  only:
+    variables:
+      - $RUN_CI_TAG
+  tags:
+    - private_ci
+    - cuda
+    - gpu
+
+# cuda 11.0 and friends
+build/cuda110/gcc/cuda/debug/shared:
+  <<: *default_build_with_test
+  image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020
+  variables:
+    <<: *default_variables
+    BUILD_OMP: "ON"
+    BUILD_CUDA: "ON"
+    BUILD_TYPE: "Debug"
+    CUDA_ARCH: 35
+  only:
+    variables:
+      - $RUN_CI_TAG
+  tags:
+    - private_ci
+    - cuda
+    - gpu
+
+build/cuda110/clang/cuda/release/static:
+  <<: *default_build_with_test
+  image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020
   variables:
     <<: *default_variables
     C_COMPILER: "clang"
     CXX_COMPILER: "clang++"
-    CUDA_COMPILER: "clang++"
+    BUILD_OMP: "ON"
+    BUILD_CUDA: "ON"
+    BUILD_TYPE: "Release"
+    BUILD_SHARED_LIBS: "OFF"
+    CUDA_ARCH: 35
+  only:
+    variables:
+      - $RUN_CI_TAG
+  tags:
+    - private_ci
+    - cuda
+    - gpu
+
+build/cuda110/intel/cuda/debug/static:
+  <<: *default_build_with_test
+  image: localhost:5000/gko-cuda110-gnu9-llvm9-intel2020
+  variables:
+    <<: *default_variables
+    C_COMPILER: "icc"
+    CXX_COMPILER: "icpc"
     BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
     BUILD_TYPE: "Debug"

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -5,6 +5,12 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.12)
     cmake_policy(SET CMP0074 NEW)
 endif()
 
+# Let CAS handle the CUDA architecture flags (for now)
+# Windows still gives a CMP0104 warning if this policy is set in cuda instead.
+if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
+    cmake_policy(SET CMP0104 OLD)
+endif()
+
 project(Ginkgo LANGUAGES C CXX VERSION 1.2.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures")
 set(Ginkgo_VERSION_TAG "develop")
 set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG})

diff --git a/benchmark/utils/cuda_linops.hpp b/benchmark/utils/cuda_linops.hpp
@@ -117,6 +117,9 @@ class CuspBase : public gko::LinOp {
 };
 
 
+#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
+
+
 template <typename ValueType = gko::default_precision,
           typename IndexType = gko::int32>
 class CuspCsrmp
@@ -298,6 +301,9 @@ class CuspCsrmm
 };
 
 
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
+
+
 template <typename ValueType = gko::default_precision,
           typename IndexType = gko::int32>
 class CuspCsrEx
@@ -388,7 +394,7 @@ class CuspCsrEx
     {
 #ifdef ALLOWMP
         algmode_ = CUSPARSE_ALG_MERGE_PATH;
-#endif
+#endif  // ALLOWMP
     }
 
 private:
@@ -400,6 +406,9 @@ class CuspCsrEx
 };
 
 
+#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
+
+
 template <typename ValueType = gko::default_precision,
           typename IndexType = gko::int32,
           cusparseHybPartition_t Partition = CUSPARSE_HYB_PARTITION_AUTO,
@@ -484,8 +493,12 @@ class CuspHybrid
 };
 
 
-#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) && \
-    !(defined(_WIN32) || defined(__CYGWIN__))
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
+
+
+#if defined(CUDA_VERSION) &&  \
+    (CUDA_VERSION >= 11000 || \
+     ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))
 
 
 template <typename ValueType>
@@ -512,7 +525,7 @@ void cusp_generic_spmv(std::shared_ptr<const gko::CudaExecutor> gpu_exec,
         &vecb, dense_b->get_num_stored_elements(),
         as_culibs_type(const_cast<ValueType *>(db)), cu_value));
 
-    size_t buffer_size = 0;
+    gko::size_type buffer_size = 0;
     GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpMV_bufferSize(
         gpu_exec->get_cusparse_handle(), trans, &scalars.get_const_data()[0],
         mat, vecb, &scalars.get_const_data()[1], vecx, cu_value, alg,
@@ -680,22 +693,25 @@ class CuspGenericCoo
 };
 
 
-#endif  // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) &&
-        // !(defined(_WIN32) || defined(__CYGWIN__))
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >=
+        // 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))
 
 
 }  // namespace detail
 
 
 // Some shortcuts
-using cusp_csr = detail::CuspCsr<>;
 using cusp_csrex = detail::CuspCsrEx<>;
+#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
+using cusp_csr = detail::CuspCsr<>;
 using cusp_csrmp = detail::CuspCsrmp<>;
 using cusp_csrmm = detail::CuspCsrmm<>;
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
 
 
-#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) && \
-    !(defined(_WIN32) || defined(__CYGWIN__))
+#if defined(CUDA_VERSION) &&  \
+    (CUDA_VERSION >= 11000 || \
+     ((CUDA_VERSION >= 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))
 
 
 using cusp_gcsr = detail::CuspGenericCsr<>;
@@ -704,14 +720,17 @@ using cusp_gcsr2 =
 using cusp_gcoo = detail::CuspGenericCoo<>;
 
 
-#endif  // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010) &&
-        // !(defined(_WIN32) || defined(__CYGWIN__))
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >=
+        // 10010) && !(defined(_WIN32) || defined(__CYGWIN__))))
 
 
+#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
 using cusp_coo =
     detail::CuspHybrid<double, gko::int32, CUSPARSE_HYB_PARTITION_USER, 0>;
 using cusp_ell =
     detail::CuspHybrid<double, gko::int32, CUSPARSE_HYB_PARTITION_MAX, 0>;
 using cusp_hybrid = detail::CuspHybrid<>;
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
+
 
 #endif  // GKO_BENCHMARK_UTILS_CUDA_LINOPS_HPP_