ginkgo-project · Slaedr · Oct 26, 2021 · Mar 31, 2021 · Mar 31, 2021 · Jan 18, 2021
diff --git a/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc b/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc
@@ -0,0 +1,75 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+namespace kernel {
+
+/**
+ * Transposes every dense square block of a fixed-block value array in place.
+ *
+ * A subwarp (a tile of `subwarp_size` threads of the thread block) processes
+ * one block at a time. It starts with the block whose index is returned by
+ * `thread::get_subwarp_id_flat` and advances by the value returned by
+ * `thread::get_subwarp_num_flat` until the index reaches `nbnz`. For each
+ * block, every thread first copies its share of the
+ * `mat_blk_sz * mat_blk_sz` entries into a per-thread array and afterwards
+ * writes them back to the transposed offsets of the same block.
+ *
+ * NOTE(review): offsets into `values` are formed as `ibz * mat_blk_sz_2 + i`
+ * in the common type of `IndexType` and `int`; confirm that the total number
+ * of stored entries fits into that type.
+ *
+ * @tparam mat_blk_sz  The number of rows (and of columns) of each block.
+ * @tparam subwarp_size  The number of threads that cooperate on one block.
+ * @tparam ValueType  The type of the stored entries.
+ * @tparam IndexType  The type used to index the blocks.
+ *
+ * @param nbnz  The number of blocks; only block indices below this value are
+ *              processed.
+ * @param values  The entries of all blocks. Block `ibz` occupies the
+ *                `mat_blk_sz * mat_blk_sz` consecutive entries starting at
+ *                `ibz * mat_blk_sz * mat_blk_sz`. Each processed block is
+ *                overwritten with its transpose.
+ */
+template <int mat_blk_sz, int subwarp_size, typename ValueType,
+          typename IndexType>
+__global__ __launch_bounds__(default_block_size) void transpose_blocks(
+    const IndexType nbnz, ValueType* const values)
+{
+    const auto total_subwarp_count =
+        thread::get_subwarp_num_flat<subwarp_size, IndexType>();
+    const IndexType begin_blk =
+        thread::get_subwarp_id_flat<subwarp_size, IndexType>();
+
+    auto thread_block = group::this_thread_block();
+    auto subwarp_grp = group::tiled_partition<subwarp_size>(thread_block);
+    const int sw_threadidx = subwarp_grp.thread_rank();
+    /* mat_blk_sz_2 is the entry count of one block; num_entries_per_thread is
+     * that count divided by subwarp_size, rounded up. */
+    constexpr int mat_blk_sz_2{mat_blk_sz * mat_blk_sz};
+    constexpr int num_entries_per_thread{(mat_blk_sz_2 - 1) / subwarp_size + 1};
+    ValueType orig_vals[num_entries_per_thread];
+    // Each iteration transposes one block; all threads of the subwarp share ibz.
+    for (auto ibz = begin_blk; ibz < nbnz; ibz += total_subwarp_count) {
+        for (int i = sw_threadidx; i < mat_blk_sz_2; i += subwarp_size) {
+            orig_vals[i / subwarp_size] = values[ibz * mat_blk_sz_2 + i];
+        }
+        subwarp_grp.sync();  // finish all reads before overwriting the block
+        // orig_vals[i] holds block offset i * subwarp_size + sw_threadidx.
+        for (int i = 0; i < num_entries_per_thread; i++) {
+            const int orig_pos = i * subwarp_size + sw_threadidx;
+            if (orig_pos >= mat_blk_sz_2) {
+                break;  // this slot was never filled by the load loop
+            }
+            const int orig_row = orig_pos % mat_blk_sz;
+            const int orig_col = orig_pos / mat_blk_sz;
+            const int new_pos = orig_row * mat_blk_sz + orig_col;
+            values[ibz * mat_blk_sz_2 + new_pos] = orig_vals[i];
+        }
+        subwarp_grp.sync();
+    }
+}
+
+
+}  // namespace kernel
diff --git a/core/base/block_sizes.hpp b/core/base/block_sizes.hpp
@@ -0,0 +1,64 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2021, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#ifndef GKO_CORE_BASE_BLOCK_SIZES_HPP_
+#define GKO_CORE_BASE_BLOCK_SIZES_HPP_
+
+
+#include <ginkgo/config.hpp>
+#include <ginkgo/core/synthesizer/containers.hpp>
+
+
+namespace gko {
+namespace fixedblock {
+
+
+/**
+ * @def GKO_FIXED_BLOCK_CUSTOM_SIZES
+ * Optionally-defined comma-separated list of fixed block sizes to compile.
+ *
+ * The list is pasted verbatim into the `int` value list that defines
+ * `compiled_kernels`. When the macro is not defined, the block sizes
+ * 2, 3, 4 and 7 are used instead.
+ */
+#ifdef GKO_FIXED_BLOCK_CUSTOM_SIZES
+/**
+ * A compile-time list of block sizes for which dedicated fixed-block matrix
+ * and corresponding preconditioner kernels should be compiled.
+ *
+ * This definition takes the sizes from GKO_FIXED_BLOCK_CUSTOM_SIZES.
+ */
+using compiled_kernels = syn::value_list<int, GKO_FIXED_BLOCK_CUSTOM_SIZES>;
+#else  // GKO_FIXED_BLOCK_CUSTOM_SIZES is not defined: use the built-in sizes
+using compiled_kernels = syn::value_list<int, 2, 3, 4, 7>;
+#endif  // GKO_FIXED_BLOCK_CUSTOM_SIZES
+
+
+}  // namespace fixedblock
+}  // namespace gko
+
+
+#endif  // GKO_CORE_BASE_BLOCK_SIZES_HPP_
diff --git a/core/base/utils.hpp b/core/base/utils.hpp
@@ -219,6 +219,20 @@ std::shared_ptr<const Dest> convert_to_with_sorting(
                                                             skip_sorting);
 }
 
+/**
+ * Converts the given arguments into an array of entries of the requested
+ * template type.
+ *
+ * Every argument is converted with `static_cast<T>`; the arguments are used
+ * as lvalues and are not forwarded. The size of the array equals the number
+ * of arguments, so a call without arguments yields an empty array.
+ *
+ * @tparam T  The requested type of entries in the output array.
+ * @tparam Args  The types of the given arguments; each of them has to be
+ *               convertible to T by `static_cast`.
+ *
+ * @param args  Entities to be filled into an array after casting to type T.
+ *
+ * @return  A `std::array<T, sizeof...(Args)>` that contains the converted
+ *          arguments in the order in which they were passed.
+ */
+template <typename T, typename... Args>
+constexpr std::array<T, sizeof...(Args)> to_std_array(Args&&... args)
+{
+    return {static_cast<T>(args)...};
+}
+
 
 }  // namespace gko
 

diff --git a/core/matrix/fbcsr.cpp b/core/matrix/fbcsr.cpp
@@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
 #include <ginkgo/core/base/math.hpp>
+#include <ginkgo/core/base/precision_dispatch.hpp>
 #include <ginkgo/core/base/utils.hpp>
 #include <ginkgo/core/matrix/dense.hpp>
 #include <ginkgo/core/matrix/identity.hpp>
@@ -155,14 +156,17 @@ template <typename ValueType, typename IndexType>
 void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const b,
                                              LinOp* const x) const
 {
-    using Dense = Dense<ValueType>;
     if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) {
         // if b is a FBCSR matrix, we need an SpGeMM
         GKO_NOT_SUPPORTED(b_fbcsr);
     } else {
         // otherwise we assume that b is dense and compute a SpMV/SpMM
-        this->get_executor()->run(
-            fbcsr::make_spmv(this, as<Dense>(b), as<Dense>(x)));
+        precision_dispatch_real_complex<ValueType>(
+            [this](auto dense_b, auto dense_x) {
+                this->get_executor()->run(
+                    fbcsr::make_spmv(this, dense_b, dense_x));
+            },
+            b, x);
     }
 }
 
@@ -173,7 +177,6 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha,
                                              const LinOp* const beta,
                                              LinOp* const x) const
 {
-    using Dense = Dense<ValueType>;
     if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) {
         // if b is a FBCSR matrix, we need an SpGeMM
         GKO_NOT_SUPPORTED(b_fbcsr);
@@ -182,9 +185,13 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha,
         GKO_NOT_SUPPORTED(b_ident);
     } else {
         // otherwise we assume that b is dense and compute a SpMV/SpMM
-        this->get_executor()->run(
-            fbcsr::make_advanced_spmv(as<Dense>(alpha), this, as<Dense>(b),
-                                      as<Dense>(beta), as<Dense>(x)));
+        precision_dispatch_real_complex<ValueType>(
+            [this](auto dense_alpha, auto dense_b, auto dense_beta,
+                   auto dense_x) {
+                this->get_executor()->run(fbcsr::make_advanced_spmv(
+                    dense_alpha, this, dense_b, dense_beta, dense_x));
+            },
+            alpha, b, beta, x);
     }
 }
 

diff --git a/core/synthesizer/implementation_selection.hpp b/core/synthesizer/implementation_selection.hpp
@@ -68,7 +68,10 @@ namespace syn {
             _name(::gko::syn::value_list<int, Rest...>(), is_eligible,       \
                   int_args, type_args, std::forward<InferredArgs>(args)...); \
         }                                                                    \
-    }
+    }                                                                        \
+    static_assert(true,                                                      \
+                  "This assert is used to counter the false positive extra " \
+                  "semi-colon warnings")
 
 #define GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION(_name, _callable)         \
     template <typename Predicate, bool... BoolArgs, int... IntArgs,          \

diff --git a/core/test/utils/CMakeLists.txt b/core/test/utils/CMakeLists.txt
@@ -2,5 +2,6 @@ ginkgo_create_test(array_generator_test)
 ginkgo_create_test(assertions_test)
 ginkgo_create_test(matrix_generator_test)
 ginkgo_create_test(matrix_utils_test)
+ginkgo_create_test(fb_matrix_generator_test)
 ginkgo_create_test(unsort_matrix_test)
 ginkgo_create_test(value_generator_test)