-
Notifications
You must be signed in to change notification settings - Fork 94
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fbcsr kernels for Cuda and OpenMP #775
Changes from all commits
75f503c
e20fae1
e072f50
8703917
00a54f3
611e213
91a987b
4bbcc60
a587c67
6bc6c5e
db7cc15
561d66e
65e3814
a972876
f7bcf1e
44e052b
5ea4723
2f753ef
cd11eb1
7c18da4
877481c
5fe5a23
e9fa3db
2d26a21
9eedddf
05539bc
b0072c3
2a74117
50d9207
92d3680
d83cc17
5ec70fb
60569fa
eff416a
fd01a57
bfc9634
3113d0f
037b4d4
854ca45
4b57f0c
0c30369
af36b2c
433fddd
88c064d
010279a
3d9312d
079c52c
3fa1b4e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/*******************************<GINKGO LICENSE>****************************** | ||
Copyright (c) 2017-2021, the Ginkgo authors | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions | ||
are met: | ||
|
||
1. Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in the | ||
documentation and/or other materials provided with the distribution. | ||
|
||
3. Neither the name of the copyright holder nor the names of its | ||
contributors may be used to endorse or promote products derived from | ||
this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
******************************<GINKGO LICENSE>*******************************/ | ||
|
||
namespace kernel { | ||
|
||
|
||
// Transposes, in place, each of the `nbnz` square blocks stored back to back
// in `values`; block `ibz` occupies the mat_blk_sz * mat_blk_sz entries
// starting at `values[ibz * mat_blk_sz * mat_blk_sz]`.
//
// Every subwarp (a tile of `subwarp_size` threads) works on one block at a
// time and then advances by `total_subwarp_count` blocks.
//
// Template parameters:
//   mat_blk_sz    number of rows (= number of columns) of each block
//   subwarp_size  number of threads in the tile that cooperates on a block
//   ValueType     type of the block entries
//   IndexType     integer type used for the block count and the block index
//
// Parameters:
//   nbnz    number of blocks to transpose
//   values  the block entries; overwritten with the transposed blocks
//
// NOTE(review): `default_block_size` and the `thread::` / `group::` helpers
// are declared outside this section. Presumably get_subwarp_id_flat returns
// this subwarp's index across the whole grid and get_subwarp_num_flat the
// total number of subwarps -- confirm against their definitions.
template <int mat_blk_sz, int subwarp_size, typename ValueType, | ||
typename IndexType> | ||
__global__ __launch_bounds__(default_block_size) void transpose_blocks( | ||
const IndexType nbnz, ValueType* const values) | ||
{ | ||
const auto total_subwarp_count = | ||
thread::get_subwarp_num_flat<subwarp_size, IndexType>(); | ||
const IndexType begin_blk = | ||
thread::get_subwarp_id_flat<subwarp_size, IndexType>(); | ||
|
||
auto thread_block = group::this_thread_block(); | ||
auto subwarp_grp = group::tiled_partition<subwarp_size>(thread_block); | ||
const int sw_threadidx = subwarp_grp.thread_rank(); | ||
|
||
// Number of entries in one block.
constexpr int mat_blk_sz_2{mat_blk_sz * mat_blk_sz}; | ||
// Entries each thread has to hold: ceil(mat_blk_sz_2 / subwarp_size).
constexpr int num_entries_per_thread{(mat_blk_sz_2 - 1) / subwarp_size + 1}; | ||
// Per-thread copy of the entries this thread read from the current block.
ValueType orig_vals[num_entries_per_thread]; | ||
|
||
for (auto ibz = begin_blk; ibz < nbnz; ibz += total_subwarp_count) { | ||
// The thread with rank t reads entries t, t + subwarp_size, ... of the
// block; the k-th entry it reads is kept in orig_vals[k].
for (int i = sw_threadidx; i < mat_blk_sz_2; i += subwarp_size) { | ||
orig_vals[i / subwarp_size] = values[ibz * mat_blk_sz_2 + i]; | ||
} | ||
// Wait until the whole subwarp has read its entries: the stores below
// target positions that were read by other threads of the subwarp.
subwarp_grp.sync(); | ||
|
||
for (int i = 0; i < num_entries_per_thread; i++) { | ||
// Flat position, within the block, of the i-th entry held by this
// thread.
const int orig_pos = i * subwarp_size + sw_threadidx; | ||
// Past the end of the block; this can only happen when mat_blk_sz_2
// is not a multiple of subwarp_size.
if (orig_pos >= mat_blk_sz_2) { | ||
break; | ||
} | ||
// Split the flat position into its two indices and swap their roles
// to get the position of this entry in the transposed block.
const int orig_row = orig_pos % mat_blk_sz; | ||
const int orig_col = orig_pos / mat_blk_sz; | ||
const int new_pos = orig_row * mat_blk_sz + orig_col; | ||
values[ibz * mat_blk_sz_2 + new_pos] = orig_vals[i]; | ||
} | ||
// Bring the subwarp back in step before it moves on to its next block.
subwarp_grp.sync(); | ||
} | ||
} | ||
|
||
|
||
} // namespace kernel |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/*******************************<GINKGO LICENSE>****************************** | ||
Copyright (c) 2017-2021, the Ginkgo authors | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions | ||
are met: | ||
|
||
1. Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in the | ||
documentation and/or other materials provided with the distribution. | ||
|
||
3. Neither the name of the copyright holder nor the names of its | ||
contributors may be used to endorse or promote products derived from | ||
this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS | ||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | ||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
******************************<GINKGO LICENSE>*******************************/ | ||
|
||
#ifndef GKO_CORE_BASE_BLOCK_SIZES_HPP_ | ||
#define GKO_CORE_BASE_BLOCK_SIZES_HPP_ | ||
|
||
|
||
#include <ginkgo/config.hpp> | ||
#include <ginkgo/core/synthesizer/containers.hpp> | ||
|
||
|
||
namespace gko { | ||
namespace fixedblock { | ||
|
||
|
||
/** | ||
* @def GKO_FIXED_BLOCK_CUSTOM_SIZES | ||
* Optionally-defined comma-separated list of fixed block sizes to compile. | ||
*/ | ||
#ifdef GKO_FIXED_BLOCK_CUSTOM_SIZES | ||
/** | ||
* A compile-time list of block sizes for which dedicated fixed-block matrix | ||
* and corresponding preconditioner kernels should be compiled. | ||
*/ | ||
using compiled_kernels = syn::value_list<int, GKO_FIXED_BLOCK_CUSTOM_SIZES>; | ||
#else | ||
// Fallback used when GKO_FIXED_BLOCK_CUSTOM_SIZES is not defined: the same
// compile-time list, fixed to the block sizes 2, 3, 4 and 7.
using compiled_kernels = syn::value_list<int, 2, 3, 4, 7>; | ||
MarcelKoch marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#endif | ||
|
||
|
||
} // namespace fixedblock | ||
} // namespace gko | ||
|
||
|
||
#endif // GKO_CORE_BASE_BLOCK_SIZES_HPP_ |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
#include <ginkgo/core/base/exception_helpers.hpp> | ||
#include <ginkgo/core/base/executor.hpp> | ||
#include <ginkgo/core/base/math.hpp> | ||
#include <ginkgo/core/base/precision_dispatch.hpp> | ||
#include <ginkgo/core/base/utils.hpp> | ||
#include <ginkgo/core/matrix/dense.hpp> | ||
#include <ginkgo/core/matrix/identity.hpp> | ||
|
@@ -155,14 +156,17 @@ template <typename ValueType, typename IndexType> | |
void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const b, | ||
LinOp* const x) const | ||
{ | ||
using Dense = Dense<ValueType>; | ||
if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) { | ||
// if b is a FBCSR matrix, we need an SpGeMM | ||
GKO_NOT_SUPPORTED(b_fbcsr); | ||
} else { | ||
Comment on lines
159
to
162
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. precision_dispatch_real_complex also throws the error when the input is not dense, so this part is unnecessary. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But I don't want the spmv kernel to be called when There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think temporary_conversion is implemented by dynamic_cast. Maybe @upsj can correct me. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we check against the exact type, not ConvertibleTo. |
||
// otherwise we assume that b is dense and compute a SpMV/SpMM | ||
this->get_executor()->run( | ||
fbcsr::make_spmv(this, as<Dense>(b), as<Dense>(x))); | ||
precision_dispatch_real_complex<ValueType>( | ||
[this](auto dense_b, auto dense_x) { | ||
this->get_executor()->run( | ||
fbcsr::make_spmv(this, dense_b, dense_x)); | ||
}, | ||
b, x); | ||
} | ||
} | ||
|
||
|
@@ -173,7 +177,6 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha, | |
const LinOp* const beta, | ||
LinOp* const x) const | ||
{ | ||
using Dense = Dense<ValueType>; | ||
if (auto b_fbcsr = dynamic_cast<const Fbcsr<ValueType, IndexType>*>(b)) { | ||
// if b is a FBCSR matrix, we need an SpGeMM | ||
GKO_NOT_SUPPORTED(b_fbcsr); | ||
|
@@ -182,9 +185,13 @@ void Fbcsr<ValueType, IndexType>::apply_impl(const LinOp* const alpha, | |
GKO_NOT_SUPPORTED(b_ident); | ||
} else { | ||
// otherwise we assume that b is dense and compute a SpMV/SpMM | ||
this->get_executor()->run( | ||
fbcsr::make_advanced_spmv(as<Dense>(alpha), this, as<Dense>(b), | ||
as<Dense>(beta), as<Dense>(x))); | ||
precision_dispatch_real_complex<ValueType>( | ||
[this](auto dense_alpha, auto dense_b, auto dense_beta, | ||
auto dense_x) { | ||
this->get_executor()->run(fbcsr::make_advanced_spmv( | ||
dense_alpha, this, dense_b, dense_beta, dense_x)); | ||
}, | ||
alpha, b, beta, x); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should it be under cuda/hip matrix/fb_csr there,
like the Jacobi generate stuff?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will be used by all backends, so it cannot be cuda or hip. Further, any algorithm that uses static fixed-size blocks, like the ParBILU that I was working on, will also use this. So I decided to have a common
fixedblock
namespace for such common things. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, that makes sense.
Is ParBILU for blockCSR or a different format?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At least initially, ParBILU will only be for Fbcsr.