From ae7aa12995fb49d1231dcc13af39cf2e7c803af0 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Wed, 15 Mar 2023 13:49:47 +0100 Subject: [PATCH 1/5] fixes wrong index ordering --- benchmark/utils/stencil_matrix.hpp | 34 ++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/benchmark/utils/stencil_matrix.hpp b/benchmark/utils/stencil_matrix.hpp index ca89aa86079..dceb20211db 100644 --- a/benchmark/utils/stencil_matrix.hpp +++ b/benchmark/utils/stencil_matrix.hpp @@ -98,9 +98,9 @@ gko::matrix_data generate_2d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); - auto global_offset = [&](const int position_x, const int position_y) { - return static_cast(local_size) * dims[0] * position_x + - static_cast(local_size) * position_y; + auto global_offset = [&](const int position_y, const int position_x) { + return static_cast(local_size) * position_x + + static_cast(local_size) * dims[0] * position_y; }; auto target_position = [&](const IndexType i, const int position) { @@ -116,19 +116,18 @@ gko::matrix_data generate_2d_stencil_box( : discretization_points - i); }; - auto flat_idx = [&](const IndexType ix, const IndexType iy) { + auto flat_idx = [&](const IndexType iy, const IndexType ix) { auto tpx = target_position(ix, positions[0]); auto tpy = target_position(iy, positions[1]); if (is_in_box(tpx, dims[0]) && is_in_box(tpy, dims[1])) { - return global_offset(tpx, tpy) + - target_local_idx(ix) * discretization_points + - target_local_idx(iy); + return global_offset(tpy, tpx) + target_local_idx(ix) + + target_local_idx(iy) * discretization_points; } else { return static_cast(-1); } }; - auto is_valid_neighbor = [&](const IndexType d_i, const IndexType d_j) { + auto is_valid_neighbor = [&](const IndexType d_j, const IndexType d_i) { return !restricted || d_i == 0 || d_j == 0; }; @@ -209,11 +208,11 @@ gko::matrix_data generate_3d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); - auto global_offset = [&](const int position_x, const int position_y, - const int position_z) { - return position_x * static_cast(local_size) * dims[0] * dims[1] + + auto global_offset = [&](const int position_z, const int position_y, + const int position_x) { + return position_x * static_cast(local_size) + position_y * static_cast(local_size) * dims[0] + - position_z * static_cast(local_size); + position_z * static_cast(local_size) * dims[0] * dims[1]; }; auto target_position = [&](const IndexType i, const int position) { @@ -229,18 +228,17 @@ gko::matrix_data generate_3d_stencil_box( : discretization_points - i); }; - auto flat_idx = [&](const IndexType ix, const IndexType iy, - const IndexType iz) { + auto flat_idx = [&](const IndexType iz, const IndexType iy, + const IndexType ix) { auto tpx = target_position(ix, positions[0]); auto tpy = target_position(iy, positions[1]); auto tpz = target_position(iz, positions[2]); if (is_in_box(tpx, dims[0]) && is_in_box(tpy, dims[1]) && is_in_box(tpz, dims[2])) { - return global_offset(tpx, tpy, tpz) + - target_local_idx(ix) * discretization_points * - discretization_points + + return global_offset(tpz, tpy, tpx) + target_local_idx(ix) + target_local_idx(iy) * discretization_points + - target_local_idx(iz); + target_local_idx(iz) * discretization_points * + discretization_points; } else { return static_cast(-1); } From 42c85b650a349d7b33f56b1dad76798074ae177e Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 24 Mar 2023 11:46:49 +0100 Subject: [PATCH 2/5] review update: - rename variables to match function calls Co-authored-by: Yu-Hsiang M. Tsai --- benchmark/utils/stencil_matrix.hpp | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/benchmark/utils/stencil_matrix.hpp b/benchmark/utils/stencil_matrix.hpp index dceb20211db..652bd940f04 100644 --- a/benchmark/utils/stencil_matrix.hpp +++ b/benchmark/utils/stencil_matrix.hpp @@ -127,8 +127,8 @@ gko::matrix_data generate_2d_stencil_box( } }; - auto is_valid_neighbor = [&](const IndexType d_j, const IndexType d_i) { - return !restricted || d_i == 0 || d_j == 0; + auto is_valid_neighbor = [&](const IndexType dy, const IndexType dx) { + return !restricted || dx == 0 || dy == 0; }; auto nnz_in_row = [&]() { @@ -144,13 +144,13 @@ gko::matrix_data generate_2d_stencil_box( }; const auto diag_value = static_cast(nnz_in_row() - 1); - for (IndexType i = 0; i < discretization_points; ++i) { - for (IndexType j = 0; j < discretization_points; ++j) { - auto row = flat_idx(i, j); - for (IndexType d_i : {-1, 0, 1}) { - for (IndexType d_j : {-1, 0, 1}) { - if (is_valid_neighbor(d_i, d_j)) { - auto col = flat_idx(i + d_i, j + d_j); + for (IndexType iy = 0; iy < discretization_points; ++iy) { + for (IndexType ix = 0; ix < discretization_points; ++ix) { + auto row = flat_idx(iy, ix); + for (IndexType dy : {-1, 0, 1}) { + for (IndexType dx : {-1, 0, 1}) { + if (is_valid_neighbor(dy, dx)) { + auto col = flat_idx(iy + dy, ix + dx); if (is_in_box(col, static_cast(global_size))) { if (col != row) { @@ -244,9 +244,9 @@ gko::matrix_data generate_3d_stencil_box( } }; - auto is_valid_neighbor = [&](const IndexType d_i, const IndexType d_j, - const IndexType d_k) { - return !restricted || ((d_i == 0) + (d_j == 0) + (d_k == 0) >= 2); + auto is_valid_neighbor = [&](const IndexType dz, const IndexType dy, + const IndexType dx) { + return !restricted || ((dz == 0) + (dy == 0) + (dx == 0) >= 2); }; auto nnz_in_row = [&]() { @@ -264,15 +264,15 @@ gko::matrix_data generate_3d_stencil_box( }; const auto diag_value = static_cast(nnz_in_row() - 1); - for (IndexType i = 0; i < discretization_points; ++i) { - for (IndexType j = 0; j < discretization_points; ++j) { - for (IndexType k = 0; k < discretization_points; ++k) { - auto row = flat_idx(i, j, k); - for (IndexType d_i : {-1, 0, 1}) { - for (IndexType d_j : {-1, 0, 1}) { - for (IndexType d_k : {-1, 0, 1}) { - if (is_valid_neighbor(d_i, d_j, d_k)) { - auto col = flat_idx(i + d_i, j + d_j, k + d_k); + for (IndexType iz = 0; iz < discretization_points; ++iz) { + for (IndexType iy = 0; iy < discretization_points; ++iy) { + for (IndexType ix = 0; ix < discretization_points; ++ix) { + auto row = flat_idx(iz, iy, ix); + for (IndexType dz : {-1, 0, 1}) { + for (IndexType dy : {-1, 0, 1}) { + for (IndexType dx : {-1, 0, 1}) { + if (is_valid_neighbor(dz, dy, dx)) { + auto col = flat_idx(iz + dz, iy + dy, ix + dx); if (is_in_box(col, static_cast( global_size))) { if (col != row) { From 4c3759b0b869289f37837d7a9ec2514952525253 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Fri, 24 Mar 2023 11:48:54 +0100 Subject: [PATCH 3/5] reserve enough space for the non-zeros --- benchmark/utils/stencil_matrix.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmark/utils/stencil_matrix.hpp b/benchmark/utils/stencil_matrix.hpp index 652bd940f04..f6cfd80501d 100644 --- a/benchmark/utils/stencil_matrix.hpp +++ b/benchmark/utils/stencil_matrix.hpp @@ -144,6 +144,8 @@ gko::matrix_data generate_2d_stencil_box( }; const auto diag_value = static_cast(nnz_in_row() - 1); + A_data.nonzeros.reserve(nnz_in_row() * local_size); + for (IndexType iy = 0; iy < discretization_points; ++iy) { for (IndexType ix = 0; ix < discretization_points; ++ix) { auto row = flat_idx(iy, ix); @@ -264,6 +266,8 @@ gko::matrix_data generate_3d_stencil_box( }; const auto diag_value = static_cast(nnz_in_row() - 1); + A_data.nonzeros.reserve(nnz_in_row() * local_size); + for (IndexType iz = 0; iz < discretization_points; ++iz) { for (IndexType iy = 0; iy < discretization_points; ++iy) { for (IndexType ix = 0; ix < discretization_points; ++ix) { From 100c35065d53040f5d805de373ec3edb28581f21 Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 28 Mar 2023 10:11:18 +0200 Subject: [PATCH 4/5] review updates: - prevent possible overflow Co-authored-by: Yu-Hsiang M. Tsai --- benchmark/utils/stencil_matrix.hpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/benchmark/utils/stencil_matrix.hpp b/benchmark/utils/stencil_matrix.hpp index f6cfd80501d..b9d8f689b29 100644 --- a/benchmark/utils/stencil_matrix.hpp +++ b/benchmark/utils/stencil_matrix.hpp @@ -98,9 +98,10 @@ gko::matrix_data generate_2d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); - auto global_offset = [&](const int position_y, const int position_x) { - return static_cast(local_size) * position_x + - static_cast(local_size) * dims[0] * position_y; + auto global_offset = [&](const IndexType position_y, + const IndexType position_x) { + return static_cast(local_size) * position_x + + static_cast(local_size) * dims[0] * position_y; }; auto target_position = [&](const IndexType i, const int position) { @@ -210,11 +211,13 @@ gko::matrix_data generate_3d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); - auto global_offset = [&](const int position_z, const int position_y, - const int position_x) { - return position_x * static_cast(local_size) + - position_y * static_cast(local_size) * dims[0] + - position_z * static_cast(local_size) * dims[0] * dims[1]; + auto global_offset = [&](const IndexType position_z, + const IndexType position_y, + const IndexType position_x) { + return position_x * static_cast(local_size) + + position_y * static_cast(local_size) * dims[0] + + position_z * static_cast(local_size) * dims[0] * + dims[1]; }; auto target_position = [&](const IndexType i, const int position) { From 8188957a05b285a5885cfa97fa5a5f410fb2ee9c Mon Sep 17 00:00:00 2001 From: Marcel Koch Date: Tue, 18 Apr 2023 08:38:14 +0200 Subject: [PATCH 5/5] review updates: - add comments to lambdas Co-authored-by: Tobias Ribizel --- benchmark/utils/stencil_matrix.hpp | 68 +++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/benchmark/utils/stencil_matrix.hpp b/benchmark/utils/stencil_matrix.hpp index b9d8f689b29..93ec4a0fb2e 100644 --- a/benchmark/utils/stencil_matrix.hpp +++ b/benchmark/utils/stencil_matrix.hpp @@ -53,7 +53,11 @@ double closest_nth_root(T v, int n) } } - +/** + * Checks if an index is within [0, bound). + * + * @return True if i in [0, bound). + */ template bool is_in_box(const IndexType i, const IndexType bound) { @@ -98,18 +102,35 @@ gko::matrix_data generate_2d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); + /** + * This computes the offsets in the global indices for a box at (position_y, + * position_x). + */ auto global_offset = [&](const IndexType position_y, const IndexType position_x) { return static_cast(local_size) * position_x + static_cast(local_size) * dims[0] * position_y; }; + /** + * This computes a single dimension of the target box position + * for a given index. The target box is the box that owns the given index. + * If the index is within the local indices [0, discretization_points) this + * returns the current position, otherwise it is shifted by +-1. + */ auto target_position = [&](const IndexType i, const int position) { return is_in_box(i, discretization_points) ? position : (i < 0 ? position - 1 : position + 1); }; + /** + * This computes a single dimension of target local index for a given index. + * The target local index is the local index within the index set of the box + * that owns the index. If the index is within the local indices [0, + * discretization_points), this returns the index unchanged, otherwise it is + * projected into the index set of the owning, adjacent box. + */ auto target_local_idx = [&](const IndexType i) { return is_in_box(i, discretization_points) ? i @@ -117,6 +138,12 @@ gko::matrix_data generate_2d_stencil_box( : discretization_points - i); }; + /** + * For a two dimensional pair of local indices (iy, ix), this computes the + * corresponding global one dimensional index. + * If any target positions of the owning box is not inside [0, dims[0]] x + * [0, dims[1]] then the invalid index -1 is returned. + */ auto flat_idx = [&](const IndexType iy, const IndexType ix) { auto tpx = target_position(ix, positions[0]); auto tpy = target_position(iy, positions[1]); @@ -128,10 +155,18 @@ gko::matrix_data generate_2d_stencil_box( } }; + /** + * This computes if the given local-index-offsets are valid neighbors for + * the current stencil, regardless of any global indices. + */ auto is_valid_neighbor = [&](const IndexType dy, const IndexType dx) { return !restricted || dx == 0 || dy == 0; }; + /** + * This computes the maximal number of non-zeros per row for the current + * stencil. + */ auto nnz_in_row = [&]() { int num_neighbors = 0; for (IndexType d_i : {-1, 0, 1}) { @@ -211,6 +246,10 @@ gko::matrix_data generate_3d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); + /** + * This computes the offsets in the global indices for a box at (position_z, + * position_y, position_x). + */ auto global_offset = [&](const IndexType position_z, const IndexType position_y, const IndexType position_x) { @@ -220,12 +259,25 @@ gko::matrix_data generate_3d_stencil_box( dims[1]; }; + /** + * This computes a single dimension of the target box position + * for a given index. The target box is the box that owns the given index. + * If the index is within the local indices [0, discretization_points) this + * returns the current position, otherwise it is shifted by +-1. + */ auto target_position = [&](const IndexType i, const int position) { return is_in_box(i, discretization_points) ? position : (i < 0 ? position - 1 : position + 1); }; + /** + * This computes a single dimension of target local index for a given index. + * The target local index is the local index within the index set of the box + * that owns the index. If the index is within the local indices [0, + * discretization_points), this returns the index unchanged, otherwise it is + * projected into the index set of the owning, adjacent box. + */ auto target_local_idx = [&](const IndexType i) { return is_in_box(i, discretization_points) ? i @@ -233,6 +285,12 @@ gko::matrix_data generate_3d_stencil_box( : discretization_points - i); }; + /** + * For a three dimensional tuple of local indices (iz, iy, ix), this + * computes the corresponding global one dimensional index. If any target + * positions of the owning box is not inside [0, dims[0]] x [0, dims[1]] x + * [0, dims[2]] then the invalid index -1 is returned. + */ auto flat_idx = [&](const IndexType iz, const IndexType iy, const IndexType ix) { auto tpx = target_position(ix, positions[0]); @@ -249,11 +307,19 @@ gko::matrix_data generate_3d_stencil_box( } }; + /** + * This computes if the given local-index-offsets are valid neighbors for + * the current stencil, regardless of any global indices. + */ auto is_valid_neighbor = [&](const IndexType dz, const IndexType dy, const IndexType dx) { return !restricted || ((dz == 0) + (dy == 0) + (dx == 0) >= 2); }; + /** + * This computes the maximal number of non-zeros per row for the current + * stencil. + */ auto nnz_in_row = [&]() { int num_neighbors = 0; for (IndexType d_i : {-1, 0, 1}) {