diff --git a/benchmark/utils/stencil_matrix.hpp b/benchmark/utils/stencil_matrix.hpp index ca89aa86079..93ec4a0fb2e 100644 --- a/benchmark/utils/stencil_matrix.hpp +++ b/benchmark/utils/stencil_matrix.hpp @@ -53,7 +53,11 @@ double closest_nth_root(T v, int n) } } - +/** + * Checks if an index is within [0, bound). + * + * @return True if i in [0, bound). + */ template bool is_in_box(const IndexType i, const IndexType bound) { @@ -98,17 +102,35 @@ gko::matrix_data generate_2d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); - auto global_offset = [&](const int position_x, const int position_y) { - return static_cast(local_size) * dims[0] * position_x + - static_cast(local_size) * position_y; + /** + * This computes the offsets in the global indices for a box at (position_y, + * position_x). + */ + auto global_offset = [&](const IndexType position_y, + const IndexType position_x) { + return static_cast(local_size) * position_x + + static_cast(local_size) * dims[0] * position_y; }; + /** + * This computes a single dimension of the target box position + * for a given index. The target box is the box that owns the given index. + * If the index is within the local indices [0, discretization_points) this + * returns the current position, otherwise it is shifted by +-1. + */ auto target_position = [&](const IndexType i, const int position) { return is_in_box(i, discretization_points) ? position : (i < 0 ? position - 1 : position + 1); }; + /** + * This computes a single dimension of target local index for a given index. + * The target local index is the local index within the index set of the box + * that owns the index. If the index is within the local indices [0, + * discretization_points), this returns the index unchanged, otherwise it is + * projected into the index set of the owning, adjacent box. + */ auto target_local_idx = [&](const IndexType i) { return is_in_box(i, discretization_points) ? i @@ -116,22 +138,35 @@ gko::matrix_data generate_2d_stencil_box( : discretization_points - i); }; - auto flat_idx = [&](const IndexType ix, const IndexType iy) { + /** + * For a two dimensional pair of local indices (iy, ix), this computes the + * corresponding global one dimensional index. + * If any target positions of the owning box is not inside [0, dims[0]] x + * [0, dims[1]] then the invalid index -1 is returned. + */ + auto flat_idx = [&](const IndexType iy, const IndexType ix) { auto tpx = target_position(ix, positions[0]); auto tpy = target_position(iy, positions[1]); if (is_in_box(tpx, dims[0]) && is_in_box(tpy, dims[1])) { - return global_offset(tpx, tpy) + - target_local_idx(ix) * discretization_points + - target_local_idx(iy); + return global_offset(tpy, tpx) + target_local_idx(ix) + + target_local_idx(iy) * discretization_points; } else { return static_cast(-1); } }; - auto is_valid_neighbor = [&](const IndexType d_i, const IndexType d_j) { - return !restricted || d_i == 0 || d_j == 0; + /** + * This computes if the given local-index-offsets are valid neighbors for + * the current stencil, regardless of any global indices. + */ + auto is_valid_neighbor = [&](const IndexType dy, const IndexType dx) { + return !restricted || dx == 0 || dy == 0; }; + /** + * This computes the maximal number of non-zeros per row for the current + * stencil. + */ auto nnz_in_row = [&]() { int num_neighbors = 0; for (IndexType d_i : {-1, 0, 1}) { @@ -145,13 +180,15 @@ gko::matrix_data generate_2d_stencil_box( }; const auto diag_value = static_cast(nnz_in_row() - 1); - for (IndexType i = 0; i < discretization_points; ++i) { - for (IndexType j = 0; j < discretization_points; ++j) { - auto row = flat_idx(i, j); - for (IndexType d_i : {-1, 0, 1}) { - for (IndexType d_j : {-1, 0, 1}) { - if (is_valid_neighbor(d_i, d_j)) { - auto col = flat_idx(i + d_i, j + d_j); + A_data.nonzeros.reserve(nnz_in_row() * local_size); + + for (IndexType iy = 0; iy < discretization_points; ++iy) { + for (IndexType ix = 0; ix < discretization_points; ++ix) { + auto row = flat_idx(iy, ix); + for (IndexType dy : {-1, 0, 1}) { + for (IndexType dx : {-1, 0, 1}) { + if (is_valid_neighbor(dy, dx)) { + auto col = flat_idx(iy + dy, ix + dx); if (is_in_box(col, static_cast(global_size))) { if (col != row) { @@ -209,19 +246,38 @@ gko::matrix_data generate_3d_stencil_box( gko::dim<2>{static_cast(global_size), static_cast(global_size)}); - auto global_offset = [&](const int position_x, const int position_y, - const int position_z) { - return position_x * static_cast(local_size) * dims[0] * dims[1] + - position_y * static_cast(local_size) * dims[0] + - position_z * static_cast(local_size); + /** + * This computes the offsets in the global indices for a box at (position_z, + * position_y, position_x). + */ + auto global_offset = [&](const IndexType position_z, + const IndexType position_y, + const IndexType position_x) { + return position_x * static_cast(local_size) + + position_y * static_cast(local_size) * dims[0] + + position_z * static_cast(local_size) * dims[0] * + dims[1]; }; + /** + * This computes a single dimension of the target box position + * for a given index. The target box is the box that owns the given index. + * If the index is within the local indices [0, discretization_points) this + * returns the current position, otherwise it is shifted by +-1. + */ auto target_position = [&](const IndexType i, const int position) { return is_in_box(i, discretization_points) ? position : (i < 0 ? position - 1 : position + 1); }; + /** + * This computes a single dimension of target local index for a given index. + * The target local index is the local index within the index set of the box + * that owns the index. If the index is within the local indices [0, + * discretization_points), this returns the index unchanged, otherwise it is + * projected into the index set of the owning, adjacent box. + */ auto target_local_idx = [&](const IndexType i) { return is_in_box(i, discretization_points) ? i @@ -229,28 +285,41 @@ gko::matrix_data generate_3d_stencil_box( : discretization_points - i); }; - auto flat_idx = [&](const IndexType ix, const IndexType iy, - const IndexType iz) { + /** + * For a three dimensional tuple of local indices (iz, iy, ix), this + * computes the corresponding global one dimensional index. If any target + * positions of the owning box is not inside [0, dims[0]] x [0, dims[1]] x + * [0, dims[2]] then the invalid index -1 is returned. + */ + auto flat_idx = [&](const IndexType iz, const IndexType iy, + const IndexType ix) { auto tpx = target_position(ix, positions[0]); auto tpy = target_position(iy, positions[1]); auto tpz = target_position(iz, positions[2]); if (is_in_box(tpx, dims[0]) && is_in_box(tpy, dims[1]) && is_in_box(tpz, dims[2])) { - return global_offset(tpx, tpy, tpz) + - target_local_idx(ix) * discretization_points * - discretization_points + + return global_offset(tpz, tpy, tpx) + target_local_idx(ix) + target_local_idx(iy) * discretization_points + - target_local_idx(iz); + target_local_idx(iz) * discretization_points * + discretization_points; } else { return static_cast(-1); } }; - auto is_valid_neighbor = [&](const IndexType d_i, const IndexType d_j, - const IndexType d_k) { - return !restricted || ((d_i == 0) + (d_j == 0) + (d_k == 0) >= 2); + /** + * This computes if the given local-index-offsets are valid neighbors for + * the current stencil, regardless of any global indices. + */ + auto is_valid_neighbor = [&](const IndexType dz, const IndexType dy, + const IndexType dx) { + return !restricted || ((dz == 0) + (dy == 0) + (dx == 0) >= 2); }; + /** + * This computes the maximal number of non-zeros per row for the current + * stencil. + */ auto nnz_in_row = [&]() { int num_neighbors = 0; for (IndexType d_i : {-1, 0, 1}) { @@ -266,15 +335,17 @@ gko::matrix_data generate_3d_stencil_box( }; const auto diag_value = static_cast(nnz_in_row() - 1); - for (IndexType i = 0; i < discretization_points; ++i) { - for (IndexType j = 0; j < discretization_points; ++j) { - for (IndexType k = 0; k < discretization_points; ++k) { - auto row = flat_idx(i, j, k); - for (IndexType d_i : {-1, 0, 1}) { - for (IndexType d_j : {-1, 0, 1}) { - for (IndexType d_k : {-1, 0, 1}) { - if (is_valid_neighbor(d_i, d_j, d_k)) { - auto col = flat_idx(i + d_i, j + d_j, k + d_k); + A_data.nonzeros.reserve(nnz_in_row() * local_size); + + for (IndexType iz = 0; iz < discretization_points; ++iz) { + for (IndexType iy = 0; iy < discretization_points; ++iy) { + for (IndexType ix = 0; ix < discretization_points; ++ix) { + auto row = flat_idx(iz, iy, ix); + for (IndexType dz : {-1, 0, 1}) { + for (IndexType dy : {-1, 0, 1}) { + for (IndexType dx : {-1, 0, 1}) { + if (is_valid_neighbor(dz, dy, dx)) { + auto col = flat_idx(iz + dz, iy + dy, ix + dx); if (is_in_box(col, static_cast( global_size))) { if (col != row) {