From f230764f4eea147522d5dbdc0b0565b136dcd4a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Gr=C3=BCtzmacher?= Date: Thu, 27 Oct 2022 13:52:33 +0200 Subject: [PATCH] Review updates Fix GMRES reference test initialization and improve memory read efficiency of hessenberg_qr. Co-authored-by: Pratik Nayak Co-authored-by: Yuhsiang M. Tsai --- .../unified/solver/common_gmres_kernels.cpp | 60 ++++++++++--------- core/solver/cb_gmres.cpp | 2 +- core/solver/gmres.cpp | 3 +- reference/solver/common_gmres_kernels.cpp | 2 +- reference/test/solver/gmres_kernels.cpp | 14 ++--- test/solver/gmres_kernels.cpp | 12 ++-- 6 files changed, 44 insertions(+), 49 deletions(-) diff --git a/common/unified/solver/common_gmres_kernels.cpp b/common/unified/solver/common_gmres_kernels.cpp index aefcb2fa279..13b8ff96919 100644 --- a/common/unified/solver/common_gmres_kernels.cpp +++ b/common/unified/solver/common_gmres_kernels.cpp @@ -107,44 +107,48 @@ void hessenberg_qr(std::shared_ptr exec, } // increment iteration count final_iter_nums[rhs]++; + auto hess_this = hessenberg_iter(0, rhs); + auto hess_next = hessenberg_iter(1, rhs); // apply previous Givens rotations to column - for (int64 j = 0; j < iter; ++j) { - auto out1 = givens_cos(j, rhs) * hessenberg_iter(j, rhs) + - givens_sin(j, rhs) * hessenberg_iter(j + 1, rhs); - auto out2 = - -conj(givens_sin(j, rhs)) * hessenberg_iter(j, rhs) + - conj(givens_cos(j, rhs)) * hessenberg_iter(j + 1, rhs); + for (decltype(iter) j = 0; j < iter; ++j) { + // in here: hess_this = hessenberg_iter(j, rhs); + // hess_next = hessenberg_iter(j+1, rhs); + hess_next = hessenberg_iter(j + 1, rhs); + const auto gc = givens_cos(j, rhs); + const auto gs = givens_sin(j, rhs); + const auto out1 = gc * hess_this + gs * hess_next; + const auto out2 = -conj(gs) * hess_this + conj(gc) * hess_next; hessenberg_iter(j, rhs) = out1; - hessenberg_iter(j + 1, rhs) = out2; + hessenberg_iter(j + 1, rhs) = hess_this = out2; + hess_next = hessenberg_iter(j + 2, rhs); } + // hess_this is hessenberg_iter(iter, rhs) and + // hess_next is hessenberg_iter(iter + 1, rhs) + auto gs = givens_sin(iter, rhs); + auto gc = givens_cos(iter, rhs); // compute new Givens rotation - if (hessenberg_iter(iter, rhs) == zero()) { - givens_cos(iter, rhs) = zero(); - givens_sin(iter, rhs) = one(); + if (hess_this == zero()) { + givens_cos(iter, rhs) = gc = zero(); + givens_sin(iter, rhs) = gs = one(); } else { - const auto this_hess = hessenberg_iter(iter, rhs); - const auto next_hess = hessenberg_iter(iter + 1, rhs); - const auto scale = abs(this_hess) + abs(next_hess); + const auto scale = abs(hess_this) + abs(hess_next); const auto hypotenuse = scale * - sqrt(abs(this_hess / scale) * abs(this_hess / scale) + - abs(next_hess / scale) * abs(next_hess / scale)); - givens_cos(iter, rhs) = conj(this_hess) / hypotenuse; - givens_sin(iter, rhs) = conj(next_hess) / hypotenuse; + sqrt(abs(hess_this / scale) * abs(hess_this / scale) + + abs(hess_next / scale) * abs(hess_next / scale)); + givens_cos(iter, rhs) = gc = conj(hess_this) / hypotenuse; + givens_sin(iter, rhs) = gs = conj(hess_next) / hypotenuse; } // apply new Givens rotation to column - hessenberg_iter(iter, rhs) = - givens_cos(iter, rhs) * hessenberg_iter(iter, rhs) + - givens_sin(iter, rhs) * hessenberg_iter(iter + 1, rhs); + hessenberg_iter(iter, rhs) = gc * hess_this + gs * hess_next; hessenberg_iter(iter + 1, rhs) = zero(); // apply new Givens rotation to RHS of least-squares problem - residual_norm_collection(iter + 1, rhs) = - -conj(givens_sin(iter, rhs)) * - residual_norm_collection(iter, rhs); + const auto rnc_new = + -conj(gs) * residual_norm_collection(iter, rhs); + residual_norm_collection(iter + 1, rhs) = rnc_new; residual_norm_collection(iter, rhs) = - givens_cos(iter, rhs) * residual_norm_collection(iter, rhs); - residual_norm(0, rhs) = - abs(residual_norm_collection(iter + 1, rhs)); + gc * residual_norm_collection(iter, rhs); + residual_norm(0, rhs) = abs(rnc_new); }, hessenberg_iter->get_size()[1], givens_sin, givens_cos, residual_norm, residual_norm_collection, hessenberg_iter, iter, final_iter_nums, @@ -169,9 +173,9 @@ void solve_krylov(std::shared_ptr exec, if (stop[col].is_finalized()) { return; } - for (int i = sizes[col] - 1; i >= 0; i--) { + for (int64 i = sizes[col] - 1; i >= 0; i--) { auto value = rhs(i, col); - for (int j = i + 1; j < sizes[col]; j++) { + for (int64 j = i + 1; j < sizes[col]; j++) { value -= mtx(i, j * num_cols + col) * y(j, col); } // y(i) = (rhs(i) - U(i,i+1:) * y(i+1:)) / U(i, i) diff --git a/core/solver/cb_gmres.cpp b/core/solver/cb_gmres.cpp index 7329546b646..f06aac3f9e1 100644 --- a/core/solver/cb_gmres.cpp +++ b/core/solver/cb_gmres.cpp @@ -470,7 +470,7 @@ void CbGmres::apply_dense_impl( // Solve x auto hessenberg_small = hessenberg->create_submatrix( - span{0, restart_iter}, span{0, num_rhs * (restart_iter)}); + span{0, restart_iter}, span{0, num_rhs * restart_iter}); exec->run(cb_gmres::make_solve_krylov( residual_norm_collection.get(), diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index 813e65c933d..37b885a2e9a 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -159,7 +159,6 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, // rows: rows of Hessenberg matrix, columns: block for each entry auto hessenberg = this->template create_workspace_op( ws::hessenberg, dim<2>{krylov_dim + 1, krylov_dim * num_rhs}); - hessenberg->fill(0); auto givens_sin = this->template create_workspace_op( ws::givens_sin, dim<2>{krylov_dim, num_rhs}); auto givens_cos = this->template create_workspace_op( @@ -332,7 +331,7 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, // calculate next Givens parameters // this_hess = hessenberg(restart_iter) // next_hess = hessenberg(restart_iter+1) - // hypotenuse = norm2([this_hess next_hess]) + // hypotenuse = ||(this_hess, next_hess)|| // cos(restart_iter) = conj(this_hess) / hypotenuse // sin(restart_iter) = conj(next_hess) / this_hess // update Krylov approximation of b, apply new Givens rotation diff --git a/reference/solver/common_gmres_kernels.cpp b/reference/solver/common_gmres_kernels.cpp index 12363888f02..b4d34c75f48 100644 --- a/reference/solver/common_gmres_kernels.cpp +++ b/reference/solver/common_gmres_kernels.cpp @@ -48,7 +48,7 @@ namespace gko { namespace kernels { namespace reference { /** - * @brief The GMRES solver namespace. + * @brief The common GMRES solver namespace. * * @ingroup gmres */ diff --git a/reference/test/solver/gmres_kernels.cpp b/reference/test/solver/gmres_kernels.cpp index ebf06072fe5..0a1183aa2f0 100644 --- a/reference/test/solver/gmres_kernels.cpp +++ b/reference/test/solver/gmres_kernels.cpp @@ -218,8 +218,7 @@ TYPED_TEST(Gmres, KernelRestart) this->small_krylov_bases->fill(9999); std::fill_n(this->small_final_iter_nums.get_data(), this->small_final_iter_nums.get_num_elems(), 999); - auto expected_krylov = Mtx::create(this->exec); - expected_krylov->copy_from(this->small_krylov_bases.get()); + auto expected_krylov = gko::clone(this->exec, this->small_krylov_bases); const auto small_size = this->small_residual->get_size(); for (int i = 0; i < small_size[0]; ++i) { for (int j = 0; j < small_size[1]; ++j) { @@ -305,7 +304,7 @@ TYPED_TEST(Gmres, KernelSolveKrylov) // clang-format off {{-1, 3, 2, -4}, {0, 0, 1, 5}, - {nan, nan, nan}}, + {nan, nan, nan, nan}}, // clang-format on this->exec); this->small_residual_norm_collection = @@ -658,12 +657,9 @@ TYPED_TEST(Gmres, SolvesMultipleDenseSystemForDivergenceCheck) auto alpha = gko::initialize({1.0}, this->exec); auto beta = gko::initialize({-1.0}, this->exec); - auto residual1 = Mtx::create(this->exec, b1->get_size()); - residual1->copy_from(b1.get()); - auto residual2 = Mtx::create(this->exec, b2->get_size()); - residual2->copy_from(b2.get()); - auto residualC = Mtx::create(this->exec, bc->get_size()); - residualC->copy_from(bc.get()); + auto residual1 = gko::clone(this->exec, b1); + auto residual2 = gko::clone(this->exec, b2); + auto residualC = gko::clone(this->exec, bc); this->mtx_big->apply(alpha.get(), x1.get(), beta.get(), residual1.get()); this->mtx_big->apply(alpha.get(), x2.get(), beta.get(), residual2.get()); diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index 387528d474f..9f55adf75c4 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -313,10 +313,8 @@ TEST_F(Gmres, GmresApplyOneRHSIsEquivalentToRef) auto exec_solver = exec_gmres_factory->generate(d_mtx); auto b = gen_mtx(m, n); auto x = gen_mtx(m, n); - auto d_b = Mtx::create(exec); - auto d_x = Mtx::create(exec); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); + auto d_b = gko::clone(exec, b); + auto d_x = gko::clone(exec, x); ref_solver->apply(b.get(), x.get()); exec_solver->apply(d_b.get(), d_x.get()); @@ -334,10 +332,8 @@ TEST_F(Gmres, GmresApplyMultipleRHSIsEquivalentToRef) auto exec_solver = exec_gmres_factory->generate(d_mtx); auto b = gen_mtx(m, n); auto x = gen_mtx(m, n); - auto d_b = Mtx::create(exec); - auto d_x = Mtx::create(exec); - d_b->copy_from(b.get()); - d_x->copy_from(x.get()); + auto d_b = gko::clone(exec, b); + auto d_x = gko::clone(exec, x); ref_solver->apply(b.get(), x.get()); exec_solver->apply(d_b.get(), d_x.get());