Skip to content

Commit

Permalink
Review updates
Browse files Browse the repository at this point in the history
Fix GMRES reference test initialization and improve memory read
efficiency of hessenberg_qr.

Co-authored-by: Pratik Nayak <pratikvn@protonmail.com>
Co-authored-by: Yuhsiang M. Tsai <yhmtsai@gmail.com>
  • Loading branch information
3 people committed Oct 31, 2022
1 parent 6164a27 commit f230764
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 49 deletions.
60 changes: 32 additions & 28 deletions common/unified/solver/common_gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,44 +107,48 @@ void hessenberg_qr(std::shared_ptr<const DefaultExecutor> exec,
}
// increment iteration count
final_iter_nums[rhs]++;
auto hess_this = hessenberg_iter(0, rhs);
auto hess_next = hessenberg_iter(1, rhs);
// apply previous Givens rotations to column
for (int64 j = 0; j < iter; ++j) {
auto out1 = givens_cos(j, rhs) * hessenberg_iter(j, rhs) +
givens_sin(j, rhs) * hessenberg_iter(j + 1, rhs);
auto out2 =
-conj(givens_sin(j, rhs)) * hessenberg_iter(j, rhs) +
conj(givens_cos(j, rhs)) * hessenberg_iter(j + 1, rhs);
for (decltype(iter) j = 0; j < iter; ++j) {
// in here: hess_this = hessenberg_iter(j, rhs);
// hess_next = hessenberg_iter(j+1, rhs);
hess_next = hessenberg_iter(j + 1, rhs);
const auto gc = givens_cos(j, rhs);
const auto gs = givens_sin(j, rhs);
const auto out1 = gc * hess_this + gs * hess_next;
const auto out2 = -conj(gs) * hess_this + conj(gc) * hess_next;
hessenberg_iter(j, rhs) = out1;
hessenberg_iter(j + 1, rhs) = out2;
hessenberg_iter(j + 1, rhs) = hess_this = out2;
hess_next = hessenberg_iter(j + 2, rhs);
}
// hess_this is hessenberg_iter(iter, rhs) and
// hess_next is hessenberg_iter(iter + 1, rhs)
auto gs = givens_sin(iter, rhs);
auto gc = givens_cos(iter, rhs);
// compute new Givens rotation
if (hessenberg_iter(iter, rhs) == zero<value_type>()) {
givens_cos(iter, rhs) = zero<value_type>();
givens_sin(iter, rhs) = one<value_type>();
if (hess_this == zero<value_type>()) {
givens_cos(iter, rhs) = gc = zero<value_type>();
givens_sin(iter, rhs) = gs = one<value_type>();
} else {
const auto this_hess = hessenberg_iter(iter, rhs);
const auto next_hess = hessenberg_iter(iter + 1, rhs);
const auto scale = abs(this_hess) + abs(next_hess);
const auto scale = abs(hess_this) + abs(hess_next);
const auto hypotenuse =
scale *
sqrt(abs(this_hess / scale) * abs(this_hess / scale) +
abs(next_hess / scale) * abs(next_hess / scale));
givens_cos(iter, rhs) = conj(this_hess) / hypotenuse;
givens_sin(iter, rhs) = conj(next_hess) / hypotenuse;
sqrt(abs(hess_this / scale) * abs(hess_this / scale) +
abs(hess_next / scale) * abs(hess_next / scale));
givens_cos(iter, rhs) = gc = conj(hess_this) / hypotenuse;
givens_sin(iter, rhs) = gs = conj(hess_next) / hypotenuse;
}
// apply new Givens rotation to column
hessenberg_iter(iter, rhs) =
givens_cos(iter, rhs) * hessenberg_iter(iter, rhs) +
givens_sin(iter, rhs) * hessenberg_iter(iter + 1, rhs);
hessenberg_iter(iter, rhs) = gc * hess_this + gs * hess_next;
hessenberg_iter(iter + 1, rhs) = zero<value_type>();
// apply new Givens rotation to RHS of least-squares problem
residual_norm_collection(iter + 1, rhs) =
-conj(givens_sin(iter, rhs)) *
residual_norm_collection(iter, rhs);
const auto rnc_new =
-conj(gs) * residual_norm_collection(iter, rhs);
residual_norm_collection(iter + 1, rhs) = rnc_new;
residual_norm_collection(iter, rhs) =
givens_cos(iter, rhs) * residual_norm_collection(iter, rhs);
residual_norm(0, rhs) =
abs(residual_norm_collection(iter + 1, rhs));
gc * residual_norm_collection(iter, rhs);
residual_norm(0, rhs) = abs(rnc_new);
},
hessenberg_iter->get_size()[1], givens_sin, givens_cos, residual_norm,
residual_norm_collection, hessenberg_iter, iter, final_iter_nums,
Expand All @@ -169,9 +173,9 @@ void solve_krylov(std::shared_ptr<const DefaultExecutor> exec,
if (stop[col].is_finalized()) {
return;
}
for (int i = sizes[col] - 1; i >= 0; i--) {
for (int64 i = sizes[col] - 1; i >= 0; i--) {
auto value = rhs(i, col);
for (int j = i + 1; j < sizes[col]; j++) {
for (int64 j = i + 1; j < sizes[col]; j++) {
value -= mtx(i, j * num_cols + col) * y(j, col);
}
// y(i) = (rhs(i) - U(i,i+1:) * y(i+1:)) / U(i, i)
Expand Down
2 changes: 1 addition & 1 deletion core/solver/cb_gmres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ void CbGmres<ValueType>::apply_dense_impl(
// Solve x

auto hessenberg_small = hessenberg->create_submatrix(
span{0, restart_iter}, span{0, num_rhs * (restart_iter)});
span{0, restart_iter}, span{0, num_rhs * restart_iter});

exec->run(cb_gmres::make_solve_krylov(
residual_norm_collection.get(),
Expand Down
3 changes: 1 addition & 2 deletions core/solver/gmres.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,6 @@ void Gmres<ValueType>::apply_dense_impl(const matrix::Dense<ValueType>* dense_b,
// rows: rows of Hessenberg matrix, columns: block for each entry
auto hessenberg = this->template create_workspace_op<Vector>(
ws::hessenberg, dim<2>{krylov_dim + 1, krylov_dim * num_rhs});
hessenberg->fill(0);
auto givens_sin = this->template create_workspace_op<Vector>(
ws::givens_sin, dim<2>{krylov_dim, num_rhs});
auto givens_cos = this->template create_workspace_op<Vector>(
Expand Down Expand Up @@ -332,7 +331,7 @@ void Gmres<ValueType>::apply_dense_impl(const matrix::Dense<ValueType>* dense_b,
// calculate next Givens parameters
// this_hess = hessenberg(restart_iter)
// next_hess = hessenberg(restart_iter+1)
// hypotenuse = norm2([this_hess next_hess])
// hypotenuse = ||(this_hess, next_hess)||
// cos(restart_iter) = conj(this_hess) / hypotenuse
// sin(restart_iter) = conj(next_hess) / this_hess
// update Krylov approximation of b, apply new Givens rotation
Expand Down
2 changes: 1 addition & 1 deletion reference/solver/common_gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace gko {
namespace kernels {
namespace reference {
/**
* @brief The GMRES solver namespace.
* @brief The common GMRES solver namespace.
*
* @ingroup gmres
*/
Expand Down
14 changes: 5 additions & 9 deletions reference/test/solver/gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,8 +218,7 @@ TYPED_TEST(Gmres, KernelRestart)
this->small_krylov_bases->fill(9999);
std::fill_n(this->small_final_iter_nums.get_data(),
this->small_final_iter_nums.get_num_elems(), 999);
auto expected_krylov = Mtx::create(this->exec);
expected_krylov->copy_from(this->small_krylov_bases.get());
auto expected_krylov = gko::clone(this->exec, this->small_krylov_bases);
const auto small_size = this->small_residual->get_size();
for (int i = 0; i < small_size[0]; ++i) {
for (int j = 0; j < small_size[1]; ++j) {
Expand Down Expand Up @@ -305,7 +304,7 @@ TYPED_TEST(Gmres, KernelSolveKrylov)
// clang-format off
{{-1, 3, 2, -4},
{0, 0, 1, 5},
{nan, nan, nan}},
{nan, nan, nan, nan}},
// clang-format on
this->exec);
this->small_residual_norm_collection =
Expand Down Expand Up @@ -658,12 +657,9 @@ TYPED_TEST(Gmres, SolvesMultipleDenseSystemForDivergenceCheck)
auto alpha = gko::initialize<Mtx>({1.0}, this->exec);
auto beta = gko::initialize<Mtx>({-1.0}, this->exec);

auto residual1 = Mtx::create(this->exec, b1->get_size());
residual1->copy_from(b1.get());
auto residual2 = Mtx::create(this->exec, b2->get_size());
residual2->copy_from(b2.get());
auto residualC = Mtx::create(this->exec, bc->get_size());
residualC->copy_from(bc.get());
auto residual1 = gko::clone(this->exec, b1);
auto residual2 = gko::clone(this->exec, b2);
auto residualC = gko::clone(this->exec, bc);

this->mtx_big->apply(alpha.get(), x1.get(), beta.get(), residual1.get());
this->mtx_big->apply(alpha.get(), x2.get(), beta.get(), residual2.get());
Expand Down
12 changes: 4 additions & 8 deletions test/solver/gmres_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,10 +313,8 @@ TEST_F(Gmres, GmresApplyOneRHSIsEquivalentToRef)
auto exec_solver = exec_gmres_factory->generate(d_mtx);
auto b = gen_mtx(m, n);
auto x = gen_mtx(m, n);
auto d_b = Mtx::create(exec);
auto d_x = Mtx::create(exec);
d_b->copy_from(b.get());
d_x->copy_from(x.get());
auto d_b = gko::clone(exec, b);
auto d_x = gko::clone(exec, x);

ref_solver->apply(b.get(), x.get());
exec_solver->apply(d_b.get(), d_x.get());
Expand All @@ -334,10 +332,8 @@ TEST_F(Gmres, GmresApplyMultipleRHSIsEquivalentToRef)
auto exec_solver = exec_gmres_factory->generate(d_mtx);
auto b = gen_mtx(m, n);
auto x = gen_mtx(m, n);
auto d_b = Mtx::create(exec);
auto d_x = Mtx::create(exec);
d_b->copy_from(b.get());
d_x->copy_from(x.get());
auto d_b = gko::clone(exec, b);
auto d_x = gko::clone(exec, x);

ref_solver->apply(b.get(), x.get());
exec_solver->apply(d_b.get(), d_x.get());
Expand Down

0 comments on commit f230764

Please sign in to comment.