diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index 3c96d884c0c..21e86cc8f08 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -76,6 +76,7 @@ std::unique_ptr Gmres::transpose() const share(as(this->get_preconditioner())->transpose())) .with_criteria(this->get_stop_criterion_factory()) .with_krylov_dim(this->get_krylov_dim()) + .with_flexible(this->get_parameters().flexible) .on(this->get_executor()) ->generate( share(as(this->get_system_matrix())->transpose())); @@ -90,6 +91,7 @@ std::unique_ptr Gmres::conj_transpose() const as(this->get_preconditioner())->conj_transpose())) .with_criteria(this->get_stop_criterion_factory()) .with_krylov_dim(this->get_krylov_dim()) + .with_flexible(this->get_parameters().flexible) .on(this->get_executor()) ->generate(share( as(this->get_system_matrix())->conj_transpose())); @@ -196,7 +198,7 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, auto exec = this->get_executor(); this->setup_workspace(); - + const auto is_flexible = this->get_parameters().flexible; const auto num_rows = this->get_size()[0]; const auto local_num_rows = ::gko::detail::get_local(dense_b)->get_size()[0]; @@ -204,9 +206,17 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, const auto krylov_dim = this->get_krylov_dim(); GKO_SOLVER_VECTOR(residual, dense_b); GKO_SOLVER_VECTOR(preconditioned_vector, dense_b); + // TODO: check whether it can be empty when it is flexible GMRES auto krylov_bases = this->create_workspace_op_with_type_of( ws::krylov_bases, dense_b, dim<2>{num_rows * (krylov_dim + 1), num_rhs}, dim<2>{local_num_rows * (krylov_dim + 1), num_rhs}); + VectorType* preconditioned_krylov_bases = nullptr; + if (is_flexible) { + preconditioned_krylov_bases = this->create_workspace_op_with_type_of( + ws::preconditioned_krylov_bases, dense_b, + dim<2>{num_rows * (krylov_dim + 1), num_rhs}, + dim<2>{local_num_rows * (krylov_dim + 1), num_rhs}); + } // rows: rows of Hessenberg matrix, columns: block for each entry auto hessenberg = this->template create_workspace_op( ws::hessenberg, dim<2>{krylov_dim + 1, krylov_dim * num_rhs}); @@ -341,9 +351,19 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, span{local_num_rows * (restart_iter + 1), local_num_rows * (restart_iter + 2)}, span{0, num_rhs}); - // preconditioned_vector = get_preconditioner() * this_krylov + std::unique_ptr preconditioned_krylov; + auto preconditioned_krylov_vector = preconditioned_vector; + if (is_flexible) { + preconditioned_krylov = create_submatrix_helper( + preconditioned_krylov_bases, dim<2>{num_rows, num_rhs}, + span{local_num_rows * restart_iter, + local_num_rows * (restart_iter + 1)}, + span{0, num_rhs}); + preconditioned_krylov_vector = preconditioned_krylov.get(); + } + // preconditioned_krylov_vector = get_preconditioner() * this_krylov this->get_preconditioner()->apply(this_krylov.get(), - preconditioned_vector); + preconditioned_krylov_vector); // Create view of current column in the hessenberg matrix: // hessenberg_iter = hessenberg(:, restart_iter); @@ -352,8 +372,8 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, span{num_rhs * restart_iter, num_rhs * (restart_iter + 1)}); // Start of Arnoldi - // next_krylov = A * preconditioned_vector - this->get_system_matrix()->apply(preconditioned_vector, + // next_krylov = A * preconditioned_krylov_vector + this->get_system_matrix()->apply(preconditioned_krylov_vector, next_krylov.get()); for (size_type i = 0; i <= restart_iter; i++) { @@ -411,9 +431,6 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, restart_iter++; } - auto krylov_bases_small = create_submatrix_helper( - krylov_bases, dim<2>{num_rows, num_rhs}, - span{0, local_num_rows * (restart_iter + 1)}, span{0, num_rhs}); auto hessenberg_small = hessenberg->create_submatrix( span{0, restart_iter}, span{0, num_rhs * (restart_iter)}); @@ -422,15 +439,30 @@ void Gmres::apply_dense_impl(const VectorType* dense_b, exec->run(gmres::make_solve_krylov( residual_norm_collection, hessenberg_small.get(), y, final_iter_nums.get_const_data(), stop_status.get_const_data())); - // before_preconditioner = krylov_bases * y - exec->run(gmres::make_multi_axpy( - gko::detail::get_local(krylov_bases_small.get()), y, - gko::detail::get_local(before_preconditioner), - final_iter_nums.get_const_data(), stop_status.get_data())); - - // x = x + get_preconditioner() * before_preconditioner - this->get_preconditioner()->apply(before_preconditioner, - after_preconditioner); + if (is_flexible) { + auto preconditioned_krylov_bases_small = create_submatrix_helper( + preconditioned_krylov_bases, dim<2>{num_rows, num_rhs}, + span{0, local_num_rows * (restart_iter + 1)}, span{0, num_rhs}); + // after_preconditioner = preconditioned_krylov_bases * y + exec->run(gmres::make_multi_axpy( + gko::detail::get_local(preconditioned_krylov_bases_small.get()), y, + gko::detail::get_local(after_preconditioner), + final_iter_nums.get_const_data(), stop_status.get_data())); + } else { + auto krylov_bases_small = create_submatrix_helper( + krylov_bases, dim<2>{num_rows, num_rhs}, + span{0, local_num_rows * (restart_iter + 1)}, span{0, num_rhs}); + // before_preconditioner = krylov_bases * y + exec->run(gmres::make_multi_axpy( + gko::detail::get_local(krylov_bases_small.get()), y, + gko::detail::get_local(before_preconditioner), + final_iter_nums.get_const_data(), stop_status.get_data())); + + // after_preconditioner = get_preconditioner() * before_preconditioner + this->get_preconditioner()->apply(before_preconditioner, + after_preconditioner); + } + // x = x + after_preconditioner dense_x->add_scaled(one_op, after_preconditioner); } @@ -463,7 +495,7 @@ int workspace_traits>::num_arrays(const Solver&) template int workspace_traits>::num_vectors(const Solver&) { - return 14; + return 15; } @@ -484,7 +516,8 @@ std::vector workspace_traits>::op_names( "after_preconditioner", "one", "minus_one", - "next_krylov_norm_tmp"}; + "next_krylov_norm_tmp", + "preconditioned_krylov_bases"}; } @@ -509,8 +542,12 @@ std::vector workspace_traits>::scalars(const Solver&) template std::vector workspace_traits>::vectors(const Solver&) { - return {residual, preconditioned_vector, krylov_bases, - before_preconditioner, after_preconditioner}; + return {residual, + preconditioned_vector, + krylov_bases, + before_preconditioner, + after_preconditioner, + preconditioned_krylov_bases}; } diff --git a/include/ginkgo/core/solver/gmres.hpp b/include/ginkgo/core/solver/gmres.hpp index 0d80b401cf0..6df100a8038 100644 --- a/include/ginkgo/core/solver/gmres.hpp +++ b/include/ginkgo/core/solver/gmres.hpp @@ -132,6 +132,11 @@ class Gmres * Krylov dimension factory. */ size_type GKO_FACTORY_PARAMETER_SCALAR(krylov_dim, 0u); + + /** + * Flexible GMRES + */ + bool GKO_FACTORY_PARAMETER_SCALAR(flexible, false); }; GKO_ENABLE_LIN_OP_FACTORY(Gmres, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); @@ -208,6 +213,8 @@ struct workspace_traits> { constexpr static int minus_one = 12; // temporary norm vector of next_krylov to copy into hessenberg matrix constexpr static int next_krylov_norm_tmp = 13; + // preconditioned krylov basis multivector + constexpr static int preconditioned_krylov_bases = 14; // stopping status array constexpr static int stop = 0; diff --git a/test/solver/solver.cpp b/test/solver/solver.cpp index 87715266836..3c330076357 100644 --- a/test/solver/solver.cpp +++ b/test/solver/solver.cpp @@ -274,6 +274,36 @@ struct Gmres : SimpleSolverTest> { }; +template +struct FGmres : SimpleSolverTest> { + static typename solver_type::parameters_type build( + std::shared_ptr exec, + gko::size_type iteration_count) + { + return solver_type::build() + .with_criteria(gko::stop::Iteration::build() + .with_max_iters(iteration_count) + .on(exec)) + .with_krylov_dim(dimension) + .with_flexible(true); + } + + static typename solver_type::parameters_type build_preconditioned( + std::shared_ptr exec, + gko::size_type iteration_count) + { + return solver_type::build() + .with_criteria(gko::stop::Iteration::build() + .with_max_iters(iteration_count) + .on(exec)) + .with_preconditioner( + precond_type::build().with_max_block_size(1u).on(exec)) + .with_krylov_dim(dimension) + .with_flexible(true); + } +}; + + struct LowerTrs : SimpleSolverTest> { static constexpr bool will_not_allocate() { return false; } @@ -855,8 +885,9 @@ using SolverTypes = ::testing::Types, Idr<4>,*/ - Ir, CbGmres<2>, CbGmres<10>, Gmres<2>, Gmres<10>, LowerTrs, - UpperTrs, LowerTrsUnitdiag, UpperTrsUnitdiag + Ir, CbGmres<2>, CbGmres<10>, Gmres<2>, Gmres<10>, + FGmres<2>, FGmres<10>, LowerTrs, UpperTrs, + LowerTrsUnitdiag, UpperTrsUnitdiag #ifdef GKO_COMPILING_CUDA , LowerTrsSyncfree, UpperTrsSyncfree,