From 3f588f5a6a526e537aaeceb1a2e93cf4d8e3cf45 Mon Sep 17 00:00:00 2001 From: "Yu-Hsiang M. Tsai" Date: Fri, 3 Jan 2025 17:54:58 +0100 Subject: [PATCH] pass alpha/beta by value to kernel --- common/unified/solver/chebyshev_kernels.cpp | 14 +- core/solver/chebyshev.cpp | 67 +------- core/solver/chebyshev_kernels.hpp | 4 +- include/ginkgo/core/solver/chebyshev.hpp | 14 +- reference/solver/chebyshev_kernels.cpp | 16 +- reference/test/solver/chebyshev_kernels.cpp | 176 -------------------- 6 files changed, 20 insertions(+), 271 deletions(-) diff --git a/common/unified/solver/chebyshev_kernels.cpp b/common/unified/solver/chebyshev_kernels.cpp index 968b3b32be9..616dcb5b691 100644 --- a/common/unified/solver/chebyshev_kernels.cpp +++ b/common/unified/solver/chebyshev_kernels.cpp @@ -17,7 +17,7 @@ namespace chebyshev { template void init_update(std::shared_ptr exec, - const ScalarType* alpha, + const ScalarType alpha, const matrix::Dense* inner_sol, matrix::Dense* update_sol, matrix::Dense* output) @@ -28,7 +28,7 @@ void init_update(std::shared_ptr exec, auto update_sol, auto output) { const auto inner_val = inner_sol(row, col); update_sol(row, col) = val; - output(row, col) += alpha_val * inner_val; + output(row, col) += alpha * inner_val; }, output->get_size(), alpha, inner_sol, update_sol, output); } @@ -38,9 +38,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template -void update(std::shared_ptr exec, - const ScalarType* alpha, const ScalarType* beta, - matrix::Dense* inner_sol, +void update(std::shared_ptr exec, const ScalarType alpha, + const ScalarType beta, matrix::Dense* inner_sol, matrix::Dense* update_sol, matrix::Dense* output) { @@ -48,11 +47,10 @@ void update(std::shared_ptr exec, exec, [] GKO_KERNEL(auto row, auto col, auto alpha, auto beta, auto inner_sol, auto update_sol, auto output) { - const auto val = - inner_sol(row, col) + beta[0] * update_sol(row, col); + const auto val = inner_sol(row, col) + beta * update_sol(row, col); inner_sol(row, col) = val; update_sol(row, col) = val; - output(row, col) += alpha[0] * val; + output(row, col) += alpha * val; }, output->get_size(), alpha, beta, inner_sol, update_sol, output); } diff --git a/core/solver/chebyshev.cpp b/core/solver/chebyshev.cpp index d8141aaf371..b5839cbe4d6 100644 --- a/core/solver/chebyshev.cpp +++ b/core/solver/chebyshev.cpp @@ -165,20 +165,6 @@ void Chebyshev::apply_with_initial_guess_impl( } -template -void visit_criteria(Fn&& fn, - std::shared_ptr c) -{ - fn(c); - if (auto combined = - std::dynamic_pointer_cast(c)) { - for (const auto& factory : combined->get_parameters().criteria) { - visit_criteria(std::forward(fn), factory); - } - } -} - - template template void Chebyshev::apply_dense_impl(const VectorType* dense_b, @@ -195,27 +181,6 @@ void Chebyshev::apply_dense_impl(const VectorType* dense_b, GKO_SOLVER_VECTOR(inner_solution, dense_b); GKO_SOLVER_VECTOR(update_solution, dense_b); - auto old_num_max_generation = num_max_generation_; - // Use the scalar first - // get the iteration information from stopping criterion. - visit_criteria( - [&](auto factory) { - if (auto iter = std::dynamic_pointer_cast< - const gko::stop::Iteration::Factory>(factory)) { - num_max_generation_ = std::max( - num_max_generation_, iter->get_parameters().max_iters); - } - }, - this->get_stop_criterion_factory()); - // Regenerate the vector if we realloc the memory. - if (old_num_max_generation != num_max_generation_) { - num_generated_scalar_ = 0; - } - auto alpha = this->template create_workspace_scalar( - GKO_SOLVER_TRAITS::alpha, num_max_generation_ + 1); - auto beta = this->template create_workspace_scalar( - GKO_SOLVER_TRAITS::beta, num_max_generation_ + 1); - GKO_SOLVER_ONE_MINUS_ONE(); auto alpha_ref = ValueType{1} / center_; @@ -263,24 +228,11 @@ void Chebyshev::apply_dense_impl(const VectorType* dense_b, inner_solution->copy_from(residual_ptr); } this->get_preconditioner()->apply(residual_ptr, inner_solution); - size_type index = - (iter >= num_max_generation_) ? num_max_generation_ : iter; - auto alpha_scalar = - alpha->create_submatrix(span{0, 1}, span{index, index + 1}); - auto beta_scalar = - beta->create_submatrix(span{0, 1}, span{index, index + 1}); if (iter == 0) { - if (num_generated_scalar_ < num_max_generation_) { - alpha_scalar->fill(alpha_ref); - // unused beta for first iteration, but fill zero - beta_scalar->fill(zero()); - num_generated_scalar_++; - } // x = x + alpha * inner_solution // update_solultion = inner_solution exec->run(chebyshev::make_init_update( - alpha_scalar->get_const_values(), - gko::detail::get_local(inner_solution), + alpha_ref, gko::detail::get_local(inner_solution), gko::detail::get_local(update_solution), gko::detail::get_local(dense_x))); continue; @@ -291,21 +243,11 @@ void Chebyshev::apply_dense_impl(const VectorType* dense_b, (foci_direction_ * alpha_ref / ValueType{2.0}); } alpha_ref = ValueType{1.0} / (center_ - beta_ref / alpha_ref); - // The last one is always the updated one - if (num_generated_scalar_ < num_max_generation_ || - iter >= num_max_generation_) { - alpha_scalar->fill(alpha_ref); - beta_scalar->fill(beta_ref); - } - if (num_generated_scalar_ < num_max_generation_) { - num_generated_scalar_++; - } // z = z + beta * p // p = z // x += alpha * p exec->run(chebyshev::make_update( - alpha_scalar->get_const_values(), beta_scalar->get_const_values(), - gko::detail::get_local(inner_solution), + alpha_ref, beta_ref, gko::detail::get_local(inner_solution), gko::detail::get_local(update_solution), gko::detail::get_local(dense_x))); } @@ -351,7 +293,7 @@ int workspace_traits>::num_arrays(const Solver&) template int workspace_traits>::num_vectors(const Solver&) { - return 7; + return 5; } @@ -360,8 +302,7 @@ std::vector workspace_traits>::op_names( const Solver&) { return { - "residual", "inner_solution", "update_solution", "alpha", "beta", - "one", "minus_one", + "residual", "inner_solution", "update_solution", "one", "minus_one", }; } diff --git a/core/solver/chebyshev_kernels.hpp b/core/solver/chebyshev_kernels.hpp index 038f736e536..f1557369eb2 100644 --- a/core/solver/chebyshev_kernels.hpp +++ b/core/solver/chebyshev_kernels.hpp @@ -22,14 +22,14 @@ namespace chebyshev { #define GKO_DECLARE_CHEBYSHEV_INIT_UPDATE_KERNEL(ValueType, ScalarType) \ void init_update(std::shared_ptr exec, \ - const ScalarType* alpha, \ + const ScalarType alpha, \ const matrix::Dense* inner_sol, \ matrix::Dense* update_sol, \ matrix::Dense* output) #define GKO_DECLARE_CHEBYSHEV_UPDATE_KERNEL(ValueType, ScalarType) \ void update(std::shared_ptr exec, \ - const ScalarType* alpha, const ScalarType* beta, \ + const ScalarType alpha, const ScalarType beta, \ matrix::Dense* inner_sol, \ matrix::Dense* update_sol, \ matrix::Dense* output) diff --git a/include/ginkgo/core/solver/chebyshev.hpp b/include/ginkgo/core/solver/chebyshev.hpp index 9ac0f59282d..18c979b0741 100644 --- a/include/ginkgo/core/solver/chebyshev.hpp +++ b/include/ginkgo/core/solver/chebyshev.hpp @@ -182,12 +182,6 @@ class Chebyshev final private: std::shared_ptr solver_{}; - // num_generated_scalar_ tracks the number of generated scalar alpha - // and beta. - mutable size_type num_generated_scalar_ = 0; - // num_max_generation_ is the number of generated scalar kept in the - // workspace. - mutable size_type num_max_generation_ = 3; ValueType center_; ValueType foci_direction_; }; @@ -215,14 +209,10 @@ struct workspace_traits> { constexpr static int inner_solution = 1; // update solution constexpr static int update_solution = 2; - // alpha - constexpr static int alpha = 3; - // beta - constexpr static int beta = 4; // constant 1.0 scalar - constexpr static int one = 5; + constexpr static int one = 3; // constant -1.0 scalar - constexpr static int minus_one = 6; + constexpr static int minus_one = 4; // stopping status array constexpr static int stop = 0; diff --git a/reference/solver/chebyshev_kernels.cpp b/reference/solver/chebyshev_kernels.cpp index d43e28b800f..2adf0a8026d 100644 --- a/reference/solver/chebyshev_kernels.cpp +++ b/reference/solver/chebyshev_kernels.cpp @@ -14,17 +14,16 @@ namespace chebyshev { template void init_update(std::shared_ptr exec, - const ScalarType* alpha, + const ScalarType alpha, const matrix::Dense* inner_sol, matrix::Dense* update_sol, matrix::Dense* output) { - const auto alpha_val = alpha[0]; for (size_t row = 0; row < output->get_size()[0]; row++) { for (size_t col = 0; col < output->get_size()[1]; col++) { const auto inner_val = inner_sol->at(row, col); update_sol->at(row, col) = inner_val; - output->at(row, col) += alpha_val * inner_val; + output->at(row, col) += alpha * inner_val; } } } @@ -34,21 +33,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE( template -void update(std::shared_ptr exec, - const ScalarType* alpha, const ScalarType* beta, - matrix::Dense* inner_sol, +void update(std::shared_ptr exec, const ScalarType alpha, + const ScalarType beta, matrix::Dense* inner_sol, matrix::Dense* update_sol, matrix::Dense* output) { - const auto alpha_val = alpha[0]; - const auto beta_val = beta[0]; for (size_t row = 0; row < output->get_size()[0]; row++) { for (size_t col = 0; col < output->get_size()[1]; col++) { const auto val = - inner_sol->at(row, col) + beta[0] * update_sol->at(row, col); + inner_sol->at(row, col) + beta * update_sol->at(row, col); inner_sol->at(row, col) = val; update_sol->at(row, col) = val; - output->at(row, col) += alpha_val * val; + output->at(row, col) += alpha * val; } } } diff --git a/reference/test/solver/chebyshev_kernels.cpp b/reference/test/solver/chebyshev_kernels.cpp index 876683b1584..5a1043ca982 100644 --- a/reference/test/solver/chebyshev_kernels.cpp +++ b/reference/test/solver/chebyshev_kernels.cpp @@ -15,8 +15,6 @@ #include "core/test/utils.hpp" -template -using workspace_traits = gko::solver::workspace_traits; template class Chebyshev : public ::testing::Test { @@ -47,180 +45,6 @@ class Chebyshev : public ::testing::Test { TYPED_TEST_SUITE(Chebyshev, gko::test::ValueTypes, TypenameNameGenerator); -TYPED_TEST(Chebyshev, CheckDefaultNumAlphaBetaWithoutIteration) -{ - using Mtx = typename TestFixture::Mtx; - using Solver = typename TestFixture::Solver; - using value_type = typename TestFixture::value_type; - auto upper = value_type{1.1}; - auto lower = value_type{0.9}; - auto factory = - Solver::build() - .with_criteria(gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value)) - .with_foci(lower, upper) - .on(this->exec); - auto solver = factory->generate(this->mtx); - auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); - auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - - solver->apply(b.get(), x.get()); - - auto alpha = - gko::as(solver->get_workspace_op(workspace_traits::alpha)); - auto beta = - gko::as(solver->get_workspace_op(workspace_traits::beta)); - // if the stop criterion does not contain iteration limit, it will use the - // default value. - ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 4})); - ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 4})); -} - - -TYPED_TEST(Chebyshev, CheckDefaultNumAlphaBetaWithLessIteration) -{ - using Mtx = typename TestFixture::Mtx; - using Solver = typename TestFixture::Solver; - using value_type = typename TestFixture::value_type; - auto upper = value_type{1.1}; - auto lower = value_type{0.9}; - auto factory = - Solver::build() - .with_criteria(gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value), - gko::stop::Iteration::build().with_max_iters(1u)) - .with_foci(lower, upper) - .on(this->exec); - auto solver = factory->generate(this->mtx); - auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); - auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - - solver->apply(b.get(), x.get()); - - auto alpha = - gko::as(solver->get_workspace_op(workspace_traits::alpha)); - auto beta = - gko::as(solver->get_workspace_op(workspace_traits::beta)); - // if the iteration limit less than the default value, it will use the - // default value. - ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 4})); - ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 4})); -} - - -TYPED_TEST(Chebyshev, CheckStoredAlphaBeta) -{ - using Mtx = typename TestFixture::Mtx; - using Solver = typename TestFixture::Solver; - using value_type = typename TestFixture::value_type; - auto upper = value_type{1.1}; - auto lower = value_type{0.9}; - auto factory = - Solver::build() - .with_criteria(gko::stop::Iteration::build().with_max_iters(6u)) - .with_foci(lower, upper) - .on(this->exec); - auto solver = factory->generate(this->mtx); - auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); - auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - - solver->apply(b.get(), x.get()); - - auto alpha = - gko::as(solver->get_workspace_op(workspace_traits::alpha)); - auto beta = - gko::as(solver->get_workspace_op(workspace_traits::beta)); - // the iteration is more than default - ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 7})); - ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 7})); - // check the num_keep alpha, beta - auto d = (upper + lower) / value_type{2}; - auto c = (upper - lower) / value_type{2}; - EXPECT_EQ(alpha->at(0, 0), value_type{1} / d); - EXPECT_EQ(beta->at(0, 0), value_type{0}); - EXPECT_EQ(beta->at(0, 1), - value_type{0.5} * (c * alpha->at(0, 0)) * (c * alpha->at(0, 0))); - EXPECT_EQ(alpha->at(0, 1), - value_type{1} / (d - beta->at(0, 1) / alpha->at(0, 0))); - EXPECT_EQ(beta->at(0, 2), (c * alpha->at(0, 1) / value_type{2}) * - (c * alpha->at(0, 1) / value_type{2})); - EXPECT_EQ(alpha->at(0, 2), - value_type{1} / (d - beta->at(0, 2) / alpha->at(0, 1))); -} - - -TYPED_TEST(Chebyshev, AlphaBetaFromChangingCriterion) -{ - using Mtx = typename TestFixture::Mtx; - using Solver = typename TestFixture::Solver; - using value_type = typename TestFixture::value_type; - auto upper = value_type{1.1}; - auto lower = value_type{0.9}; - auto factory = - Solver::build() - .with_criteria(gko::stop::ResidualNorm::build() - .with_reduction_factor(r::value), - gko::stop::Iteration::build().with_max_iters(6u)) - .with_foci(lower, upper) - .on(this->exec); - auto solver = factory->generate(this->mtx); - auto b = gko::initialize({3.9, 9.0, 2.2}, this->exec); - auto x = gko::initialize({0.0, 0.0, 0.0}, this->exec); - - // same as previous test, but it works with combined factory - solver->apply(b.get(), x.get()); - - auto alpha = - gko::as(solver->get_workspace_op(workspace_traits::alpha)); - auto beta = - gko::as(solver->get_workspace_op(workspace_traits::beta)); - auto alpha_ref = alpha->clone(); - auto beta_ref = beta->clone(); - // if the iteration limit is less than the default value, it will use the - // default value. - ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 7})); - ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 7})); - { - // Set less iteration limit - solver->set_stop_criterion_factory( - gko::stop::Iteration::build().with_max_iters(4u).on(this->exec)); - - solver->apply(b.get(), x.get()); - - auto alpha_tmp = gko::as( - solver->get_workspace_op(workspace_traits::alpha)); - auto beta_tmp = gko::as( - solver->get_workspace_op(workspace_traits::beta)); - // if the iteration limit is less than the previous one, it keeps the - // storage. - ASSERT_EQ(alpha_tmp->get_size(), (gko::dim<2>{1, 7})); - ASSERT_EQ(beta_tmp->get_size(), (gko::dim<2>{1, 7})); - ASSERT_EQ(alpha_tmp->get_const_values(), alpha->get_const_values()); - ASSERT_EQ(beta_tmp->get_const_values(), beta->get_const_values()); - GKO_ASSERT_MTX_NEAR(alpha_tmp, alpha_ref, 0.0); - GKO_ASSERT_MTX_NEAR(beta_tmp, beta_ref, 0.0); - } - { - // Set more iteration limit - solver->set_stop_criterion_factory( - gko::stop::Iteration::build().with_max_iters(10u).on(this->exec)); - - solver->apply(b.get(), x.get()); - - auto alpha_tmp = gko::as( - solver->get_workspace_op(workspace_traits::alpha)); - auto beta_tmp = gko::as( - solver->get_workspace_op(workspace_traits::beta)); - // if the iteration limit is more than the previous one, it regenerates - // workspace - ASSERT_EQ(alpha_tmp->get_size(), (gko::dim<2>{1, 11})); - ASSERT_EQ(beta_tmp->get_size(), (gko::dim<2>{1, 11})); - ASSERT_NE(alpha_tmp->get_const_values(), alpha->get_const_values()); - ASSERT_NE(beta_tmp->get_const_values(), beta->get_const_values()); - } -} - - TYPED_TEST(Chebyshev, SolvesTriangularSystem) { using Mtx = typename TestFixture::Mtx;