From 3f588f5a6a526e537aaeceb1a2e93cf4d8e3cf45 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Fri, 3 Jan 2025 17:54:58 +0100
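Subject: [PATCH] pass alpha/beta by value to kernel

The Chebyshev init_update and update kernels now take the alpha and
beta scalars by value instead of through pointers into workspace
vectors. The solver no longer stores per-iteration alpha/beta values:
the num_generated_scalar_ and num_max_generation_ members, the
visit_criteria helper and the alpha/beta workspace vectors are removed,
together with the reference tests that inspected the stored values.

Note that workspace_traits<Chebyshev>::one and ::minus_one change from
5 and 6 to 3 and 4, and num_vectors() now returns 5 instead of 7.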
---
 common/unified/solver/chebyshev_kernels.cpp |  14 +-
 core/solver/chebyshev.cpp                   |  67 +-------
 core/solver/chebyshev_kernels.hpp           |   4 +-
 include/ginkgo/core/solver/chebyshev.hpp    |  14 +-
 reference/solver/chebyshev_kernels.cpp      |  16 +-
 reference/test/solver/chebyshev_kernels.cpp | 176 --------------------
 6 files changed, 20 insertions(+), 271 deletions(-)
diff --git a/common/unified/solver/chebyshev_kernels.cpp b/common/unified/solver/chebyshev_kernels.cpp
index 968b3b32be9..616dcb5b691 100644
--- a/common/unified/solver/chebyshev_kernels.cpp
+++ b/common/unified/solver/chebyshev_kernels.cpp
@@ -17,7 +17,7 @@ namespace chebyshev {
 
 template <typename ValueType, typename ScalarType>
 void init_update(std::shared_ptr<const DefaultExecutor> exec,
-                 const ScalarType* alpha,
+                 const ScalarType alpha,
                  const matrix::Dense<ValueType>* inner_sol,
                  matrix::Dense<ValueType>* update_sol,
                  matrix::Dense<ValueType>* output)
@@ -28,7 +28,7 @@ void init_update(std::shared_ptr<const DefaultExecutor> exec,
                       auto update_sol, auto output) {
             const auto inner_val = inner_sol(row, col);
             update_sol(row, col) = val;
-            output(row, col) += alpha_val * inner_val;
+            output(row, col) += alpha * inner_val;
         },
         output->get_size(), alpha, inner_sol, update_sol, output);
 }
@@ -38,9 +38,8 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE(
 
 
 template <typename ValueType, typename ScalarType>
-void update(std::shared_ptr<const DefaultExecutor> exec,
-            const ScalarType* alpha, const ScalarType* beta,
-            matrix::Dense<ValueType>* inner_sol,
+void update(std::shared_ptr<const DefaultExecutor> exec, const ScalarType alpha,
+            const ScalarType beta, matrix::Dense<ValueType>* inner_sol,
             matrix::Dense<ValueType>* update_sol,
             matrix::Dense<ValueType>* output)
 {
@@ -48,11 +47,10 @@ void update(std::shared_ptr<const DefaultExecutor> exec,
         exec,
         [] GKO_KERNEL(auto row, auto col, auto alpha, auto beta, auto inner_sol,
                       auto update_sol, auto output) {
-            const auto val =
-                inner_sol(row, col) + beta[0] * update_sol(row, col);
+            const auto val = inner_sol(row, col) + beta * update_sol(row, col);
             inner_sol(row, col) = val;
             update_sol(row, col) = val;
-            output(row, col) += alpha[0] * val;
+            output(row, col) += alpha * val;
         },
         output->get_size(), alpha, beta, inner_sol, update_sol, output);
 }
diff --git a/core/solver/chebyshev.cpp b/core/solver/chebyshev.cpp
index d8141aaf371..b5839cbe4d6 100644
--- a/core/solver/chebyshev.cpp
+++ b/core/solver/chebyshev.cpp
@@ -165,20 +165,6 @@ void Chebyshev<ValueType>::apply_with_initial_guess_impl(
 }
 
 
-template <typename Fn>
-void visit_criteria(Fn&& fn,
-                    std::shared_ptr<const gko::stop::CriterionFactory> c)
-{
-    fn(c);
-    if (auto combined =
-            std::dynamic_pointer_cast<const stop::Combined::Factory>(c)) {
-        for (const auto& factory : combined->get_parameters().criteria) {
-            visit_criteria(std::forward<Fn>(fn), factory);
-        }
-    }
-}
-
-
 template <typename ValueType>
 template <typename VectorType>
 void Chebyshev<ValueType>::apply_dense_impl(const VectorType* dense_b,
@@ -195,27 +181,6 @@ void Chebyshev<ValueType>::apply_dense_impl(const VectorType* dense_b,
     GKO_SOLVER_VECTOR(inner_solution, dense_b);
     GKO_SOLVER_VECTOR(update_solution, dense_b);
 
-    auto old_num_max_generation = num_max_generation_;
-    // Use the scalar first
-    // get the iteration information from stopping criterion.
-    visit_criteria(
-        [&](auto factory) {
-            if (auto iter = std::dynamic_pointer_cast<
-                    const gko::stop::Iteration::Factory>(factory)) {
-                num_max_generation_ = std::max(
-                    num_max_generation_, iter->get_parameters().max_iters);
-            }
-        },
-        this->get_stop_criterion_factory());
-    // Regenerate the vector if we realloc the memory.
-    if (old_num_max_generation != num_max_generation_) {
-        num_generated_scalar_ = 0;
-    }
-    auto alpha = this->template create_workspace_scalar<ValueType>(
-        GKO_SOLVER_TRAITS::alpha, num_max_generation_ + 1);
-    auto beta = this->template create_workspace_scalar<ValueType>(
-        GKO_SOLVER_TRAITS::beta, num_max_generation_ + 1);
-
     GKO_SOLVER_ONE_MINUS_ONE();
 
     auto alpha_ref = ValueType{1} / center_;
@@ -263,24 +228,11 @@ void Chebyshev<ValueType>::apply_dense_impl(const VectorType* dense_b,
             inner_solution->copy_from(residual_ptr);
         }
         this->get_preconditioner()->apply(residual_ptr, inner_solution);
-        size_type index =
-            (iter >= num_max_generation_) ? num_max_generation_ : iter;
-        auto alpha_scalar =
-            alpha->create_submatrix(span{0, 1}, span{index, index + 1});
-        auto beta_scalar =
-            beta->create_submatrix(span{0, 1}, span{index, index + 1});
         if (iter == 0) {
-            if (num_generated_scalar_ < num_max_generation_) {
-                alpha_scalar->fill(alpha_ref);
-                // unused beta for first iteration, but fill zero
-                beta_scalar->fill(zero<ValueType>());
-                num_generated_scalar_++;
-            }
             // x = x + alpha * inner_solution
             // update_solultion = inner_solution
             exec->run(chebyshev::make_init_update(
-                alpha_scalar->get_const_values(),
-                gko::detail::get_local(inner_solution),
+                alpha_ref, gko::detail::get_local(inner_solution),
                 gko::detail::get_local(update_solution),
                 gko::detail::get_local(dense_x)));
             continue;
@@ -291,21 +243,11 @@ void Chebyshev<ValueType>::apply_dense_impl(const VectorType* dense_b,
                        (foci_direction_ * alpha_ref / ValueType{2.0});
         }
         alpha_ref = ValueType{1.0} / (center_ - beta_ref / alpha_ref);
-        // The last one is always the updated one
-        if (num_generated_scalar_ < num_max_generation_ ||
-            iter >= num_max_generation_) {
-            alpha_scalar->fill(alpha_ref);
-            beta_scalar->fill(beta_ref);
-        }
-        if (num_generated_scalar_ < num_max_generation_) {
-            num_generated_scalar_++;
-        }
         // z = z + beta * p
         // p = z
         // x += alpha * p
         exec->run(chebyshev::make_update(
-            alpha_scalar->get_const_values(), beta_scalar->get_const_values(),
-            gko::detail::get_local(inner_solution),
+            alpha_ref, beta_ref, gko::detail::get_local(inner_solution),
             gko::detail::get_local(update_solution),
             gko::detail::get_local(dense_x)));
     }
@@ -351,7 +293,7 @@ int workspace_traits<Chebyshev<ValueType>>::num_arrays(const Solver&)
 template <typename ValueType>
 int workspace_traits<Chebyshev<ValueType>>::num_vectors(const Solver&)
 {
-    return 7;
+    return 5;
 }
 
 
@@ -360,8 +302,7 @@ std::vector<std::string> workspace_traits<Chebyshev<ValueType>>::op_names(
     const Solver&)
 {
     return {
-        "residual", "inner_solution", "update_solution", "alpha", "beta",
-        "one",      "minus_one",
+        "residual", "inner_solution", "update_solution", "one", "minus_one",
     };
 }
 
diff --git a/core/solver/chebyshev_kernels.hpp b/core/solver/chebyshev_kernels.hpp
index 038f736e536..f1557369eb2 100644
--- a/core/solver/chebyshev_kernels.hpp
+++ b/core/solver/chebyshev_kernels.hpp
@@ -22,14 +22,14 @@ namespace chebyshev {
 
 #define GKO_DECLARE_CHEBYSHEV_INIT_UPDATE_KERNEL(ValueType, ScalarType) \
     void init_update(std::shared_ptr<const DefaultExecutor> exec,       \
-                     const ScalarType* alpha,                           \
+                     const ScalarType alpha,                            \
                      const matrix::Dense<ValueType>* inner_sol,         \
                      matrix::Dense<ValueType>* update_sol,              \
                      matrix::Dense<ValueType>* output)
 
 #define GKO_DECLARE_CHEBYSHEV_UPDATE_KERNEL(ValueType, ScalarType) \
     void update(std::shared_ptr<const DefaultExecutor> exec,       \
-                const ScalarType* alpha, const ScalarType* beta,   \
+                const ScalarType alpha, const ScalarType beta,     \
                 matrix::Dense<ValueType>* inner_sol,               \
                 matrix::Dense<ValueType>* update_sol,              \
                 matrix::Dense<ValueType>* output)
diff --git a/include/ginkgo/core/solver/chebyshev.hpp b/include/ginkgo/core/solver/chebyshev.hpp
index 9ac0f59282d..18c979b0741 100644
--- a/include/ginkgo/core/solver/chebyshev.hpp
+++ b/include/ginkgo/core/solver/chebyshev.hpp
@@ -182,12 +182,6 @@ class Chebyshev final
 
 private:
     std::shared_ptr<const LinOp> solver_{};
-    // num_generated_scalar_ tracks the number of generated scalar alpha
-    // and beta.
-    mutable size_type num_generated_scalar_ = 0;
-    // num_max_generation_ is the number of generated scalar kept in the
-    // workspace.
-    mutable size_type num_max_generation_ = 3;
     ValueType center_;
     ValueType foci_direction_;
 };
@@ -215,14 +209,10 @@ struct workspace_traits<Chebyshev<ValueType>> {
     constexpr static int inner_solution = 1;
     // update solution
     constexpr static int update_solution = 2;
-    // alpha
-    constexpr static int alpha = 3;
-    // beta
-    constexpr static int beta = 4;
     // constant 1.0 scalar
-    constexpr static int one = 5;
+    constexpr static int one = 3;
     // constant -1.0 scalar
-    constexpr static int minus_one = 6;
+    constexpr static int minus_one = 4;
 
     // stopping status array
     constexpr static int stop = 0;
diff --git a/reference/solver/chebyshev_kernels.cpp b/reference/solver/chebyshev_kernels.cpp
index d43e28b800f..2adf0a8026d 100644
--- a/reference/solver/chebyshev_kernels.cpp
+++ b/reference/solver/chebyshev_kernels.cpp
@@ -14,17 +14,16 @@ namespace chebyshev {
 
 template <typename ValueType, typename ScalarType>
 void init_update(std::shared_ptr<const DefaultExecutor> exec,
-                 const ScalarType* alpha,
+                 const ScalarType alpha,
                  const matrix::Dense<ValueType>* inner_sol,
                  matrix::Dense<ValueType>* update_sol,
                  matrix::Dense<ValueType>* output)
 {
-    const auto alpha_val = alpha[0];
     for (size_t row = 0; row < output->get_size()[0]; row++) {
         for (size_t col = 0; col < output->get_size()[1]; col++) {
             const auto inner_val = inner_sol->at(row, col);
             update_sol->at(row, col) = inner_val;
-            output->at(row, col) += alpha_val * inner_val;
+            output->at(row, col) += alpha * inner_val;
         }
     }
 }
@@ -34,21 +33,18 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_SCALAR_TYPE(
 
 
 template <typename ValueType, typename ScalarType>
-void update(std::shared_ptr<const DefaultExecutor> exec,
-            const ScalarType* alpha, const ScalarType* beta,
-            matrix::Dense<ValueType>* inner_sol,
+void update(std::shared_ptr<const DefaultExecutor> exec, const ScalarType alpha,
+            const ScalarType beta, matrix::Dense<ValueType>* inner_sol,
             matrix::Dense<ValueType>* update_sol,
             matrix::Dense<ValueType>* output)
 {
-    const auto alpha_val = alpha[0];
-    const auto beta_val = beta[0];
     for (size_t row = 0; row < output->get_size()[0]; row++) {
         for (size_t col = 0; col < output->get_size()[1]; col++) {
             const auto val =
-                inner_sol->at(row, col) + beta[0] * update_sol->at(row, col);
+                inner_sol->at(row, col) + beta * update_sol->at(row, col);
             inner_sol->at(row, col) = val;
             update_sol->at(row, col) = val;
-            output->at(row, col) += alpha_val * val;
+            output->at(row, col) += alpha * val;
         }
     }
 }
diff --git a/reference/test/solver/chebyshev_kernels.cpp b/reference/test/solver/chebyshev_kernels.cpp
index 876683b1584..5a1043ca982 100644
--- a/reference/test/solver/chebyshev_kernels.cpp
+++ b/reference/test/solver/chebyshev_kernels.cpp
@@ -15,8 +15,6 @@
 
 #include "core/test/utils.hpp"
 
-template <typename T>
-using workspace_traits = gko::solver::workspace_traits<T>;
 
 template <typename T>
 class Chebyshev : public ::testing::Test {
@@ -47,180 +45,6 @@ class Chebyshev : public ::testing::Test {
 TYPED_TEST_SUITE(Chebyshev, gko::test::ValueTypes, TypenameNameGenerator);
 
 
-TYPED_TEST(Chebyshev, CheckDefaultNumAlphaBetaWithoutIteration)
-{
-    using Mtx = typename TestFixture::Mtx;
-    using Solver = typename TestFixture::Solver;
-    using value_type = typename TestFixture::value_type;
-    auto upper = value_type{1.1};
-    auto lower = value_type{0.9};
-    auto factory =
-        Solver::build()
-            .with_criteria(gko::stop::ResidualNorm<value_type>::build()
-                               .with_reduction_factor(r<value_type>::value))
-            .with_foci(lower, upper)
-            .on(this->exec);
-    auto solver = factory->generate(this->mtx);
-    auto b = gko::initialize<Mtx>({3.9, 9.0, 2.2}, this->exec);
-    auto x = gko::initialize<Mtx>({0.0, 0.0, 0.0}, this->exec);
-
-    solver->apply(b.get(), x.get());
-
-    auto alpha =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::alpha));
-    auto beta =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::beta));
-    // if the stop criterion does not contain iteration limit, it will use the
-    // default value.
-    ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 4}));
-    ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 4}));
-}
-
-
-TYPED_TEST(Chebyshev, CheckDefaultNumAlphaBetaWithLessIteration)
-{
-    using Mtx = typename TestFixture::Mtx;
-    using Solver = typename TestFixture::Solver;
-    using value_type = typename TestFixture::value_type;
-    auto upper = value_type{1.1};
-    auto lower = value_type{0.9};
-    auto factory =
-        Solver::build()
-            .with_criteria(gko::stop::ResidualNorm<value_type>::build()
-                               .with_reduction_factor(r<value_type>::value),
-                           gko::stop::Iteration::build().with_max_iters(1u))
-            .with_foci(lower, upper)
-            .on(this->exec);
-    auto solver = factory->generate(this->mtx);
-    auto b = gko::initialize<Mtx>({3.9, 9.0, 2.2}, this->exec);
-    auto x = gko::initialize<Mtx>({0.0, 0.0, 0.0}, this->exec);
-
-    solver->apply(b.get(), x.get());
-
-    auto alpha =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::alpha));
-    auto beta =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::beta));
-    // if the iteration limit less than the default value, it will use the
-    // default value.
-    ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 4}));
-    ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 4}));
-}
-
-
-TYPED_TEST(Chebyshev, CheckStoredAlphaBeta)
-{
-    using Mtx = typename TestFixture::Mtx;
-    using Solver = typename TestFixture::Solver;
-    using value_type = typename TestFixture::value_type;
-    auto upper = value_type{1.1};
-    auto lower = value_type{0.9};
-    auto factory =
-        Solver::build()
-            .with_criteria(gko::stop::Iteration::build().with_max_iters(6u))
-            .with_foci(lower, upper)
-            .on(this->exec);
-    auto solver = factory->generate(this->mtx);
-    auto b = gko::initialize<Mtx>({3.9, 9.0, 2.2}, this->exec);
-    auto x = gko::initialize<Mtx>({0.0, 0.0, 0.0}, this->exec);
-
-    solver->apply(b.get(), x.get());
-
-    auto alpha =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::alpha));
-    auto beta =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::beta));
-    // the iteration is more than default
-    ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 7}));
-    ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 7}));
-    // check the num_keep alpha, beta
-    auto d = (upper + lower) / value_type{2};
-    auto c = (upper - lower) / value_type{2};
-    EXPECT_EQ(alpha->at(0, 0), value_type{1} / d);
-    EXPECT_EQ(beta->at(0, 0), value_type{0});
-    EXPECT_EQ(beta->at(0, 1),
-              value_type{0.5} * (c * alpha->at(0, 0)) * (c * alpha->at(0, 0)));
-    EXPECT_EQ(alpha->at(0, 1),
-              value_type{1} / (d - beta->at(0, 1) / alpha->at(0, 0)));
-    EXPECT_EQ(beta->at(0, 2), (c * alpha->at(0, 1) / value_type{2}) *
-                                  (c * alpha->at(0, 1) / value_type{2}));
-    EXPECT_EQ(alpha->at(0, 2),
-              value_type{1} / (d - beta->at(0, 2) / alpha->at(0, 1)));
-}
-
-
-TYPED_TEST(Chebyshev, AlphaBetaFromChangingCriterion)
-{
-    using Mtx = typename TestFixture::Mtx;
-    using Solver = typename TestFixture::Solver;
-    using value_type = typename TestFixture::value_type;
-    auto upper = value_type{1.1};
-    auto lower = value_type{0.9};
-    auto factory =
-        Solver::build()
-            .with_criteria(gko::stop::ResidualNorm<value_type>::build()
-                               .with_reduction_factor(r<value_type>::value),
-                           gko::stop::Iteration::build().with_max_iters(6u))
-            .with_foci(lower, upper)
-            .on(this->exec);
-    auto solver = factory->generate(this->mtx);
-    auto b = gko::initialize<Mtx>({3.9, 9.0, 2.2}, this->exec);
-    auto x = gko::initialize<Mtx>({0.0, 0.0, 0.0}, this->exec);
-
-    // same as previous test, but it works with combined factory
-    solver->apply(b.get(), x.get());
-
-    auto alpha =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::alpha));
-    auto beta =
-        gko::as<Mtx>(solver->get_workspace_op(workspace_traits<Solver>::beta));
-    auto alpha_ref = alpha->clone();
-    auto beta_ref = beta->clone();
-    // if the iteration limit is less than the default value, it will use the
-    // default value.
-    ASSERT_EQ(alpha->get_size(), (gko::dim<2>{1, 7}));
-    ASSERT_EQ(beta->get_size(), (gko::dim<2>{1, 7}));
-    {
-        // Set less iteration limit
-        solver->set_stop_criterion_factory(
-            gko::stop::Iteration::build().with_max_iters(4u).on(this->exec));
-
-        solver->apply(b.get(), x.get());
-
-        auto alpha_tmp = gko::as<Mtx>(
-            solver->get_workspace_op(workspace_traits<Solver>::alpha));
-        auto beta_tmp = gko::as<Mtx>(
-            solver->get_workspace_op(workspace_traits<Solver>::beta));
-        // if the iteration limit is less than the previous one, it keeps the
-        // storage.
-        ASSERT_EQ(alpha_tmp->get_size(), (gko::dim<2>{1, 7}));
-        ASSERT_EQ(beta_tmp->get_size(), (gko::dim<2>{1, 7}));
-        ASSERT_EQ(alpha_tmp->get_const_values(), alpha->get_const_values());
-        ASSERT_EQ(beta_tmp->get_const_values(), beta->get_const_values());
-        GKO_ASSERT_MTX_NEAR(alpha_tmp, alpha_ref, 0.0);
-        GKO_ASSERT_MTX_NEAR(beta_tmp, beta_ref, 0.0);
-    }
-    {
-        // Set more iteration limit
-        solver->set_stop_criterion_factory(
-            gko::stop::Iteration::build().with_max_iters(10u).on(this->exec));
-
-        solver->apply(b.get(), x.get());
-
-        auto alpha_tmp = gko::as<Mtx>(
-            solver->get_workspace_op(workspace_traits<Solver>::alpha));
-        auto beta_tmp = gko::as<Mtx>(
-            solver->get_workspace_op(workspace_traits<Solver>::beta));
-        // if the iteration limit is more than the previous one, it regenerates
-        // workspace
-        ASSERT_EQ(alpha_tmp->get_size(), (gko::dim<2>{1, 11}));
-        ASSERT_EQ(beta_tmp->get_size(), (gko::dim<2>{1, 11}));
-        ASSERT_NE(alpha_tmp->get_const_values(), alpha->get_const_values());
-        ASSERT_NE(beta_tmp->get_const_values(), beta->get_const_values());
-    }
-}
-
-
 TYPED_TEST(Chebyshev, SolvesTriangularSystem)
 {
     using Mtx = typename TestFixture::Mtx;