From 72b1801227fad26d139577dc6fbdb7fbb805070c Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 26 Apr 2022 16:56:09 +0200 Subject: [PATCH 1/3] Add an option for warmup solver iterations config --- benchmark/run_all_benchmarks.sh | 50 +++++++++++++++++---------- benchmark/solver/solver.cpp | 61 ++++++++++++++++++--------------- 2 files changed, 65 insertions(+), 46 deletions(-) diff --git a/benchmark/run_all_benchmarks.sh b/benchmark/run_all_benchmarks.sh index db7fa5bb5ca..7999550191a 100644 --- a/benchmark/run_all_benchmarks.sh +++ b/benchmark/run_all_benchmarks.sh @@ -1,33 +1,40 @@ +#!/bin/bash ################################################################################ # Environment variable detection +# + +print_default() { + local var=$1 + echo "$var environment variable not set - assuming \"${!var}\"" 1>&2 +} if [ ! "${BENCHMARK}" ]; then BENCHMARK="spmv" - echo "BENCHMARK environment variable not set - assuming \"${BENCHMARK}\"" 1>&2 + print_default BENCHMARK fi if [ ! "${DRY_RUN}" ]; then DRY_RUN="false" - echo "DRY_RUN environment variable not set - assuming \"${DRY_RUN}\"" 1>&2 + print_default DRY_RUN fi if [ ! "${EXECUTOR}" ]; then EXECUTOR="cuda" - echo "EXECUTOR environment variable not set - assuming \"${EXECUTOR}\"" 1>&2 + print_default EXECUTOR fi if [ ! "${REPETITIONS}" ]; then REPETITIONS=10 - echo "REPETITIONS environment variable not set - assuming ${REPETITIONS}" 1>&2 + print_default REPETITIONS fi if [ ! "${SOLVER_REPETITIONS}" ]; then SOLVER_REPETITIONS=1 - echo "SOLVER_REPETITIONS environment variable not set - assuming ${SOLVER_REPETITIONS}" 1>&2 + print_default SOLVER_REPETITIONS fi if [ ! "${SEGMENTS}" ]; then - echo "SEGMENTS environment variable not set - running entire suite" 1>&2 + echo "SEGMENTS environment variable not set - running entire suite" 1>&2 SEGMENTS=1 SEGMENT_ID=1 elif [ ! "${SEGMENT_ID}" ]; then @@ -37,57 +44,62 @@ fi if [ ! "${PRECONDS}" ]; then PRECONDS="none" - echo "PRECONDS environment variable not set - assuming \"${PRECONDS}\"" 1>&2 + print_default PRECONDS fi if [ ! "${FORMATS}" ]; then - echo "FORMATS environment variable not set - assuming \"csr,coo,ell,hybrid,sellp\"" 1>&2 FORMATS="csr,coo,ell,hybrid,sellp" + print_default FORMATS fi if [ ! "${ELL_IMBALANCE_LIMIT}" ]; then - echo "ELL_IMBALANCE_LIMIT environment variable not set - assuming 100" 1>&2 ELL_IMBALANCE_LIMIT=100 + print_default ELL_IMBALANCE_LIMIT fi if [ ! "${SOLVERS}" ]; then SOLVERS="bicgstab,cg,cgs,fcg,gmres,cb_gmres_reduce1,idr" - echo "SOLVERS environment variable not set - assuming \"${SOLVERS}\"" 1>&2 + print_default SOLVERS fi if [ ! "${SOLVERS_PRECISION}" ]; then SOLVERS_PRECISION=1e-6 - echo "SOLVERS_PRECISION environment variable not set - assuming \"${SOLVERS_PRECISION}\"" 1>&2 + print_default SOLVERS_PRECISION fi if [ ! "${SOLVERS_MAX_ITERATIONS}" ]; then SOLVERS_MAX_ITERATIONS=10000 - echo "SOLVERS_MAX_ITERATIONS environment variable not set - assuming \"${SOLVERS_MAX_ITERATIONS}\"" 1>&2 + print_default SOLVERS_MAX_ITERATIONS +fi + +if [ ! "${SOLVERS_WARMUP_MAX_ITERATIONS}" ]; then + SOLVERS_WARMUP_MAX_ITERATIONS=100 + print_default SOLVERS_WARMUP_MAX_ITERATIONS fi if [ ! "${SOLVERS_GMRES_RESTART}" ]; then SOLVERS_GMRES_RESTART=100 - echo "SOLVERS_GMRES_RESTART environment variable not set - assuming \"${SOLVERS_GMRES_RESTART}\"" 1>&2 + print_default SOLVERS_GMRES_RESTART fi if [ ! "${SYSTEM_NAME}" ]; then SYSTEM_NAME="unknown" - echo "SYSTEM_MANE environment variable not set - assuming \"${SYSTEM_NAME}\"" 1>&2 + print_default SYSTEM_NAME fi if [ ! "${DEVICE_ID}" ]; then DEVICE_ID="0" - echo "DEVICE_ID environment variable not set - assuming \"${DEVICE_ID}\"" 1>&2 + print_default DEVICE_ID fi if [ ! "${SOLVERS_JACOBI_MAX_BS}" ]; then SOLVERS_JACOBI_MAX_BS="32" - echo "SOLVERS_JACOBI_MAX_BS environment variable not set - assuming \"${SOLVERS_JACOBI_MAX_BS}\"" 1>&2 + print_default SOLVERS_JACOBI_MAX_BS fi if [ ! "${BENCHMARK_PRECISION}" ]; then BENCHMARK_PRECISION="double" - echo "BENCHMARK_PRECISION not set - assuming \"${BENCHMARK_PRECISION}\"" 1>&2 + print_default BENCHMARK_PRECISION fi if [ "${BENCHMARK_PRECISION}" == "double" ]; then @@ -123,7 +135,7 @@ fi if [ ! "${SOLVERS_INITIAL_GUESS}" ]; then SOLVERS_INITIAL_GUESS="rhs" - echo "SOLVERS_RHS environment variable not set - assuming \"${SOLVERS_INITIAL_GUESS}\"" 1>&2 + print_default SOLVERS_INITIAL_GUESS fi if [ "${SOLVERS_INITIAL_GUESS}" == "random" ]; then @@ -140,7 +152,7 @@ fi if [ ! "${GPU_TIMER}" ]; then GPU_TIMER="false" - echo "GPU_TIMER environment variable not set - assuming \"${GPU_TIMER}\"" 1>&2 + print_default GPU_TIMER fi # Control whether to run detailed benchmarks or not. diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 6f56e038907..869dfa552c8 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -64,6 +64,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. DEFINE_uint32(max_iters, 1000, "Maximal number of iterations the solver will be run for"); +DEFINE_uint32(warmup_max_iters, 100, + "Maximal number of warmup iterations the solver will be run for"); + DEFINE_double(rel_res_goal, 1e-6, "The relative residual goal of the solver"); DEFINE_bool( @@ -196,7 +199,7 @@ void validate_option_object(const rapidjson::Value& value) std::shared_ptr create_criterion( - std::shared_ptr exec) + std::shared_ptr exec, std::uint32_t max_iters) { std::shared_ptr residual_stop; if (FLAGS_rel_residual) { @@ -212,7 +215,7 @@ std::shared_ptr create_criterion( .on(exec)); } auto iteration_stop = gko::share( - gko::stop::Iteration::build().with_max_iters(FLAGS_max_iters).on(exec)); + gko::stop::Iteration::build().with_max_iters(max_iters).on(exec)); std::vector> criterion_vector{residual_stop, iteration_stop}; return gko::stop::combine(criterion_vector); @@ -222,9 +225,9 @@ std::shared_ptr create_criterion( template std::unique_ptr add_criteria_precond_finalize( SolverIntermediate inter, const std::shared_ptr& exec, - std::shared_ptr precond) + std::shared_ptr precond, std::uint32_t max_iters) { - return inter.with_criteria(create_criterion(exec)) + return inter.with_criteria(create_criterion(exec, max_iters)) .with_preconditioner(give(precond)) .on(exec); } @@ -233,16 +236,17 @@ std::unique_ptr add_criteria_precond_finalize( template std::unique_ptr add_criteria_precond_finalize( const std::shared_ptr& exec, - std::shared_ptr precond) + std::shared_ptr precond, std::uint32_t max_iters) { - return add_criteria_precond_finalize(Solver::build(), exec, precond); + return add_criteria_precond_finalize(Solver::build(), exec, precond, + max_iters); } std::unique_ptr generate_solver( const std::shared_ptr& exec, std::shared_ptr precond, - const std::string& description) + const std::string& description, std::uint32_t max_iters) { std::string cb_gmres_prefix("cb_gmres_"); if (description.find(cb_gmres_prefix) == 0) { @@ -270,33 +274,33 @@ std::unique_ptr generate_solver( gko::solver::CbGmres::build() .with_krylov_dim(FLAGS_gmres_restart) .with_storage_precision(s_prec), - exec, precond); + exec, precond, max_iters); } else if (description == "bicgstab") { return add_criteria_precond_finalize>( - exec, precond); + exec, precond, max_iters); } else if (description == "bicg") { - return add_criteria_precond_finalize>(exec, - precond); + return add_criteria_precond_finalize>( + exec, precond, max_iters); } else if (description == "cg") { - return add_criteria_precond_finalize>(exec, - precond); + return add_criteria_precond_finalize>( + exec, precond, max_iters); } else if (description == "cgs") { - return add_criteria_precond_finalize>(exec, - precond); + return add_criteria_precond_finalize>( + exec, precond, max_iters); } else if (description == "fcg") { - return add_criteria_precond_finalize>(exec, - precond); + return add_criteria_precond_finalize>( + exec, precond, max_iters); } else if (description == "idr") { return add_criteria_precond_finalize( gko::solver::Idr::build() .with_subspace_dim(FLAGS_idr_subspace_dim) .with_kappa(static_cast(FLAGS_idr_kappa)), - exec, precond); + exec, precond, max_iters); } else if (description == "gmres") { return add_criteria_precond_finalize( gko::solver::Gmres::build().with_krylov_dim( FLAGS_gmres_restart), - exec, precond); + exec, precond, max_iters); } else if (description == "lower_trs") { return gko::solver::LowerTrs::build() .with_num_rhs(FLAGS_nrhs) @@ -306,8 +310,8 @@ std::unique_ptr generate_solver( .with_num_rhs(FLAGS_nrhs) .on(exec); } else if (description == "overhead") { - return add_criteria_precond_finalize>(exec, - precond); + return add_criteria_precond_finalize>( + exec, precond, max_iters); } throw std::range_error(std::string("The provided string <") + description + "> does not match any solver!"); @@ -402,12 +406,14 @@ void solve_system(const std::string& solver_name, IterationControl ic{get_timer(exec, FLAGS_gpu_timer)}; // warm run + std::shared_ptr solver; auto it_logger = std::make_shared(exec); for (auto _ : ic.warmup_run()) { auto x_clone = clone(x); auto precond = precond_factory.at(precond_name)(exec); - auto solver = generate_solver(exec, give(precond), solver_name) - ->generate(system_matrix); + solver = generate_solver(exec, give(precond), solver_name, + FLAGS_warmup_max_iters) + ->generate(system_matrix); solver->add_logger(it_logger); solver->apply(lend(b), lend(x_clone)); exec->synchronize(); @@ -427,8 +433,9 @@ void solve_system(const std::string& solver_name, exec->add_logger(gen_logger); auto precond = precond_factory.at(precond_name)(exec); - auto solver = generate_solver(exec, give(precond), solver_name) - ->generate(system_matrix); + solver = generate_solver(exec, give(precond), solver_name, + FLAGS_max_iters) + ->generate(system_matrix); exec->remove_logger(gko::lend(gen_logger)); gen_logger->write_data(solver_json["generate"]["components"], @@ -476,14 +483,14 @@ void solve_system(const std::string& solver_name, auto generate_timer = get_timer(exec, FLAGS_gpu_timer); auto apply_timer = ic.get_timer(); auto x_clone = clone(x); - std::shared_ptr solver; for (auto status : ic.run(false)) { x_clone = clone(x); exec->synchronize(); generate_timer->tic(); auto precond = precond_factory.at(precond_name)(exec); - solver = generate_solver(exec, give(precond), solver_name) + solver = generate_solver(exec, give(precond), solver_name, + FLAGS_max_iters) ->generate(system_matrix); generate_timer->toc(); From 1268c60b677385d60c17690bc0a5cb704f7b66ef Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Tue, 26 Apr 2022 17:13:47 +0200 Subject: [PATCH 2/3] Use env bash --- benchmark/run_all_benchmarks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/run_all_benchmarks.sh b/benchmark/run_all_benchmarks.sh index 7999550191a..90e6d251c81 100644 --- a/benchmark/run_all_benchmarks.sh +++ b/benchmark/run_all_benchmarks.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash ################################################################################ # Environment variable detection # From 6ede97c0469a79f753f166ab20aa1e5cbb3c1e51 Mon Sep 17 00:00:00 2001 From: Terry Cojean Date: Thu, 12 May 2022 15:29:28 +0200 Subject: [PATCH 3/3] Add iteration logger to the actual solver run --- benchmark/solver/solver.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 869dfa552c8..ed8a99840d1 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -407,20 +407,14 @@ void solve_system(const std::string& solver_name, // warm run std::shared_ptr solver; - auto it_logger = std::make_shared(exec); for (auto _ : ic.warmup_run()) { auto x_clone = clone(x); auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_warmup_max_iters) ->generate(system_matrix); - solver->add_logger(it_logger); solver->apply(lend(b), lend(x_clone)); exec->synchronize(); - solver->remove_logger(gko::lend(it_logger)); - } - if (FLAGS_warmup > 0) { - it_logger->write_data(solver_json["apply"], allocator); } // detail run @@ -480,6 +474,7 @@ void solve_system(const std::string& solver_name, } // timed run + auto it_logger = std::make_shared(exec); auto generate_timer = get_timer(exec, FLAGS_gpu_timer); auto apply_timer = ic.get_timer(); auto x_clone = clone(x); @@ -495,10 +490,18 @@ void solve_system(const std::string& solver_name, generate_timer->toc(); exec->synchronize(); + if (ic.get_num_repetitions() == 0) { + solver->add_logger(it_logger); + } apply_timer->tic(); solver->apply(lend(b), lend(x_clone)); apply_timer->toc(); + if (ic.get_num_repetitions() == 0) { + solver->remove_logger(gko::lend(it_logger)); + } } + it_logger->write_data(solver_json["apply"], allocator); + if (b->get_size()[1] == 1 && !FLAGS_overhead) { // a solver is considered direct if it didn't log any iterations if (solver_json["apply"].HasMember("iterations") &&